Java: Move common CSV logic for sources and sinks into shared library

This commit is contained in:
Tom Hvitved
2021-06-02 15:52:44 +02:00
parent 7382b349c2
commit d0b6808299
7 changed files with 256 additions and 177 deletions

View File

@@ -68,6 +68,7 @@ import java
private import semmle.code.java.dataflow.DataFlow::DataFlow
private import internal.DataFlowPrivate
private import internal.FlowSummaryImpl::Private::External
private import internal.FlowSummaryImplSpecific
private import FlowSummary
/**
@@ -359,7 +360,8 @@ private predicate summaryModel(string row) {
any(SummaryModelCsv s).row(row)
}
private predicate sourceModel(
/** Holds if a source model exists for the given parameters. */
predicate sourceModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string output, string kind
) {
@@ -377,7 +379,8 @@ private predicate sourceModel(
)
}
private predicate sinkModel(
/** Holds if a sink model exists for the given parameters. */
predicate sinkModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string input, string kind
) {
@@ -395,7 +398,8 @@ private predicate sinkModel(
)
}
private predicate summaryModel(
/** Holds if a summary model exists for the given parameters. */
predicate summaryModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string input, string output, string kind
) {
@@ -600,7 +604,8 @@ private Element interpretElement0(
)
}
private Element interpretElement(
/** Gets the source/sink/summary element corresponding to the supplied parameters. */
Element interpretElement(
string namespace, string type, boolean subtypes, string name, string signature, string ext
) {
elementSpec(namespace, type, subtypes, name, signature, ext) and
@@ -611,166 +616,25 @@ private Element interpretElement(
)
}
private predicate sourceElement(Element e, string output, string kind) {
exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext
|
sourceModel(namespace, type, subtypes, name, signature, ext, output, kind) and
e = interpretElement(namespace, type, subtypes, name, signature, ext)
)
cached
private module Cached {
/**
* Holds if `node` is specified as a source with the given kind in a CSV flow
* model.
*/
cached
predicate sourceNode(Node node, string kind) {
exists(InterpretNode n | isSourceNode(n, kind) and n.asNode() = node)
}
/**
* Holds if `node` is specified as a sink with the given kind in a CSV flow
* model.
*/
cached
predicate sinkNode(Node node, string kind) {
exists(InterpretNode n | isSinkNode(n, kind) and n.asNode() = node)
}
}
private predicate sinkElement(Element e, string input, string kind) {
exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext
|
sinkModel(namespace, type, subtypes, name, signature, ext, input, kind) and
e = interpretElement(namespace, type, subtypes, name, signature, ext)
)
}
/**
* Holds if an external flow summary exists for `e` with input specification
* `input`, output specification `output`, and kind `kind`.
*/
predicate summaryElement(Element e, string input, string output, string kind) {
exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext
|
summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind) and
e = interpretElement(namespace, type, subtypes, name, signature, ext)
)
}
/** Gets a specification used in a source model, sink model, or summary model. */
string inOutSpec() {
sourceModel(_, _, _, _, _, _, result, _) or
sinkModel(_, _, _, _, _, _, result, _) or
summaryModel(_, _, _, _, _, _, result, _, _) or
summaryModel(_, _, _, _, _, _, _, result, _)
}
private predicate inputNeedsReference(string c) {
c = "Argument" or
parseArg(c, _)
}
private predicate outputNeedsReference(string c) {
c = "Argument" or
parseArg(c, _) or
c = "ReturnValue"
}
private predicate sourceElementRef(Top ref, string output, string kind) {
exists(Element e |
sourceElement(e, output, kind) and
if outputNeedsReference(specLast(output))
then ref.(Call).getCallee().getSourceDeclaration() = e
else ref = e
)
}
private predicate sinkElementRef(Top ref, string input, string kind) {
exists(Element e |
sinkElement(e, input, kind) and
if inputNeedsReference(specLast(input))
then ref.(Call).getCallee().getSourceDeclaration() = e
else ref = e
)
}
private predicate summaryElementRef(Top ref, string input, string output, string kind) {
exists(Element e |
summaryElement(e, input, output, kind) and
if inputNeedsReference(specLast(input))
then ref.(Call).getCallee().getSourceDeclaration() = e
else ref = e
)
}
private newtype TAstOrNode =
TAst(Top t) or
TNode(Node n)
private predicate interpretOutput(string output, int idx, Top ref, TAstOrNode node) {
(
sourceElementRef(ref, output, _) or
summaryElementRef(ref, _, output, _)
) and
specLength(output, idx) and
node = TAst(ref)
or
exists(Top mid, string c, Node n |
interpretOutput(output, idx + 1, ref, TAst(mid)) and
specSplit(output, c, idx) and
node = TNode(n)
|
exists(int pos | n.(PostUpdateNode).getPreUpdateNode().(ArgumentNode).argumentOf(mid, pos) |
c = "Argument" or parseArg(c, pos)
)
or
exists(int pos | n.(ParameterNode).isParameterOf(mid, pos) |
c = "Parameter" or parseParam(c, pos)
)
or
(c = "Parameter" or c = "") and
n.asParameter() = mid
or
c = "ReturnValue" and
n.asExpr().(Call) = mid
or
c = "" and
n.asExpr().(FieldRead).getField() = mid
)
}
private predicate interpretInput(string input, int idx, Top ref, TAstOrNode node) {
(
sinkElementRef(ref, input, _) or
summaryElementRef(ref, input, _, _)
) and
specLength(input, idx) and
node = TAst(ref)
or
exists(Top mid, string c, Node n |
interpretInput(input, idx + 1, ref, TAst(mid)) and
specSplit(input, c, idx) and
node = TNode(n)
|
exists(int pos | n.(ArgumentNode).argumentOf(mid, pos) | c = "Argument" or parseArg(c, pos))
or
exists(ReturnStmt ret |
c = "ReturnValue" and
n.asExpr() = ret.getResult() and
mid = ret.getEnclosingCallable()
)
or
exists(FieldWrite fw |
c = "" and
fw.getField() = mid and
n.asExpr() = fw.getRHS()
)
)
}
/**
* Holds if `node` is specified as a source with the given kind in a CSV flow
* model.
*/
predicate sourceNode(Node node, string kind) {
exists(Top ref, string output |
sourceElementRef(ref, output, kind) and
interpretOutput(output, 0, ref, TNode(node))
)
}
/**
* Holds if `node` is specified as a sink with the given kind in a CSV flow
* model.
*/
predicate sinkNode(Node node, string kind) {
exists(Top ref, string input |
sinkElementRef(ref, input, kind) and
interpretInput(input, 0, ref, TNode(node))
)
}
import Cached

View File

@@ -4,13 +4,21 @@ private import DataFlowUtil
private import semmle.code.java.dataflow.InstanceAccess
private import semmle.code.java.dataflow.FlowSummary
private import semmle.code.java.dispatch.VirtualDispatch as VirtualDispatch
private import semmle.code.java.dataflow.internal.FlowSummaryImplSpecific
private module DispatchImpl {
/** Gets a viable implementation of the target of the given `Call`. */
Callable viableCallable(Call c) {
result = VirtualDispatch::viableCallable(c)
or
result.(SummarizedCallable) = c.getCallee().getSourceDeclaration()
result = c.getCallee().getSourceDeclaration() and
(
result instanceof SummarizedCallable
or
sourceElement(result, _, _)
or
sinkElement(result, _, _)
)
}
/**

View File

@@ -9,10 +9,11 @@ private import semmle.code.java.controlflow.Guards
private import semmle.code.java.dataflow.ExternalFlow
private import semmle.code.java.dataflow.FlowSteps
private import semmle.code.java.dataflow.FlowSummary
import semmle.code.java.dataflow.InstanceAccess
private import semmle.code.java.dataflow.InstanceAccess
private import FlowSummaryImpl as FlowSummaryImpl
private import TaintTrackingUtil as TaintTrackingUtil
import DataFlowNodes::Public
import semmle.code.Unit
/** Holds if `n` is an access to an unqualified `this` at `cfgnode`. */
private predicate thisAccess(Node n, ControlFlowNode cfgnode) {

View File

@@ -9,6 +9,7 @@
private import FlowSummaryImplSpecific
private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
private import DataFlowImplCommon as DataFlowImplCommon
/** Provides classes and predicates for defining flow summaries. */
module Public {
@@ -178,7 +179,6 @@ module Public {
*/
module Private {
private import Public
private import DataFlowImplCommon as DataFlowImplCommon
newtype TSummaryComponent =
TContentSummaryComponent(Content c) or
@@ -580,6 +580,14 @@ module Private {
* summaries into a `SummarizedCallable`s.
*/
module External {
/** Holds if `spec` is a relevant external specification. */
private predicate relevantSpec(string spec) {
summaryElement(_, spec, _, _) or
summaryElement(_, _, spec, _) or
sourceElement(_, spec, _) or
sinkElement(_, spec, _)
}
/** Holds if the `n`th component of specification `s` is `c`. */
predicate specSplit(string s, string c, int n) { relevantSpec(s) and s.splitAt(" of ", n) = c }
@@ -629,6 +637,8 @@ module Private {
or
exists(int pos | parseParam(c, pos) and result = SummaryComponent::parameter(pos))
or
c = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
or
result = interpretComponentSpecific(c)
)
}
@@ -664,13 +674,13 @@ module Private {
}
private class SummarizedCallableExternal extends SummarizedCallable {
SummarizedCallableExternal() { externalSummary(this, _, _, _) }
SummarizedCallableExternal() { summaryElement(this, _, _, _) }
override predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
exists(string inSpec, string outSpec, string kind |
externalSummary(this, inSpec, outSpec, kind) and
summaryElement(this, inSpec, outSpec, kind) and
interpretSpec(inSpec, 0, input) and
interpretSpec(outSpec, 0, output)
|
@@ -686,6 +696,111 @@ module Private {
specSplit(spec, c, _) and
not exists(interpretComponent(c))
}
private predicate inputNeedsReference(string c) {
c = "Argument" or
parseArg(c, _)
}
private predicate outputNeedsReference(string c) {
c = "Argument" or
parseArg(c, _) or
c = "ReturnValue"
}
private predicate sourceElementRef(InterpretNode ref, string output, string kind) {
exists(SourceOrSinkElement e |
sourceElement(e, output, kind) and
if outputNeedsReference(specLast(output))
then viableCallable(ref.asCall()) = any(InterpretNode n | n.asElement() = e).asCallable()
else ref.asElement() = e
)
}
private predicate sinkElementRef(InterpretNode ref, string input, string kind) {
exists(SourceOrSinkElement e |
sinkElement(e, input, kind) and
if inputNeedsReference(specLast(input))
then viableCallable(ref.asCall()) = any(InterpretNode n | n.asElement() = e).asCallable()
else ref.asElement() = e
)
}
private predicate interpretOutput(string output, int idx, InterpretNode ref, InterpretNode node) {
sourceElementRef(ref, output, _) and
specLength(output, idx) and
node = ref
or
exists(InterpretNode mid, string c |
interpretOutput(output, idx + 1, ref, mid) and
specSplit(output, c, idx)
|
exists(int pos |
node.asNode()
.(PostUpdateNode)
.getPreUpdateNode()
.(ArgumentNode)
.argumentOf(mid.asCall(), pos)
|
c = "Argument" or parseArg(c, pos)
)
or
exists(int pos | node.asNode().(ParameterNode).isParameterOf(mid.asCallable(), pos) |
c = "Parameter" or parseParam(c, pos)
)
or
c = "ReturnValue" and
node.asNode() = getAnOutNode(mid.asCall(), getReturnValueKind())
or
interpretOutputSpecific(c, mid, node)
)
}
private predicate interpretInput(string input, int idx, InterpretNode ref, InterpretNode node) {
sinkElementRef(ref, input, _) and
specLength(input, idx) and
node = ref
or
exists(InterpretNode mid, string c |
interpretInput(input, idx + 1, ref, mid) and
specSplit(input, c, idx)
|
exists(int pos | node.asNode().(ArgumentNode).argumentOf(mid.asCall(), pos) |
c = "Argument" or parseArg(c, pos)
)
or
exists(ReturnNode ret |
c = "ReturnValue" and
ret = node.asNode() and
ret.getKind() = getReturnValueKind() and
mid.asCallable() = DataFlowImplCommon::getNodeEnclosingCallable(ret)
)
or
interpretInputSpecific(c, mid, node)
)
}
/**
* Holds if `node` is specified as a source with the given kind in a CSV flow
* model.
*/
predicate isSourceNode(InterpretNode node, string kind) {
exists(InterpretNode ref, string output |
sourceElementRef(ref, output, kind) and
interpretOutput(output, 0, ref, node)
)
}
/**
* Holds if `node` is specified as a sink with the given kind in a CSV flow
* model.
*/
predicate isSinkNode(InterpretNode node, string kind) {
exists(InterpretNode ref, string input |
sinkElementRef(ref, input, kind) and
interpretInput(input, 0, ref, node)
)
}
}
/** Provides a query predicate for outputting a set of relevant flow summaries. */

View File

@@ -51,22 +51,22 @@ DataFlowType getCallbackParameterType(DataFlowType t, int i) { none() }
*/
DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { none() }
/** Holds if `spec` is a relevant external specification. */
predicate relevantSpec(string spec) { spec = inOutSpec() }
/**
* Holds if an external flow summary exists for `c` with input specification
* `input`, output specification `output`, and kind `kind`.
*/
predicate externalSummary(DataFlowCallable c, string input, string output, string kind) {
summaryElement(c, input, output, kind)
predicate summaryElement(DataFlowCallable c, string input, string output, string kind) {
exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext
|
summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind) and
c = interpretElement(namespace, type, subtypes, name, signature, ext)
)
}
/** Gets the summary component for specification component `c`, if any. */
bindingset[c]
SummaryComponent interpretComponentSpecific(string c) {
c = "ReturnValue" and result = SummaryComponent::return(_)
or
c = "ArrayElement" and result = SummaryComponent::content(any(ArrayContent c0))
or
c = "Element" and result = SummaryComponent::content(any(CollectionContent c0))
@@ -75,3 +75,92 @@ SummaryComponent interpretComponentSpecific(string c) {
or
c = "MapValue" and result = SummaryComponent::content(any(MapValueContent c0))
}
class SourceOrSinkElement = Top;
/**
* Holds if an external source specification exists for `e` with output specification
* `output` and kind `kind`.
*/
predicate sourceElement(SourceOrSinkElement e, string output, string kind) {
exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext
|
sourceModel(namespace, type, subtypes, name, signature, ext, output, kind) and
e = interpretElement(namespace, type, subtypes, name, signature, ext)
)
}
/**
* Holds if an external sink specification exists for `e` with input specification
* `input` and kind `kind`.
*/
predicate sinkElement(SourceOrSinkElement e, string input, string kind) {
exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext
|
sinkModel(namespace, type, subtypes, name, signature, ext, input, kind) and
e = interpretElement(namespace, type, subtypes, name, signature, ext)
)
}
/** Gets the return kind corresponding to specification `"ReturnValue"`. */
ReturnKind getReturnValueKind() { any() }
private newtype TInterpretNode =
TElement(SourceOrSinkElement n) or
TNode(Node n)
/** An entity used to interpret a source/sink specification. */
class InterpretNode extends TInterpretNode {
/** Gets the element that this node corresponds to, if any. */
SourceOrSinkElement asElement() { this = TElement(result) }
/** Gets the data-flow node that this node corresponds to, if any. */
Node asNode() { this = TNode(result) }
/** Gets the call that this node corresponds to, if any. */
DataFlowCall asCall() { result = this.asElement() }
/** Gets the callable that this node corresponds to, if any. */
DataFlowCallable asCallable() { result = this.asElement() }
/** Gets a textual representation of this node. */
string toString() {
result = this.asElement().toString()
or
result = this.asNode().toString()
}
/** Gets the location of this node. */
Location getLocation() {
result = this.asElement().getLocation()
or
result = this.asNode().getLocation()
}
}
/** Provides additional sink specification logic required for annotations. */
pragma[inline]
predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) {
exists(Node n, Top ast |
n = node.asNode() and
ast = mid.asElement()
|
(c = "Parameter" or c = "") and
node.asNode().asParameter() = mid.asElement()
or
c = "" and
n.asExpr().(FieldRead).getField() = ast
)
}
/** Provides additional source specification logic required for annotations. */
pragma[inline]
predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode n) {
exists(FieldWrite fw |
c = "" and
fw.getField() = mid.asElement() and
n.asNode().asExpr() = fw.getRHS()
)
}

View File

@@ -11,6 +11,7 @@ private import VirtualDispatch
private import semmle.code.java.dataflow.internal.BaseSSA
private import semmle.code.java.dataflow.internal.DataFlowUtil
private import semmle.code.java.dataflow.internal.DataFlowPrivate
private import semmle.code.java.dataflow.InstanceAccess
private import semmle.code.java.Collections
private import semmle.code.java.Maps

View File

@@ -14,6 +14,7 @@ private import semmle.code.java.dataflow.internal.BaseSSA
private import semmle.code.java.dataflow.internal.DataFlowUtil
private import semmle.code.java.dataflow.internal.DataFlowPrivate
private import semmle.code.java.dataflow.internal.ContainerFlow
private import semmle.code.java.dataflow.InstanceAccess
/**
* Gets a viable dispatch target for `ma`. This is the input dispatch relation.