diff --git a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll index 4fff5e3c66d..6668af9e3b2 100644 --- a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll +++ b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll @@ -94,10 +94,10 @@ abstract class SummarizedCallable extends LibraryCallable { /** * Holds if values stored inside `content` are cleared on objects passed as - * the `i`th argument to this callable. + * arguments at position `pos` to this callable. */ pragma[nomagic] - predicate clearsContent(int i, DataFlow::Content content) { none() } + predicate clearsContent(ParameterPosition pos, DataFlow::ContentSet content) { none() } } private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable { @@ -111,7 +111,7 @@ private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable sc.propagatesFlow(input, output, preservesValue) } - final override predicate clearsContent(ParameterPosition pos, DataFlow::Content content) { + final override predicate clearsContent(ParameterPosition pos, DataFlow::ContentSet content) { sc.clearsContent(pos, content) } } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll b/python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll index 8e126868cc1..3d40e04b815 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll @@ -42,9 +42,7 @@ module AccessPath { * Parses a lower-bounded interval `n..` and gets the lower bound. */ bindingset[arg] - private int parseLowerBound(string arg) { - result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() - } + int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() } /** * Parses an integer constant or interval (bounded or unbounded) that explicitly @@ -151,7 +149,7 @@ class AccessPath extends string instanceof AccessPath::Range { * An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths. */ class AccessPathToken extends string { - AccessPathToken() { this = getRawToken(any(AccessPath path), _) } + AccessPathToken() { this = getRawToken(_, _) } private string getPart(int part) { result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll index 36cfb2c98b4..b975a31c394 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll @@ -24,7 +24,7 @@ module Public { class SummaryComponent extends TSummaryComponent { /** Gets a textual representation of this summary component. */ string toString() { - exists(Content c | this = TContentSummaryComponent(c) and result = c.toString()) + exists(ContentSet c | this = TContentSummaryComponent(c) and result = c.toString()) or exists(ArgumentPosition pos | this = TParameterSummaryComponent(pos) and result = "parameter " + pos @@ -41,7 +41,7 @@ module Public { /** Provides predicates for constructing summary components. */ module SummaryComponent { /** Gets a summary component for content `c`. */ - SummaryComponent content(Content c) { result = TContentSummaryComponent(c) } + SummaryComponent content(ContentSet c) { result = TContentSummaryComponent(c) } /** Gets a summary component for a parameter at position `pos`. */ SummaryComponent parameter(ArgumentPosition pos) { result = TParameterSummaryComponent(pos) } @@ -218,7 +218,7 @@ module Public { * arguments at position `pos` to this callable. */ pragma[nomagic] - predicate clearsContent(ParameterPosition pos, Content content) { none() } + predicate clearsContent(ParameterPosition pos, ContentSet content) { none() } } } @@ -231,7 +231,7 @@ module Private { import AccessPathSyntax newtype TSummaryComponent = - TContentSummaryComponent(Content c) or + TContentSummaryComponent(ContentSet c) or TParameterSummaryComponent(ArgumentPosition pos) or TArgumentSummaryComponent(ParameterPosition pos) or TReturnSummaryComponent(ReturnKind rk) @@ -540,7 +540,7 @@ module Private { exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | head = s.head() | n = summaryNodeInputState(c, s) and ( - exists(Content cont | + exists(ContentSet cont | head = TContentSummaryComponent(cont) and result = getContentType(cont) ) or @@ -554,7 +554,7 @@ module Private { or n = summaryNodeOutputState(c, s) and ( - exists(Content cont | + exists(ContentSet cont | head = TContentSummaryComponent(cont) and result = getContentType(cont) ) or @@ -669,7 +669,7 @@ module Private { * Holds if there is a read step of content `c` from `pred` to `succ`, which * is synthesized from a flow summary. */ - predicate summaryReadStep(Node pred, Content c, Node succ) { + predicate summaryReadStep(Node pred, ContentSet c, Node succ) { exists(SummarizedCallable sc, SummaryComponentStack s | pred = summaryNodeInputState(sc, s.drop(1)) and succ = summaryNodeInputState(sc, s) and @@ -681,7 +681,7 @@ module Private { * Holds if there is a store step of content `c` from `pred` to `succ`, which * is synthesized from a flow summary. */ - predicate summaryStoreStep(Node pred, Content c, Node succ) { + predicate summaryStoreStep(Node pred, ContentSet c, Node succ) { exists(SummarizedCallable sc, SummaryComponentStack s | pred = summaryNodeOutputState(sc, s) and succ = summaryNodeOutputState(sc, s.drop(1)) and @@ -708,7 +708,7 @@ module Private { * `a` on line 2 to the post-update node for `a` on that line (via an intermediate * node where field `b` is cleared). */ - predicate summaryClearsContent(Node n, Content c) { + predicate summaryClearsContent(Node n, ContentSet c) { exists(SummarizedCallable sc, ParameterPosition pos | n = summaryNode(sc, TSummaryNodeClearsContentState(pos, true)) and sc.clearsContent(pos, c) @@ -730,7 +730,8 @@ module Private { * In such cases, it is important to prevent use-use flow out of * `arg` (see comment for `summaryClearsContent`). */ - predicate summaryClearsContentArg(ArgNode arg, Content c) { + pragma[nomagic] + predicate summaryClearsContentArg(ArgNode arg, ContentSet c) { exists(DataFlowCall call, SummarizedCallable sc, ParameterPosition ppos | argumentPositionMatch(call, arg, ppos) and viableParam(call, sc, ppos, _) and @@ -775,7 +776,7 @@ module Private { * NOTE: This step should not be used in global data-flow/taint-tracking, but may * be useful to include in the exposed local data-flow/taint-tracking relations. */ - predicate summaryGetterStep(ArgNode arg, Content c, Node out) { + predicate summaryGetterStep(ArgNode arg, ContentSet c, Node out) { exists(ReturnKindExt rk, Node mid, ReturnNodeExt ret | summaryReadStep(summaryArgParam(arg, rk, out), c, mid) and summaryLocalStep(mid, ret, _) and @@ -790,7 +791,7 @@ module Private { * NOTE: This step should not be used in global data-flow/taint-tracking, but may * be useful to include in the exposed local data-flow/taint-tracking relations. */ - predicate summarySetterStep(ArgNode arg, Content c, Node out) { + predicate summarySetterStep(ArgNode arg, ContentSet c, Node out) { exists(ReturnKindExt rk, Node mid, ReturnNodeExt ret | summaryLocalStep(summaryArgParam(arg, rk, out), mid, _) and summaryStoreStep(mid, c, ret) and @@ -806,10 +807,10 @@ module Private { module External { /** Holds if `spec` is a relevant external specification. */ private predicate relevantSpec(string spec) { - summaryElement(_, spec, _, _) or - summaryElement(_, _, spec, _) or - sourceElement(_, spec, _) or - sinkElement(_, spec, _) + summaryElement(_, spec, _, _, _) or + summaryElement(_, _, spec, _, _) or + sourceElement(_, spec, _, _) or + sinkElement(_, spec, _, _) } private class AccessPathRange extends AccessPath::Range { @@ -875,13 +876,20 @@ module Private { } private class SummarizedCallableExternal extends SummarizedCallable { - SummarizedCallableExternal() { summaryElement(this, _, _, _) } + SummarizedCallableExternal() { summaryElement(this, _, _, _, _) } + + private predicate relevantSummaryElement(AccessPath inSpec, AccessPath outSpec, string kind) { + summaryElement(this, inSpec, outSpec, kind, false) + or + summaryElement(this, inSpec, outSpec, kind, true) and + not summaryElement(this, _, _, _, false) + } override predicate propagatesFlow( SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue ) { exists(AccessPath inSpec, AccessPath outSpec, string kind | - summaryElement(this, inSpec, outSpec, kind) and + this.relevantSummaryElement(inSpec, outSpec, kind) and interpretSpec(inSpec, input) and interpretSpec(outSpec, output) | @@ -910,7 +918,7 @@ module Private { private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) { exists(SourceOrSinkElement e | - sourceElement(e, output, kind) and + sourceElement(e, output, kind, _) and if outputNeedsReference(output.getToken(0)) then e = ref.getCallTarget() else e = ref.asElement() @@ -919,7 +927,7 @@ module Private { private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) { exists(SourceOrSinkElement e | - sinkElement(e, input, kind) and + sinkElement(e, input, kind, _) and if inputNeedsReference(input.getToken(0)) then e = ref.getCallTarget() else e = ref.asElement() @@ -1056,8 +1064,8 @@ module Private { | c.relevantSummary(input, output, preservesValue) and csv = - c.getCallableCsv() + ";" + getComponentStackCsv(input) + ";" + - getComponentStackCsv(output) + ";" + renderKind(preservesValue) + c.getCallableCsv() + getComponentStackCsv(input) + ";" + getComponentStackCsv(output) + + ";" + renderKind(preservesValue) ) } } @@ -1123,7 +1131,7 @@ module Private { if preservesValue = true then value = "value" else value = "taint" ) or - exists(Content c | + exists(ContentSet c | Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and value = "read (" + c + ")" or diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll index f532df598cb..7dbecbecda5 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll @@ -41,13 +41,17 @@ DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() } /** * Holds if an external flow summary exists for `c` with input specification - * `input`, output specification `output`, and kind `kind`. + * `input`, output specification `output`, kind `kind`, and a flag `generated` + * stating whether the summary is autogenerated. */ -predicate summaryElement(DataFlowCallable c, string input, string output, string kind) { +predicate summaryElement( + DataFlowCallable c, string input, string output, string kind, boolean generated +) { exists(FlowSummary::SummarizedCallable sc, boolean preservesValue | sc.propagatesFlowExt(input, output, preservesValue) and c.asLibraryCallable() = sc and - if preservesValue = true then kind = "value" else kind = "taint" + (if preservesValue = true then kind = "value" else kind = "taint") and + generated = false ) } @@ -92,16 +96,18 @@ ReturnKind getReturnValueKind() { any() } */ private module UnusedSourceSinkInterpretation { /** - * Holds if an external source specification exists for `e` with output specification - * `output` and kind `kind`. + * Holds if an external source specification exists for `n` with output specification + * `output`, kind `kind`, and a flag `generated` stating whether the source specification is + * autogenerated. */ - predicate sourceElement(AstNode n, string output, string kind) { none() } + predicate sourceElement(AstNode n, string output, string kind, boolean generated) { none() } /** * Holds if an external sink specification exists for `n` with input specification - * `input` and kind `kind`. + * `input`, kind `kind` and a flag `generated` stating whether the sink specification is + * autogenerated. */ - predicate sinkElement(AstNode n, string input, string kind) { none() } + predicate sinkElement(AstNode n, string input, string kind, boolean generated) { none() } class SourceOrSinkElement = AstNode; diff --git a/python/ql/src/meta/analysis-quality/CallGraph.ql b/python/ql/src/meta/analysis-quality/CallGraph.ql index d23ee43014c..4504fcf99b0 100644 --- a/python/ql/src/meta/analysis-quality/CallGraph.ql +++ b/python/ql/src/meta/analysis-quality/CallGraph.ql @@ -11,6 +11,6 @@ import python import semmle.python.dataflow.new.internal.DataFlowPrivate -from DataFlowCall c, DataFlowCallableValue f +from DataFlowSourceCall c, DataFlowCallableValue f where c.getCallable() = f select c, "Call to $@", f.getScope(), f.toString() diff --git a/python/ql/test/experimental/dataflow/TestUtil/RoutingTest.qll b/python/ql/test/experimental/dataflow/TestUtil/RoutingTest.qll index 64907bb4554..12e96f4d456 100644 --- a/python/ql/test/experimental/dataflow/TestUtil/RoutingTest.qll +++ b/python/ql/test/experimental/dataflow/TestUtil/RoutingTest.qll @@ -49,6 +49,12 @@ abstract class RoutingTest extends InlineExpectationsTest { pragma[inline] private string toFunc(DataFlow::Node toNode) { - result = toNode.getEnclosingCallable().(DataFlowPrivate::NonLibraryDataFlowCallable).getCallableValue().getScope().getQualifiedName() // TODO: More robust pretty printing? + result = + toNode + .getEnclosingCallable() + .(DataFlowPrivate::NonLibraryDataFlowCallable) + .getCallableValue() + .getScope() + .getQualifiedName() // TODO: More robust pretty printing? } } diff --git a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll index ac21d98994a..04cb39209e4 100644 --- a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll +++ b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll @@ -58,13 +58,16 @@ private class SummarizedCallableMap extends SummarizedCallable { preservesValue = true } } + // Typetracking needs to use a local flow step not including summaries // Typetracking needs to use a call graph not including summaries private class SummarizedCallableJsonLoads extends SummarizedCallable { SummarizedCallableJsonLoads() { this = "json.loads" } + override Call getACall() { result = API::moduleImport("json").getMember("loads").getACall().asExpr() } + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue.ListElement" and