diff --git a/config/identical-files.json b/config/identical-files.json
index 3c16c953129..ceed02ba5d6 100644
--- a/config/identical-files.json
+++ b/config/identical-files.json
@@ -523,6 +523,10 @@
"python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll",
"ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll"
],
+ "SummaryTypeTracker": [
+ "python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll",
+ "ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll"
+ ],
"AccessPathSyntax": [
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll",
"go/ql/lib/semmle/go/dataflow/internal/AccessPathSyntax.qll",
diff --git a/python/ql/lib/change-notes/2023-05-30-typetracking-via-flow-summaries.md b/python/ql/lib/change-notes/2023-05-30-typetracking-via-flow-summaries.md
new file mode 100644
index 00000000000..11c01629987
--- /dev/null
+++ b/python/ql/lib/change-notes/2023-05-30-typetracking-via-flow-summaries.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* Type tracking is now aware of flow summaries. This leads to a richer API graph, and may lead to more results in some queries.
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 3084983a605..ba451a21fdf 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -251,6 +251,9 @@ abstract class LibraryCallable extends string {
/** Gets a call to this library callable. */
abstract CallCfgNode getACall();
+ /** Same as `getACall` but without referring to the call graph or API graph. */
+ CallCfgNode getACallSimple() { none() }
+
/** Gets a data-flow node, where this library callable is used as a call-back. */
abstract ArgumentNode getACallback();
}
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll
new file mode 100644
index 00000000000..9c6f841651d
--- /dev/null
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll
@@ -0,0 +1,391 @@
+/**
+ * Provides the implementation of type tracking steps through flow summaries.
+ * To use this, you must implement the `Input` signature. You can then use the predicates in the `Output`
+ * signature to implement the predicates of the same names inside `TypeTrackerSpecific.qll`.
+ */
+
+/** The classes and predicates needed to generate type-tracking steps from summaries. */
+signature module Input {
+ // Dataflow nodes
+ class Node;
+
+ // Content
+ class TypeTrackerContent;
+
+ class TypeTrackerContentFilter;
+
+ // Relating content and filters
+ /**
+ * Gets a content filter to use for a `WithoutContent[content]` step (data is not allowed to be stored in `content`),
+ * or has no result if
+ * the step should be treated as ordinary flow.
+ *
+ * `WithoutContent` is often used to perform strong updates on individual collection elements, but for
+ * type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful
+ * for restricting the type of an object, and in these cases we translate it to a filter.
+ */
+ TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content);
+
+ /**
+ * Gets a content filter to use for a `WithContent[content]` step (data must be stored in `content`),
+ * or has no result if
+ * the step cannot be handled by type-tracking.
+ *
+ * `WithContent` is often used to perform strong updates on individual collection elements (or rather
+ * to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive.
+ * However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter.
+ */
+ TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content);
+
+ // Summaries and their stacks
+ class SummaryComponent;
+
+ class SummaryComponentStack {
+ SummaryComponent head();
+ }
+
+ /** Gets a singleton stack containing `component`. */
+ SummaryComponentStack singleton(SummaryComponent component);
+
+ /**
+ * Gets the stack obtained by pushing `head` onto `tail`.
+ */
+ SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail);
+
+ /** Gets a summary component representing a return. */
+ SummaryComponent return();
+
+ // Relating content to summaries
+ /** Gets a summary component for content `contents`. */
+ SummaryComponent content(TypeTrackerContent contents);
+
+ /** Gets a summary component where data is not allowed to be stored in `contents`. */
+ SummaryComponent withoutContent(TypeTrackerContent contents);
+
+ /** Gets a summary component where data must be stored in `contents`. */
+ SummaryComponent withContent(TypeTrackerContent contents);
+
+ // Callables
+ class SummarizedCallable {
+ predicate propagatesFlow(
+ SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
+ );
+ }
+
+ // Relating nodes to summaries
+ /** Gets a dataflow node representing the argument of `call` indicated by `arg`. */
+ Node argumentOf(Node call, SummaryComponent arg);
+
+ /** Gets a dataflow node representing the parameter of `callable` indicated by `param`. */
+ Node parameterOf(Node callable, SummaryComponent param);
+
+ /** Gets a dataflow node representing the return of `callable` indicated by `return`. */
+ Node returnOf(Node callable, SummaryComponent return);
+
+ // Relating callables to nodes
+ /** Gets a dataflow node representing a call to `callable`. */
+ Node callTo(SummarizedCallable callable);
+}
+
+/**
+ * The predicates provided by a summary type tracker instantiated with the `Input` module `I`.
+ * These are meant to be used in `TypeTrackerSpecific.qll`
+ * inside the predicates of the same names.
+ */
+signature module Output<Input I> {
+ /**
+ * Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph.
+ */
+ predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo);
+
+ /**
+ * Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`.
+ */
+ predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content);
+
+ /**
+ * Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
+ */
+ predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content);
+
+ /**
+ * Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
+ */
+ predicate basicLoadStoreStep(
+ I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent,
+ I::TypeTrackerContent storeContent
+ );
+
+ /**
+ * Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here.
+ */
+ predicate basicWithoutContentStep(
+ I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
+ );
+
+ /**
+ * Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`.
+ */
+ predicate basicWithContentStep(
+ I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
+ );
+}
+
+/**
+ * Implementation of the summary type tracker (type tracking through flow summaries) for the `Input` module `I`.
+ */
+module SummaryFlow<Input I> implements Output<I> {
+ pragma[nomagic]
+ private predicate isNonLocal(I::SummaryComponent component) {
+ component = I::content(_)
+ or
+ component = I::withContent(_)
+ }
+
+ pragma[nomagic]
+ private predicate hasLoadSummary(
+ I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input,
+ I::SummaryComponentStack output
+ ) {
+ callable.propagatesFlow(I::push(I::content(contents), input), output, true) and
+ not isNonLocal(input.head()) and
+ not isNonLocal(output.head())
+ }
+
+ pragma[nomagic]
+ private predicate hasStoreSummary(
+ I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input,
+ I::SummaryComponentStack output
+ ) {
+ not isNonLocal(input.head()) and
+ not isNonLocal(output.head()) and
+ (
+ callable.propagatesFlow(input, I::push(I::content(contents), output), true)
+ or
+ // Allow the input to start with an arbitrary WithoutContent[X].
+ // Since type-tracking only tracks one content deep, and we're about to store into another content,
+ // we're already preventing the input from being in a content.
+ callable
+ .propagatesFlow(I::push(I::withoutContent(_), input),
+ I::push(I::content(contents), output), true)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate hasLoadStoreSummary(
+ I::SummarizedCallable callable, I::TypeTrackerContent loadContents,
+ I::TypeTrackerContent storeContents, I::SummaryComponentStack input,
+ I::SummaryComponentStack output
+ ) {
+ callable
+ .propagatesFlow(I::push(I::content(loadContents), input),
+ I::push(I::content(storeContents), output), true) and
+ not isNonLocal(input.head()) and
+ not isNonLocal(output.head())
+ }
+
+ pragma[nomagic]
+ private predicate hasWithoutContentSummary(
+ I::SummarizedCallable callable, I::TypeTrackerContentFilter filter,
+ I::SummaryComponentStack input, I::SummaryComponentStack output
+ ) {
+ exists(I::TypeTrackerContent content |
+ callable.propagatesFlow(I::push(I::withoutContent(content), input), output, true) and
+ filter = I::getFilterFromWithoutContentStep(content) and
+ not isNonLocal(input.head()) and
+ not isNonLocal(output.head()) and
+ input != output
+ )
+ }
+
+ pragma[nomagic]
+ private predicate hasWithContentSummary(
+ I::SummarizedCallable callable, I::TypeTrackerContentFilter filter,
+ I::SummaryComponentStack input, I::SummaryComponentStack output
+ ) {
+ exists(I::TypeTrackerContent content |
+ callable.propagatesFlow(I::push(I::withContent(content), input), output, true) and
+ filter = I::getFilterFromWithContentStep(content) and
+ not isNonLocal(input.head()) and
+ not isNonLocal(output.head()) and
+ input != output
+ )
+ }
+
+ private predicate componentLevelStep(I::SummaryComponent component) {
+ exists(I::TypeTrackerContent content |
+ component = I::withoutContent(content) and
+ not exists(I::getFilterFromWithoutContentStep(content))
+ )
+ }
+
+ /**
+ * Gets a data flow `I::Node` corresponding to an argument or return value of `call`,
+ * as specified by `component`.
+ */
+ bindingset[call, component]
+ private I::Node evaluateSummaryComponentLocal(I::Node call, I::SummaryComponent component) {
+ result = I::argumentOf(call, component)
+ or
+ component = I::return() and
+ result = call
+ }
+
+ /**
+ * Holds if `callable` is relevant for type-tracking and we therefore want `stack` to
+ * be evaluated locally at its call sites.
+ */
+ pragma[nomagic]
+ private predicate dependsOnSummaryComponentStack(
+ I::SummarizedCallable callable, I::SummaryComponentStack stack
+ ) {
+ exists(I::callTo(callable)) and
+ (
+ callable.propagatesFlow(stack, _, true)
+ or
+ callable.propagatesFlow(_, stack, true)
+ or
+ // include store summaries as they may skip an initial step at the input
+ hasStoreSummary(callable, _, stack, _)
+ )
+ or
+ dependsOnSummaryComponentStackCons(callable, _, stack)
+ }
+
+ pragma[nomagic]
+ private predicate dependsOnSummaryComponentStackCons(
+ I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail
+ ) {
+ dependsOnSummaryComponentStack(callable, I::push(head, tail))
+ }
+
+ pragma[nomagic]
+ private predicate dependsOnSummaryComponentStackConsLocal(
+ I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail
+ ) {
+ dependsOnSummaryComponentStackCons(callable, head, tail) and
+ not isNonLocal(head)
+ }
+
+ pragma[nomagic]
+ private predicate dependsOnSummaryComponentStackLeaf(
+ I::SummarizedCallable callable, I::SummaryComponent leaf
+ ) {
+ dependsOnSummaryComponentStack(callable, I::singleton(leaf))
+ }
+
+ /**
+ * Gets a data flow `I::Node` corresponding to the local input or output of `call`
+ * identified by `stack`, if possible.
+ */
+ pragma[nomagic]
+ private I::Node evaluateSummaryComponentStackLocal(
+ I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack stack
+ ) {
+ exists(I::SummaryComponent component |
+ dependsOnSummaryComponentStackLeaf(callable, component) and
+ stack = I::singleton(component) and
+ call = I::callTo(callable) and
+ result = evaluateSummaryComponentLocal(call, component)
+ )
+ or
+ exists(I::Node prev, I::SummaryComponent head, I::SummaryComponentStack tail |
+ prev = evaluateSummaryComponentStackLocal(callable, call, tail) and
+ dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head),
+ pragma[only_bind_out](tail)) and
+ stack = I::push(pragma[only_bind_out](head), pragma[only_bind_out](tail))
+ |
+ result = I::parameterOf(prev, head)
+ or
+ result = I::returnOf(prev, head)
+ or
+ componentLevelStep(head) and
+ result = prev
+ )
+ }
+
+ // Implement Output
+ predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo) {
+ exists(
+ I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
+ I::SummaryComponentStack output
+ |
+ callable.propagatesFlow(input, output, true) and
+ call = I::callTo(callable) and
+ nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
+ nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
+ )
+ }
+
+ predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) {
+ exists(
+ I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
+ I::SummaryComponentStack output
+ |
+ hasLoadSummary(callable, content, pragma[only_bind_into](input),
+ pragma[only_bind_into](output)) and
+ call = I::callTo(callable) and
+ nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
+ nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
+ )
+ }
+
+ predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) {
+ exists(
+ I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
+ I::SummaryComponentStack output
+ |
+ hasStoreSummary(callable, content, pragma[only_bind_into](input),
+ pragma[only_bind_into](output)) and
+ call = I::callTo(callable) and
+ nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
+ nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
+ )
+ }
+
+ predicate basicLoadStoreStep(
+ I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent,
+ I::TypeTrackerContent storeContent
+ ) {
+ exists(
+ I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
+ I::SummaryComponentStack output
+ |
+ hasLoadStoreSummary(callable, loadContent, storeContent, pragma[only_bind_into](input),
+ pragma[only_bind_into](output)) and
+ call = I::callTo(callable) and
+ nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
+ nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
+ )
+ }
+
+ predicate basicWithoutContentStep(
+ I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
+ ) {
+ exists(
+ I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
+ I::SummaryComponentStack output
+ |
+ hasWithoutContentSummary(callable, filter, pragma[only_bind_into](input),
+ pragma[only_bind_into](output)) and
+ call = I::callTo(callable) and
+ nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
+ nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
+ )
+ }
+
+ predicate basicWithContentStep(
+ I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
+ ) {
+ exists(
+ I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
+ I::SummaryComponentStack output
+ |
+ hasWithContentSummary(callable, filter, pragma[only_bind_into](input),
+ pragma[only_bind_into](output)) and
+ call = I::callTo(callable) and
+ nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
+ nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
+ )
+ }
+}
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
index 9e05b7869c5..bac194aae9e 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
@@ -61,7 +61,9 @@ predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
predicate levelStepCall(Node nodeFrom, Node nodeTo) { none() }
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */
-predicate levelStepNoCall(Node nodeFrom, Node nodeTo) { none() }
+predicate levelStepNoCall(Node nodeFrom, Node nodeTo) {
+ TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo)
+}
/**
* Gets the name of a possible piece of content. For Python, this is currently only attribute names,
@@ -108,6 +110,12 @@ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) {
nodeFrom = a.getValue() and
nodeTo = a.getObject()
)
+ or
+ exists(DataFlowPublic::ContentSet contents |
+ contents.(DataFlowPublic::AttributeContent).getAttribute() = content
+ |
+ TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents)
+ )
}
/**
@@ -119,13 +127,24 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
nodeFrom = a.getObject() and
nodeTo = a
)
+ or
+ exists(DataFlowPublic::ContentSet contents |
+ contents.(DataFlowPublic::AttributeContent).getAttribute() = content
+ |
+ TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents)
+ )
}
/**
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
*/
predicate basicLoadStoreStep(Node nodeFrom, Node nodeTo, string loadContent, string storeContent) {
- none()
+ exists(DataFlowPublic::ContentSet loadContents, DataFlowPublic::ContentSet storeContents |
+ loadContents.(DataFlowPublic::AttributeContent).getAttribute() = loadContent and
+ storeContents.(DataFlowPublic::AttributeContent).getAttribute() = storeContent
+ |
+ TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContents, storeContents)
+ )
}
/**
@@ -144,3 +163,93 @@ predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter)
class Boolean extends boolean {
Boolean() { this = true or this = false }
}
+
+private import SummaryTypeTracker as SummaryTypeTracker
+private import semmle.python.dataflow.new.FlowSummary as FlowSummary
+private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
+
+pragma[noinline]
+private predicate argumentPositionMatch(
+ DataFlowPublic::CallCfgNode call, DataFlowPublic::Node arg,
+ DataFlowDispatch::ParameterPosition ppos
+) {
+ exists(DataFlowDispatch::ArgumentPosition apos |
+ DataFlowDispatch::parameterMatch(ppos, apos) and
+ DataFlowDispatch::normalCallArg(call.getNode(), arg, apos)
+ )
+}
+
+private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
+ // Dataflow nodes
+ class Node = DataFlowPublic::Node;
+
+ // Content
+ class TypeTrackerContent = DataFlowPublic::ContentSet;
+
+ class TypeTrackerContentFilter = ContentFilter;
+
+ TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content) { none() }
+
+ TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { none() }
+
+ // Callables
+ class SummarizedCallable = FlowSummary::SummarizedCallable;
+
+ // Summaries and their stacks
+ class SummaryComponent = FlowSummary::SummaryComponent;
+
+ class SummaryComponentStack = FlowSummary::SummaryComponentStack;
+
+ predicate singleton = FlowSummary::SummaryComponentStack::singleton/1;
+
+ predicate push = FlowSummary::SummaryComponentStack::push/2;
+
+ // Relating content to summaries
+ predicate content = FlowSummary::SummaryComponent::content/1;
+
+ SummaryComponent withoutContent(TypeTrackerContent contents) { none() }
+
+ SummaryComponent withContent(TypeTrackerContent contents) { none() }
+
+ predicate return = FlowSummary::SummaryComponent::return/0;
+
+ // Relating nodes to summaries
+ Node argumentOf(Node call, SummaryComponent arg) {
+ exists(DataFlowDispatch::ParameterPosition pos |
+ arg = FlowSummary::SummaryComponent::argument(pos) and
+ argumentPositionMatch(call, result, pos)
+ )
+ }
+
+ Node parameterOf(Node callable, SummaryComponent param) {
+ exists(
+ DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos, Parameter p
+ |
+ param = FlowSummary::SummaryComponent::parameter(apos) and
+ DataFlowDispatch::parameterMatch(ppos, apos) and
+ // pick the SsaNode rather than the CfgNode
+ result.asVar().getDefinition().(ParameterDefinition).getParameter() = p and
+ (
+ exists(int i | ppos.isPositional(i) |
+ p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArg(i)
+ )
+ or
+ exists(string name | ppos.isKeyword(name) |
+ p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArgByName(name)
+ )
+ )
+ )
+ }
+
+ Node returnOf(Node callable, SummaryComponent return) {
+ return = FlowSummary::SummaryComponent::return() and
+ // `result` should be the return value of a callable expression (lambda or function) referenced by `callable`
+ result.asCfgNode() =
+ callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode()
+ }
+
+ // Relating callables to nodes
+ Node callTo(SummarizedCallable callable) { result = callable.getACallSimple() }
+}
+
+private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow<SummaryTypeTrackerInput>;
diff --git a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll
index 971a653c469..cdd61420bbb 100644
--- a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll
+++ b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll
@@ -60,7 +60,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable {
}
private class SummarizedCallableReversed extends SummarizedCallable {
- SummarizedCallableReversed() { this = "reversed" }
+ SummarizedCallableReversed() { this = "list_reversed" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this
diff --git a/python/ql/test/experimental/dataflow/summaries/summaries.py b/python/ql/test/experimental/dataflow/summaries/summaries.py
index 1532a5393d8..806b39f6dc8 100644
--- a/python/ql/test/experimental/dataflow/summaries/summaries.py
+++ b/python/ql/test/experimental/dataflow/summaries/summaries.py
@@ -66,3 +66,21 @@ SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]"
from json import loads as json_loads
tainted_resultlist = json_loads(SOURCE)
SINK(tainted_resultlist[0]) # $ flow="SOURCE, l:-1 -> tainted_resultlist[0]"
+
+
+# Class methods are not handled right now
+
+class MyClass:
+ @staticmethod
+ def foo(x):
+ return x
+
+ def bar(self, x):
+ return x
+
+through_staticmethod = apply_lambda(MyClass.foo, SOURCE)
+through_staticmethod # $ MISSING: flow
+
+mc = MyClass()
+through_method = apply_lambda(mc.bar, SOURCE)
+through_method # $ MISSING: flow
diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll
new file mode 100644
index 00000000000..8d626b332a3
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll
@@ -0,0 +1,189 @@
+private import python
+private import semmle.python.dataflow.new.FlowSummary
+private import semmle.python.ApiGraphs
+
+/**
+ * This module ensures that the `callStep` predicate in
+ * our type tracker implementation does not refer to the
+ * `getACall` predicate on `SummarizedCallable`.
+ */
+module RecursionGuard {
+ private import semmle.python.dataflow.new.internal.TypeTrackerSpecific as TT
+
+ private class RecursionGuard extends SummarizedCallable {
+ RecursionGuard() { this = "TypeTrackingSummariesRecursionGuard" }
+
+ override DataFlow::CallCfgNode getACall() {
+ result.getFunction().asCfgNode().(NameNode).getId() = this and
+ (TT::callStep(_, _) implies any())
+ }
+
+ override DataFlow::CallCfgNode getACallSimple() { none() }
+
+ override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
+ }
+
+ predicate test(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+ TT::levelStepNoCall(nodeFrom, nodeTo)
+ }
+}
+
+private class SummarizedCallableIdentity extends SummarizedCallable {
+ SummarizedCallableIdentity() { this = "TTS_identity" }
+
+ override DataFlow::CallCfgNode getACall() { none() }
+
+ override DataFlow::CallCfgNode getACallSimple() {
+ result.getFunction().asCfgNode().(NameNode).getId() = this
+ }
+
+ override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
+
+ override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
+ input = "Argument[0]" and
+ output = "ReturnValue" and
+ preservesValue = true
+ }
+}
+
+// For lambda flow to work, implement lambdaCall and lambdaCreation
+private class SummarizedCallableApplyLambda extends SummarizedCallable {
+ SummarizedCallableApplyLambda() { this = "TTS_apply_lambda" }
+
+ override DataFlow::CallCfgNode getACall() { none() }
+
+ override DataFlow::CallCfgNode getACallSimple() {
+ result.getFunction().asCfgNode().(NameNode).getId() = this
+ }
+
+ override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
+
+ override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
+ input = "Argument[1]" and
+ output = "Argument[0].Parameter[0]" and
+ preservesValue = true
+ or
+ input = "Argument[0].ReturnValue" and
+ output = "ReturnValue" and
+ preservesValue = true
+ }
+}
+
+private class SummarizedCallableReversed extends SummarizedCallable {
+ SummarizedCallableReversed() { this = "TTS_reversed" }
+
+ override DataFlow::CallCfgNode getACall() { none() }
+
+ override DataFlow::CallCfgNode getACallSimple() {
+ result.getFunction().asCfgNode().(NameNode).getId() = this
+ }
+
+ override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
+
+ override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
+ input = "Argument[0].ListElement" and
+ output = "ReturnValue.ListElement" and
+ preservesValue = true
+ }
+}
+
+private class SummarizedCallableMap extends SummarizedCallable {
+ SummarizedCallableMap() { this = "TTS_list_map" }
+
+ override DataFlow::CallCfgNode getACall() { none() }
+
+ override DataFlow::CallCfgNode getACallSimple() {
+ result.getFunction().asCfgNode().(NameNode).getId() = this
+ }
+
+ override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
+
+ override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
+ input = "Argument[1].ListElement" and
+ output = "Argument[0].Parameter[0]" and
+ preservesValue = true
+ or
+ input = "Argument[0].ReturnValue" and
+ output = "ReturnValue.ListElement" and
+ preservesValue = true
+ }
+}
+
+private class SummarizedCallableAppend extends SummarizedCallable {
+ SummarizedCallableAppend() { this = "TTS_append_to_list" }
+
+ override DataFlow::CallCfgNode getACall() { none() }
+
+ override DataFlow::CallCfgNode getACallSimple() {
+ result.getFunction().asCfgNode().(NameNode).getId() = this
+ }
+
+ override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
+
+ override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
+ input = "Argument[0]" and
+ output = "ReturnValue" and
+ preservesValue = false
+ or
+ input = "Argument[1]" and
+ output = "ReturnValue.ListElement" and
+ preservesValue = true
+ }
+}
+
+private class SummarizedCallableJsonLoads extends SummarizedCallable {
+ SummarizedCallableJsonLoads() { this = "TTS_json.loads" }
+
+ override DataFlow::CallCfgNode getACall() {
+ result = API::moduleImport("json").getMember("loads").getACall()
+ }
+
+ override DataFlow::CallCfgNode getACallSimple() { none() }
+
+ override DataFlow::ArgumentNode getACallback() {
+ result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource()
+ }
+
+ override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
+ input = "Argument[0]" and
+ output = "ReturnValue.ListElement" and
+ preservesValue = true
+ }
+}
+
+// read and store
+private class SummarizedCallableReadSecret extends SummarizedCallable {
+ SummarizedCallableReadSecret() { this = "TTS_read_secret" }
+
+ override DataFlow::CallCfgNode getACall() { none() }
+
+ override DataFlow::CallCfgNode getACallSimple() {
+ result.getFunction().asCfgNode().(NameNode).getId() = this
+ }
+
+ override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
+
+ override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
+ input = "Argument[0].Attribute[secret]" and
+ output = "ReturnValue" and
+ preservesValue = true
+ }
+}
+
+private class SummarizedCallableSetSecret extends SummarizedCallable {
+ SummarizedCallableSetSecret() { this = "TTS_set_secret" }
+
+ override DataFlow::CallCfgNode getACall() { none() }
+
+ override DataFlow::CallCfgNode getACallSimple() {
+ result.getFunction().asCfgNode().(NameNode).getId() = this
+ }
+
+ override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
+
+ override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
+ input = "Argument[1]" and
+ output = "Argument[0].Attribute[secret]" and
+ preservesValue = true
+ }
+}
diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py
new file mode 100644
index 00000000000..f838032b063
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py
@@ -0,0 +1,78 @@
+import sys
+import os
+
+# Simple summary
+tainted = TTS_identity(tracked) # $ tracked
+tainted # $ tracked
+
+# Lambda summary
+# I think the missing result is expected because type tracking
+# is not allowed to flow back out of a call.
+tainted_lambda = TTS_apply_lambda(lambda x: x, tracked) # $ tracked
+tainted_lambda # $ MISSING: tracked
+
+# A lambda that directly introduces taint
+bad_lambda = TTS_apply_lambda(lambda x: tracked, 1) # $ tracked
+bad_lambda # $ tracked
+
+# A lambda that breaks the flow
+untainted_lambda = TTS_apply_lambda(lambda x: 1, tracked) # $ tracked
+untainted_lambda
+
+# Collection summaries
+tainted_list = TTS_reversed([tracked]) # $ tracked
+tl = tainted_list[0]
+tl # $ MISSING: tracked
+
+# Complex summaries
+def add_colon(x):
+ return x + ":"
+
+tainted_mapped = TTS_list_map(add_colon, [tracked]) # $ tracked
+tm = tainted_mapped[0]
+tm # $ MISSING: tracked
+
+def explicit_identity(x):
+ return x
+
+tainted_mapped_explicit = TTS_list_map(explicit_identity, [tracked]) # $ tracked
+tainted_mapped_explicit[0] # $ MISSING: tracked
+
+tainted_mapped_summary = TTS_list_map(identity, [tracked]) # $ tracked
+tms = tainted_mapped_summary[0]
+tms # $ MISSING: tracked
+
+another_tainted_list = TTS_append_to_list([], tracked) # $ tracked
+atl = another_tainted_list[0]
+atl # $ MISSING: tracked
+
+# This will not work, as the call is not found by `getACallSimple`.
+from json import loads as json_loads
+tainted_resultlist = json_loads(tracked) # $ tracked
+tr = tainted_resultlist[0]
+tr # $ MISSING: tracked
+
+x.secret = tracked # $ tracked=secret tracked
+r = TTS_read_secret(x) # $ tracked=secret tracked
+r # $ tracked
+
+y # $ tracked=secret
+TTS_set_secret(y, tracked) # $ tracked tracked=secret
+y.secret # $ tracked tracked=secret
+
+# Class methods are not handled right now
+
+class MyClass:
+ @staticmethod
+ def foo(x):
+ return x
+
+ def bar(self, x):
+ return x
+
+through_staticmethod = TTS_apply_lambda(MyClass.foo, tracked) # $ tracked
+through_staticmethod # $ MISSING: tracked
+
+mc = MyClass()
+through_method = TTS_apply_lambda(mc.bar, tracked) # $ tracked
+through_method # $ MISSING: tracked
diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.expected b/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.expected
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.ql b/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.ql
new file mode 100644
index 00000000000..e5bf62053a0
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.ql
@@ -0,0 +1,36 @@
+import python
+import semmle.python.dataflow.new.DataFlow
+import semmle.python.dataflow.new.TypeTracker
+import TestUtilities.InlineExpectationsTest
+import semmle.python.ApiGraphs
+import TestSummaries
+
+/**
+ * Gets a type-tracking node reached, under type tracker `t`, from a name node whose id is `tracked`.
+ */
+private DataFlow::TypeTrackingNode tracked(TypeTracker t) {
+ t.start() and
+ result.asCfgNode() = any(NameNode n | n.getId() = "tracked")
+ or
+ exists(TypeTracker t2 | result = tracked(t2).track(t2, t))
+}
+
+class TrackedTest extends InlineExpectationsTest {
+ TrackedTest() { this = "TrackedTest" }
+
+ override string getARelevantTag() { result = "tracked" }
+
+ override predicate hasActualResult(Location location, string element, string tag, string value) {
+ exists(DataFlow::Node e, TypeTracker t |
+ exists(e.getLocation().getFile().getRelativePath()) and // only nodes in files that have a relative path
+ e.getLocation().getStartLine() > 0 and
+ tracked(t).flowsTo(e) and
+ // Module variables have no sensible location, and hence can't be annotated.
+ not e instanceof DataFlow::ModuleVariableNode and
+ tag = "tracked" and
+ location = e.getLocation() and
+ value = t.getAttr() and // the tracker's `getAttr()` result is reported as the expectation value
+ element = e.toString()
+ )
+ }
+}
diff --git a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll
index 7725d201fba..d14c182d127 100644
--- a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll
+++ b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll
@@ -112,15 +112,7 @@ predicate levelStepCall(Node nodeFrom, Node nodeTo) {
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */
pragma[nomagic]
predicate levelStepNoCall(Node nodeFrom, Node nodeTo) {
- exists(
- SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input,
- SummaryComponentStack output
- |
- callable.propagatesFlow(input, output, true) and
- call.asExpr().getExpr() = callable.getACallSimple() and
- nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
- nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
- )
+ TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo)
or
localFieldStep(nodeFrom, nodeTo)
}
@@ -276,16 +268,7 @@ predicate returnStep(Node nodeFrom, Node nodeTo) {
predicate basicStoreStep(Node nodeFrom, Node nodeTo, DataFlow::ContentSet contents) {
storeStepIntoSourceNode(nodeFrom, nodeTo, contents)
or
- exists(
- SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input,
- SummaryComponentStack output
- |
- hasStoreSummary(callable, contents, pragma[only_bind_into](input),
- pragma[only_bind_into](output)) and
- call.asExpr().getExpr() = callable.getACallSimple() and
- nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
- nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
- )
+ TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents)
}
/**
@@ -319,15 +302,7 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, DataFlow::ContentSet content
nodeTo.asExpr() = call
)
or
- exists(
- SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input,
- SummaryComponentStack output
- |
- hasLoadSummary(callable, contents, pragma[only_bind_into](input), pragma[only_bind_into](output)) and
- call.asExpr().getExpr() = callable.getACallSimple() and
- nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
- nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
- )
+ TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents)
}
/**
@@ -336,48 +311,21 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, DataFlow::ContentSet content
predicate basicLoadStoreStep(
Node nodeFrom, Node nodeTo, DataFlow::ContentSet loadContent, DataFlow::ContentSet storeContent
) {
- exists(
- SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input,
- SummaryComponentStack output
- |
- hasLoadStoreSummary(callable, loadContent, storeContent, pragma[only_bind_into](input),
- pragma[only_bind_into](output)) and
- call.asExpr().getExpr() = callable.getACallSimple() and
- nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
- nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
- )
+ TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContent, storeContent)
}
/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here.
*/
predicate basicWithoutContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) {
- exists(
- SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input,
- SummaryComponentStack output
- |
- hasWithoutContentSummary(callable, filter, pragma[only_bind_into](input),
- pragma[only_bind_into](output)) and
- call.asExpr().getExpr() = callable.getACallSimple() and
- nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
- nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
- )
+ TypeTrackerSummaryFlow::basicWithoutContentStep(nodeFrom, nodeTo, filter)
}
/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`.
*/
predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) {
- exists(
- SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input,
- SummaryComponentStack output
- |
- hasWithContentSummary(callable, filter, pragma[only_bind_into](input),
- pragma[only_bind_into](output)) and
- call.asExpr().getExpr() = callable.getACallSimple() and
- nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
- nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
- )
+ TypeTrackerSummaryFlow::basicWithContentStep(nodeFrom, nodeTo, filter)
}
/**
@@ -389,121 +337,6 @@ class Boolean extends boolean {
private import SummaryComponentStack
-pragma[nomagic]
-private predicate hasStoreSummary(
- SummarizedCallable callable, DataFlow::ContentSet contents, SummaryComponentStack input,
- SummaryComponentStack output
-) {
- not isNonLocal(input.head()) and
- not isNonLocal(output.head()) and
- (
- callable.propagatesFlow(input, push(SummaryComponent::content(contents), output), true)
- or
- // Allow the input to start with an arbitrary WithoutContent[X].
- // Since type-tracking only tracks one content deep, and we're about to store into another content,
- // we're already preventing the input from being in a content.
- callable
- .propagatesFlow(push(SummaryComponent::withoutContent(_), input),
- push(SummaryComponent::content(contents), output), true)
- )
-}
-
-pragma[nomagic]
-private predicate hasLoadSummary(
- SummarizedCallable callable, DataFlow::ContentSet contents, SummaryComponentStack input,
- SummaryComponentStack output
-) {
- callable.propagatesFlow(push(SummaryComponent::content(contents), input), output, true) and
- not isNonLocal(input.head()) and
- not isNonLocal(output.head())
-}
-
-pragma[nomagic]
-private predicate hasLoadStoreSummary(
- SummarizedCallable callable, DataFlow::ContentSet loadContents,
- DataFlow::ContentSet storeContents, SummaryComponentStack input, SummaryComponentStack output
-) {
- callable
- .propagatesFlow(push(SummaryComponent::content(loadContents), input),
- push(SummaryComponent::content(storeContents), output), true) and
- not isNonLocal(input.head()) and
- not isNonLocal(output.head())
-}
-
-/**
- * Gets a content filter to use for a `WithoutContent[content]` step, or has no result if
- * the step should be treated as ordinary flow.
- *
- * `WithoutContent` is often used to perform strong updates on individual collection elements, but for
- * type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful
- * for restricting the type of an object, and in these cases we translate it to a filter.
- */
-private ContentFilter getFilterFromWithoutContentStep(DataFlow::ContentSet content) {
- (
- content.isAnyElement()
- or
- content.isElementLowerBoundOrUnknown(_)
- or
- content.isElementOfTypeOrUnknown(_)
- or
- content.isSingleton(any(DataFlow::Content::UnknownElementContent c))
- ) and
- result = MkElementFilter()
-}
-
-pragma[nomagic]
-private predicate hasWithoutContentSummary(
- SummarizedCallable callable, ContentFilter filter, SummaryComponentStack input,
- SummaryComponentStack output
-) {
- exists(DataFlow::ContentSet content |
- callable.propagatesFlow(push(SummaryComponent::withoutContent(content), input), output, true) and
- filter = getFilterFromWithoutContentStep(content) and
- not isNonLocal(input.head()) and
- not isNonLocal(output.head()) and
- input != output
- )
-}
-
-/**
- * Gets a content filter to use for a `WithContent[content]` step, or has no result if
- * the step cannot be handled by type-tracking.
- *
- * `WithContent` is often used to perform strong updates on individual collection elements (or rather
- * to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive.
- * However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter.
- */
-private ContentFilter getFilterFromWithContentStep(DataFlow::ContentSet content) {
- (
- content.isAnyElement()
- or
- content.isElementLowerBound(_)
- or
- content.isElementLowerBoundOrUnknown(_)
- or
- content.isElementOfType(_)
- or
- content.isElementOfTypeOrUnknown(_)
- or
- content.isSingleton(any(DataFlow::Content::ElementContent c))
- ) and
- result = MkElementFilter()
-}
-
-pragma[nomagic]
-private predicate hasWithContentSummary(
- SummarizedCallable callable, ContentFilter filter, SummaryComponentStack input,
- SummaryComponentStack output
-) {
- exists(DataFlow::ContentSet content |
- callable.propagatesFlow(push(SummaryComponent::withContent(content), input), output, true) and
- filter = getFilterFromWithContentStep(content) and
- not isNonLocal(input.head()) and
- not isNonLocal(output.head()) and
- input != output
- )
-}
-
/**
* Holds if the given component can't be evaluated by `evaluateSummaryComponentStackLocal`.
*/
@@ -514,101 +347,95 @@ predicate isNonLocal(SummaryComponent component) {
component = SC::withContent(_)
}
-/**
- * Gets a data flow node corresponding an argument or return value of `call`,
- * as specified by `component`.
- */
-bindingset[call, component]
-private DataFlow::Node evaluateSummaryComponentLocal(
- DataFlow::CallNode call, SummaryComponent component
-) {
- exists(DataFlowDispatch::ParameterPosition pos |
- component = SummaryComponent::argument(pos) and
- argumentPositionMatch(call.asExpr(), result, pos)
- )
- or
- component = SummaryComponent::return() and
- result = call
-}
+private import internal.SummaryTypeTracker as SummaryTypeTracker
+private import codeql.ruby.dataflow.FlowSummary as FlowSummary
-/**
- * Holds if `callable` is relevant for type-tracking and we therefore want `stack` to
- * be evaluated locally at its call sites.
- */
-pragma[nomagic]
-private predicate dependsOnSummaryComponentStack(
- SummarizedCallable callable, SummaryComponentStack stack
-) {
- exists(callable.getACallSimple()) and
- (
- callable.propagatesFlow(stack, _, true)
- or
- callable.propagatesFlow(_, stack, true)
- or
- // include store summaries as they may skip an initial step at the input
- hasStoreSummary(callable, _, stack, _)
- )
- or
- dependsOnSummaryComponentStackCons(callable, _, stack)
-}
+private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
+ // Dataflow nodes
+ class Node = DataFlow::Node;
-pragma[nomagic]
-private predicate dependsOnSummaryComponentStackCons(
- SummarizedCallable callable, SummaryComponent head, SummaryComponentStack tail
-) {
- dependsOnSummaryComponentStack(callable, SCS::push(head, tail))
-}
+ // Content
+ class TypeTrackerContent = DataFlowPublic::ContentSet;
-pragma[nomagic]
-private predicate dependsOnSummaryComponentStackConsLocal(
- SummarizedCallable callable, SummaryComponent head, SummaryComponentStack tail
-) {
- dependsOnSummaryComponentStackCons(callable, head, tail) and
- not isNonLocal(head)
-}
+ class TypeTrackerContentFilter = ContentFilter;
-pragma[nomagic]
-private predicate dependsOnSummaryComponentStackLeaf(
- SummarizedCallable callable, SummaryComponent leaf
-) {
- dependsOnSummaryComponentStack(callable, SCS::singleton(leaf))
-}
+ TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content) {
+ (
+ content.isAnyElement()
+ or
+ content.isElementLowerBoundOrUnknown(_)
+ or
+ content.isElementOfTypeOrUnknown(_)
+ or
+ content.isSingleton(any(DataFlow::Content::UnknownElementContent c))
+ ) and
+ result = MkElementFilter()
+ }
-/**
- * Gets a data flow node corresponding to the local input or output of `call`
- * identified by `stack`, if possible.
- */
-pragma[nomagic]
-private DataFlow::Node evaluateSummaryComponentStackLocal(
- SummarizedCallable callable, DataFlow::CallNode call, SummaryComponentStack stack
-) {
- exists(SummaryComponent component |
- dependsOnSummaryComponentStackLeaf(callable, component) and
- stack = SCS::singleton(component) and
- call.asExpr().getExpr() = callable.getACallSimple() and
- result = evaluateSummaryComponentLocal(call, component)
- )
- or
- exists(DataFlow::Node prev, SummaryComponent head, SummaryComponentStack tail |
- prev = evaluateSummaryComponentStackLocal(callable, call, tail) and
- dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head),
- pragma[only_bind_out](tail)) and
- stack = SCS::push(pragma[only_bind_out](head), pragma[only_bind_out](tail))
- |
+ TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) {
+ (
+ content.isAnyElement()
+ or
+ content.isElementLowerBound(_)
+ or
+ content.isElementLowerBoundOrUnknown(_)
+ or
+ content.isElementOfType(_)
+ or
+ content.isElementOfTypeOrUnknown(_)
+ or
+ content.isSingleton(any(DataFlow::Content::ElementContent c))
+ ) and
+ result = MkElementFilter()
+ }
+
+ // Summaries and their stacks
+ class SummaryComponent = FlowSummary::SummaryComponent;
+
+ class SummaryComponentStack = FlowSummary::SummaryComponentStack;
+
+ predicate singleton = FlowSummary::SummaryComponentStack::singleton/1;
+
+ predicate push = FlowSummary::SummaryComponentStack::push/2;
+
+ // Relating content to summaries
+ predicate content = FlowSummary::SummaryComponent::content/1;
+
+ predicate withoutContent = FlowSummary::SummaryComponent::withoutContent/1;
+
+ predicate withContent = FlowSummary::SummaryComponent::withContent/1;
+
+ predicate return = FlowSummary::SummaryComponent::return/0;
+
+ // Callables
+ class SummarizedCallable = FlowSummary::SummarizedCallable;
+
+ // Relating nodes to summaries
+ Node argumentOf(Node call, SummaryComponent arg) {
+ exists(DataFlowDispatch::ParameterPosition pos |
+ arg = SummaryComponent::argument(pos) and
+ argumentPositionMatch(call.asExpr(), result, pos)
+ )
+ }
+
+ Node parameterOf(Node callable, SummaryComponent param) {
exists(DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos |
- head = SummaryComponent::parameter(apos) and
+ param = SummaryComponent::parameter(apos) and
DataFlowDispatch::parameterMatch(ppos, apos) and
- result.(DataFlowPrivate::ParameterNodeImpl).isSourceParameterOf(prev.asExpr().getExpr(), ppos)
+ result
+ .(DataFlowPrivate::ParameterNodeImpl)
+ .isSourceParameterOf(callable.asExpr().getExpr(), ppos)
)
- or
- head = SummaryComponent::return() and
+ }
+
+ Node returnOf(Node callable, SummaryComponent return) {
+ return = SummaryComponent::return() and
result.(DataFlowPrivate::ReturnNode).(DataFlowPrivate::NodeImpl).getCfgScope() =
- prev.asExpr().getExpr()
- or
- exists(DataFlow::ContentSet content |
- head = SummaryComponent::withoutContent(content) and
- not exists(getFilterFromWithoutContentStep(content)) and
- result = prev
- )
- )
+ callable.asExpr().getExpr()
+ }
+
+ // Relating callables to nodes
+ Node callTo(SummarizedCallable callable) { result.asExpr().getExpr() = callable.getACallSimple() }
}
+
+private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow<SummaryTypeTrackerInput>;
diff --git a/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll b/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll
new file mode 100644
index 00000000000..9c6f841651d
--- /dev/null
+++ b/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll
@@ -0,0 +1,391 @@
+/**
+ * Provides the implementation of type tracking steps through flow summaries.
+ * To use this, you must implement the `Input` signature. You can then use the predicates in the `Output`
+ * signature to implement the predicates of the same names inside `TypeTrackerSpecific.qll`.
+ */
+
+/** The classes and predicates needed to generate type-tracking steps from summaries. */
+signature module Input {
+ // Dataflow nodes
+ class Node;
+
+ // Content
+ class TypeTrackerContent;
+
+ class TypeTrackerContentFilter;
+
+ // Relating content and filters
+ /**
+ * Gets a content filter to use for a `WithoutContent[content]` step (data is not allowed to be stored in `content`),
+ * or has no result if
+ * the step should be treated as ordinary flow.
+ *
+ * `WithoutContent` is often used to perform strong updates on individual collection elements, but for
+ * type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful
+ * for restricting the type of an object, and in these cases we translate it to a filter.
+ */
+ TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content);
+
+ /**
+ * Gets a content filter to use for a `WithContent[content]` step (data must be stored in `content`),
+ * or has no result if
+ * the step cannot be handled by type-tracking.
+ *
+ * `WithContent` is often used to perform strong updates on individual collection elements (or rather
+ * to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive.
+ * However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter.
+ */
+ TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content);
+
+ // Summaries and their stacks
+ class SummaryComponent;
+
+ class SummaryComponentStack {
+ SummaryComponent head();
+ }
+
+ /** Gets a singleton stack containing `component`. */
+ SummaryComponentStack singleton(SummaryComponent component);
+
+ /**
+ * Gets the stack obtained by pushing `head` onto `tail`.
+ */
+ SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail);
+
+ /** Gets a summary component representing a return. */
+ SummaryComponent return();
+
+ // Relating content to summaries
+ /** Gets a summary component for content `contents`. */
+ SummaryComponent content(TypeTrackerContent contents);
+
+ /** Gets a summary component where data is not allowed to be stored in `contents`. */
+ SummaryComponent withoutContent(TypeTrackerContent contents);
+
+ /** Gets a summary component where data must be stored in `contents`. */
+ SummaryComponent withContent(TypeTrackerContent contents);
+
+ // Callables
+ class SummarizedCallable {
+ predicate propagatesFlow(
+ SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
+ );
+ }
+
+ // Relating nodes to summaries
+ /** Gets a dataflow node representing the argument of `call` indicated by `arg`. */
+ Node argumentOf(Node call, SummaryComponent arg);
+
+ /** Gets a dataflow node representing the parameter of `callable` indicated by `param`. */
+ Node parameterOf(Node callable, SummaryComponent param);
+
+ /** Gets a dataflow node representing the return of `callable` indicated by `return`. */
+ Node returnOf(Node callable, SummaryComponent return);
+
+ // Relating callables to nodes
+ /** Gets a dataflow node representing a call to `callable`. */
+ Node callTo(SummarizedCallable callable);
+}
+
+/**
+ * The predicates provided by a summary type tracker, parameterized over the
+ * language-specific `Input` module `I`. These are meant to be used in
+ * `TypeTrackerSpecific.qll` inside the predicates of the same names.
+ */
+signature module Output<Input I> {
+  /**
+   * Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph.
+   */
+  predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo);
+
+  /**
+   * Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`.
+   */
+  predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content);
+
+  /**
+   * Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
+   */
+  predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content);
+
+  /**
+   * Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
+   */
+  predicate basicLoadStoreStep(
+    I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent,
+    I::TypeTrackerContent storeContent
+  );
+
+  /**
+   * Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here.
+   */
+  predicate basicWithoutContentStep(
+    I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
+  );
+
+  /**
+   * Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`.
+   */
+  predicate basicWithContentStep(
+    I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
+  );
+}
+
+/**
+ * Implementation of the summary type tracker, that is type tracking through flow summaries.
+ */
+module SummaryFlow<Input I> implements Output<I> {
+  pragma[nomagic]
+  private predicate isNonLocal(I::SummaryComponent component) {
+    component = I::content(_)
+    or
+    component = I::withContent(_)
+  }
+
+  pragma[nomagic]
+  private predicate hasLoadSummary(
+    I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input,
+    I::SummaryComponentStack output
+  ) {
+    callable.propagatesFlow(I::push(I::content(contents), input), output, true) and
+    not isNonLocal(input.head()) and
+    not isNonLocal(output.head())
+  }
+
+  pragma[nomagic]
+  private predicate hasStoreSummary(
+    I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input,
+    I::SummaryComponentStack output
+  ) {
+    not isNonLocal(input.head()) and
+    not isNonLocal(output.head()) and
+    (
+      callable.propagatesFlow(input, I::push(I::content(contents), output), true)
+      or
+      // Allow the input to start with an arbitrary WithoutContent[X].
+      // Since type-tracking only tracks one content deep, and we're about to store into another content,
+      // we're already preventing the input from being in a content.
+      callable
+          .propagatesFlow(I::push(I::withoutContent(_), input),
+            I::push(I::content(contents), output), true)
+    )
+  }
+
+  pragma[nomagic]
+  private predicate hasLoadStoreSummary(
+    I::SummarizedCallable callable, I::TypeTrackerContent loadContents,
+    I::TypeTrackerContent storeContents, I::SummaryComponentStack input,
+    I::SummaryComponentStack output
+  ) {
+    callable
+        .propagatesFlow(I::push(I::content(loadContents), input),
+          I::push(I::content(storeContents), output), true) and
+    not isNonLocal(input.head()) and
+    not isNonLocal(output.head())
+  }
+
+  pragma[nomagic]
+  private predicate hasWithoutContentSummary(
+    I::SummarizedCallable callable, I::TypeTrackerContentFilter filter,
+    I::SummaryComponentStack input, I::SummaryComponentStack output
+  ) {
+    exists(I::TypeTrackerContent content |
+      callable.propagatesFlow(I::push(I::withoutContent(content), input), output, true) and
+      filter = I::getFilterFromWithoutContentStep(content) and
+      not isNonLocal(input.head()) and
+      not isNonLocal(output.head()) and
+      input != output
+    )
+  }
+
+  pragma[nomagic]
+  private predicate hasWithContentSummary(
+    I::SummarizedCallable callable, I::TypeTrackerContentFilter filter,
+    I::SummaryComponentStack input, I::SummaryComponentStack output
+  ) {
+    exists(I::TypeTrackerContent content |
+      callable.propagatesFlow(I::push(I::withContent(content), input), output, true) and
+      filter = I::getFilterFromWithContentStep(content) and
+      not isNonLocal(input.head()) and
+      not isNonLocal(output.head()) and
+      input != output
+    )
+  }
+
+  private predicate componentLevelStep(I::SummaryComponent component) {
+    exists(I::TypeTrackerContent content |
+      component = I::withoutContent(content) and
+      not exists(I::getFilterFromWithoutContentStep(content))
+    )
+  }
+
+  /**
+   * Gets a data flow node corresponding to an argument or return value of `call`,
+   * as specified by `component`.
+   */
+  bindingset[call, component]
+  private I::Node evaluateSummaryComponentLocal(I::Node call, I::SummaryComponent component) {
+    result = I::argumentOf(call, component)
+    or
+    component = I::return() and
+    result = call
+  }
+
+  /**
+   * Holds if `callable` is relevant for type-tracking and we therefore want `stack` to
+   * be evaluated locally at its call sites.
+   */
+  pragma[nomagic]
+  private predicate dependsOnSummaryComponentStack(
+    I::SummarizedCallable callable, I::SummaryComponentStack stack
+  ) {
+    exists(I::callTo(callable)) and
+    (
+      callable.propagatesFlow(stack, _, true)
+      or
+      callable.propagatesFlow(_, stack, true)
+      or
+      // include store summaries as they may skip an initial step at the input
+      hasStoreSummary(callable, _, stack, _)
+    )
+    or
+    dependsOnSummaryComponentStackCons(callable, _, stack)
+  }
+
+  pragma[nomagic]
+  private predicate dependsOnSummaryComponentStackCons(
+    I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail
+  ) {
+    dependsOnSummaryComponentStack(callable, I::push(head, tail))
+  }
+
+  pragma[nomagic]
+  private predicate dependsOnSummaryComponentStackConsLocal(
+    I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail
+  ) {
+    dependsOnSummaryComponentStackCons(callable, head, tail) and
+    not isNonLocal(head)
+  }
+
+  pragma[nomagic]
+  private predicate dependsOnSummaryComponentStackLeaf(
+    I::SummarizedCallable callable, I::SummaryComponent leaf
+  ) {
+    dependsOnSummaryComponentStack(callable, I::singleton(leaf))
+  }
+
+  /**
+   * Gets a data flow node corresponding to the local input or output of `call`
+   * identified by `stack`, if possible.
+   */
+  pragma[nomagic]
+  private I::Node evaluateSummaryComponentStackLocal(
+    I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack stack
+  ) {
+    exists(I::SummaryComponent component |
+      dependsOnSummaryComponentStackLeaf(callable, component) and
+      stack = I::singleton(component) and
+      call = I::callTo(callable) and
+      result = evaluateSummaryComponentLocal(call, component)
+    )
+    or
+    exists(I::Node prev, I::SummaryComponent head, I::SummaryComponentStack tail |
+      prev = evaluateSummaryComponentStackLocal(callable, call, tail) and
+      dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head),
+        pragma[only_bind_out](tail)) and
+      stack = I::push(pragma[only_bind_out](head), pragma[only_bind_out](tail))
+    |
+      result = I::parameterOf(prev, head)
+      or
+      result = I::returnOf(prev, head)
+      or
+      componentLevelStep(head) and
+      result = prev
+    )
+  }
+
+  // Implement Output
+  predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo) {
+    exists(
+      I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
+      I::SummaryComponentStack output
+    |
+      callable.propagatesFlow(input, output, true) and
+      call = I::callTo(callable) and
+      nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
+      nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
+    )
+  }
+
+  predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) {
+    exists(
+      I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
+      I::SummaryComponentStack output
+    |
+      hasLoadSummary(callable, content, pragma[only_bind_into](input),
+        pragma[only_bind_into](output)) and
+      call = I::callTo(callable) and
+      nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
+      nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
+    )
+  }
+
+  predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) {
+    exists(
+      I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
+      I::SummaryComponentStack output
+    |
+      hasStoreSummary(callable, content, pragma[only_bind_into](input),
+        pragma[only_bind_into](output)) and
+      call = I::callTo(callable) and
+      nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
+      nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
+    )
+  }
+
+  predicate basicLoadStoreStep(
+    I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent,
+    I::TypeTrackerContent storeContent
+  ) {
+    exists(
+      I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
+      I::SummaryComponentStack output
+    |
+      hasLoadStoreSummary(callable, loadContent, storeContent, pragma[only_bind_into](input),
+        pragma[only_bind_into](output)) and
+      call = I::callTo(callable) and
+      nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
+      nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
+    )
+  }
+
+  predicate basicWithoutContentStep(
+    I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
+  ) {
+    exists(
+      I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
+      I::SummaryComponentStack output
+    |
+      hasWithoutContentSummary(callable, filter, pragma[only_bind_into](input),
+        pragma[only_bind_into](output)) and
+      call = I::callTo(callable) and
+      nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
+      nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
+    )
+  }
+
+  predicate basicWithContentStep(
+    I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
+  ) {
+    exists(
+      I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
+      I::SummaryComponentStack output
+    |
+      hasWithContentSummary(callable, filter, pragma[only_bind_into](input),
+        pragma[only_bind_into](output)) and
+      call = I::callTo(callable) and
+      nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
+      nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
+    )
+  }
+}