diff --git a/config/identical-files.json b/config/identical-files.json index 3c16c953129..ceed02ba5d6 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -523,6 +523,10 @@ "python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll", "ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll" ], + "SummaryTypeTracker": [ + "python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll", + "ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll" + ], "AccessPathSyntax": [ "csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll", "go/ql/lib/semmle/go/dataflow/internal/AccessPathSyntax.qll", diff --git a/python/ql/lib/change-notes/2023-05-30-typetracking-via-flow-summaries.md b/python/ql/lib/change-notes/2023-05-30-typetracking-via-flow-summaries.md new file mode 100644 index 00000000000..11c01629987 --- /dev/null +++ b/python/ql/lib/change-notes/2023-05-30-typetracking-via-flow-summaries.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Type tracking is now aware of flow summaries. This leads to a richer API graph, and may lead to more results in some queries. diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 3084983a605..ba451a21fdf 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -251,6 +251,9 @@ abstract class LibraryCallable extends string { /** Gets a call to this library callable. */ abstract CallCfgNode getACall(); + /** Same as `getACall` but without referring to the call graph or API graph. */ + CallCfgNode getACallSimple() { none() } + /** Gets a data-flow node, where this library callable is used as a call-back. 
*/ abstract ArgumentNode getACallback(); } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll new file mode 100644 index 00000000000..9c6f841651d --- /dev/null +++ b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll @@ -0,0 +1,391 @@ +/** + * Provides the implementation of type tracking steps through flow summaries. + * To use this, you must implement the `Input` signature. You can then use the predicates in the `Output` + * signature to implement the predicates of the same names inside `TypeTrackerSpecific.qll`. + */ + +/** The classes and predicates needed to generate type-tracking steps from summaries. */ +signature module Input { + // Dataflow nodes + class Node; + + // Content + class TypeTrackerContent; + + class TypeTrackerContentFilter; + + // Relating content and filters + /** + * Gets a content filter to use for a `WithoutContent[content]` step, (data is not allowed to be stored in `content`) + * or has no result if + * the step should be treated as ordinary flow. + * + * `WithoutContent` is often used to perform strong updates on individual collection elements, but for + * type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful + * for restricting the type of an object, and in these cases we translate it to a filter. + */ + TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content); + + /** + * Gets a content filter to use for a `WithContent[content]` step, (data must be stored in `content`) + * or has no result if + * the step cannot be handled by type-tracking. + * + * `WithContent` is often used to perform strong updates on individual collection elements (or rather + * to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive. 
+ * However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter. + */ + TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content); + + // Summaries and their stacks + class SummaryComponent; + + class SummaryComponentStack { + SummaryComponent head(); + } + + /** Gets a singleton stack containing `component`. */ + SummaryComponentStack singleton(SummaryComponent component); + + /** + * Gets the stack obtained by pushing `head` onto `tail`. + */ + SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail); + + /** Gets a summary component representing a return. */ + SummaryComponent return(); + + // Relating content to summaries + /** Gets a summary component for content `contents`. */ + SummaryComponent content(TypeTrackerContent contents); + + /** Gets a summary component where data is not allowed to be stored in `contents`. */ + SummaryComponent withoutContent(TypeTrackerContent contents); + + /** Gets a summary component where data must be stored in `contents`. */ + SummaryComponent withContent(TypeTrackerContent contents); + + // Callables + class SummarizedCallable { + predicate propagatesFlow( + SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + ); + } + + // Relating nodes to summaries + /** Gets a dataflow node representing the argument of `call` indicated by `arg`. */ + Node argumentOf(Node call, SummaryComponent arg); + + /** Gets a dataflow node representing the parameter of `callable` indicated by `param`. */ + Node parameterOf(Node callable, SummaryComponent param); + + /** Gets a dataflow node representing the return of `callable` indicated by `return`. */ + Node returnOf(Node callable, SummaryComponent return); + + // Relating callables to nodes + /** Gets a dataflow node representing a call to `callable`. 
*/ + Node callTo(SummarizedCallable callable); +} + +/** + * The predicates provided by a summary type tracker. + * These are meant to be used in `TypeTrackerSpecific.qll` + * inside the predicates of the same names. + */ +signature module Output<Input I> { + /** + * Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. + */ + predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo); + + /** + * Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`. + */ + predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content); + + /** + * Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`. + */ + predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content); + + /** + * Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`. + */ + predicate basicLoadStoreStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent, + I::TypeTrackerContent storeContent + ); + + /** + * Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here. + */ + predicate basicWithoutContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ); + + /** + * Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`. + */ + predicate basicWithContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ); +} + +/** + * Implementation of the summary type tracker, that is type tracking through flow summaries. 
+ */ +module SummaryFlow<Input I> implements Output<I> { + pragma[nomagic] + private predicate isNonLocal(I::SummaryComponent component) { + component = I::content(_) + or + component = I::withContent(_) + } + + pragma[nomagic] + private predicate hasLoadSummary( + I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input, + I::SummaryComponentStack output + ) { + callable.propagatesFlow(I::push(I::content(contents), input), output, true) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) + } + + pragma[nomagic] + private predicate hasStoreSummary( + I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input, + I::SummaryComponentStack output + ) { + not isNonLocal(input.head()) and + not isNonLocal(output.head()) and + ( + callable.propagatesFlow(input, I::push(I::content(contents), output), true) + or + // Allow the input to start with an arbitrary WithoutContent[X]. + // Since type-tracking only tracks one content deep, and we're about to store into another content, + // we're already preventing the input from being in a content. 
+ callable + .propagatesFlow(I::push(I::withoutContent(_), input), + I::push(I::content(contents), output), true) + ) + } + + pragma[nomagic] + private predicate hasLoadStoreSummary( + I::SummarizedCallable callable, I::TypeTrackerContent loadContents, + I::TypeTrackerContent storeContents, I::SummaryComponentStack input, + I::SummaryComponentStack output + ) { + callable + .propagatesFlow(I::push(I::content(loadContents), input), + I::push(I::content(storeContents), output), true) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) + } + + pragma[nomagic] + private predicate hasWithoutContentSummary( + I::SummarizedCallable callable, I::TypeTrackerContentFilter filter, + I::SummaryComponentStack input, I::SummaryComponentStack output + ) { + exists(I::TypeTrackerContent content | + callable.propagatesFlow(I::push(I::withoutContent(content), input), output, true) and + filter = I::getFilterFromWithoutContentStep(content) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) and + input != output + ) + } + + pragma[nomagic] + private predicate hasWithContentSummary( + I::SummarizedCallable callable, I::TypeTrackerContentFilter filter, + I::SummaryComponentStack input, I::SummaryComponentStack output + ) { + exists(I::TypeTrackerContent content | + callable.propagatesFlow(I::push(I::withContent(content), input), output, true) and + filter = I::getFilterFromWithContentStep(content) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) and + input != output + ) + } + + private predicate componentLevelStep(I::SummaryComponent component) { + exists(I::TypeTrackerContent content | + component = I::withoutContent(content) and + not exists(I::getFilterFromWithoutContentStep(content)) + ) + } + + /** + * Gets a data flow `I::Node` corresponding to an argument or return value of `call`, + * as specified by `component`. 
+ */ + bindingset[call, component] + private I::Node evaluateSummaryComponentLocal(I::Node call, I::SummaryComponent component) { + result = I::argumentOf(call, component) + or + component = I::return() and + result = call + } + + /** + * Holds if `callable` is relevant for type-tracking and we therefore want `stack` to + * be evaluated locally at its call sites. + */ + pragma[nomagic] + private predicate dependsOnSummaryComponentStack( + I::SummarizedCallable callable, I::SummaryComponentStack stack + ) { + exists(I::callTo(callable)) and + ( + callable.propagatesFlow(stack, _, true) + or + callable.propagatesFlow(_, stack, true) + or + // include store summaries as they may skip an initial step at the input + hasStoreSummary(callable, _, stack, _) + ) + or + dependsOnSummaryComponentStackCons(callable, _, stack) + } + + pragma[nomagic] + private predicate dependsOnSummaryComponentStackCons( + I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail + ) { + dependsOnSummaryComponentStack(callable, I::push(head, tail)) + } + + pragma[nomagic] + private predicate dependsOnSummaryComponentStackConsLocal( + I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail + ) { + dependsOnSummaryComponentStackCons(callable, head, tail) and + not isNonLocal(head) + } + + pragma[nomagic] + private predicate dependsOnSummaryComponentStackLeaf( + I::SummarizedCallable callable, I::SummaryComponent leaf + ) { + dependsOnSummaryComponentStack(callable, I::singleton(leaf)) + } + + /** + * Gets a data flow I::Node corresponding to the local input or output of `call` + * identified by `stack`, if possible. 
+ */ + pragma[nomagic] + private I::Node evaluateSummaryComponentStackLocal( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack stack + ) { + exists(I::SummaryComponent component | + dependsOnSummaryComponentStackLeaf(callable, component) and + stack = I::singleton(component) and + call = I::callTo(callable) and + result = evaluateSummaryComponentLocal(call, component) + ) + or + exists(I::Node prev, I::SummaryComponent head, I::SummaryComponentStack tail | + prev = evaluateSummaryComponentStackLocal(callable, call, tail) and + dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head), + pragma[only_bind_out](tail)) and + stack = I::push(pragma[only_bind_out](head), pragma[only_bind_out](tail)) + | + result = I::parameterOf(prev, head) + or + result = I::returnOf(prev, head) + or + componentLevelStep(head) and + result = prev + ) + } + + // Implement Output + predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + callable.propagatesFlow(input, output, true) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasLoadSummary(callable, content, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) { + exists( + I::SummarizedCallable callable, 
I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasStoreSummary(callable, content, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicLoadStoreStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent, + I::TypeTrackerContent storeContent + ) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasLoadStoreSummary(callable, loadContent, storeContent, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicWithoutContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasWithoutContentSummary(callable, filter, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicWithContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasWithContentSummary(callable, filter, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = 
evaluateSummaryComponentStackLocal(callable, call, output) + ) + } +} diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll index 9e05b7869c5..bac194aae9e 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll @@ -61,7 +61,9 @@ predicate capturedJumpStep(Node nodeFrom, Node nodeTo) { predicate levelStepCall(Node nodeFrom, Node nodeTo) { none() } /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */ -predicate levelStepNoCall(Node nodeFrom, Node nodeTo) { none() } +predicate levelStepNoCall(Node nodeFrom, Node nodeTo) { + TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo) +} /** * Gets the name of a possible piece of content. For Python, this is currently only attribute names, @@ -108,6 +110,12 @@ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) { nodeFrom = a.getValue() and nodeTo = a.getObject() ) + or + exists(DataFlowPublic::ContentSet contents | + contents.(DataFlowPublic::AttributeContent).getAttribute() = content + | + TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents) + ) } /** @@ -119,13 +127,24 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) { nodeFrom = a.getObject() and nodeTo = a ) + or + exists(DataFlowPublic::ContentSet contents | + contents.(DataFlowPublic::AttributeContent).getAttribute() = content + | + TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents) + ) } /** * Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`. 
*/ predicate basicLoadStoreStep(Node nodeFrom, Node nodeTo, string loadContent, string storeContent) { - none() + exists(DataFlowPublic::ContentSet loadContents, DataFlowPublic::ContentSet storeContents | + loadContents.(DataFlowPublic::AttributeContent).getAttribute() = loadContent and + storeContents.(DataFlowPublic::AttributeContent).getAttribute() = storeContent + | + TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContents, storeContents) + ) } /** @@ -144,3 +163,93 @@ predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) class Boolean extends boolean { Boolean() { this = true or this = false } } + +private import SummaryTypeTracker as SummaryTypeTracker +private import semmle.python.dataflow.new.FlowSummary as FlowSummary +private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch + +pragma[noinline] +private predicate argumentPositionMatch( + DataFlowPublic::CallCfgNode call, DataFlowPublic::Node arg, + DataFlowDispatch::ParameterPosition ppos +) { + exists(DataFlowDispatch::ArgumentPosition apos | + DataFlowDispatch::parameterMatch(ppos, apos) and + DataFlowDispatch::normalCallArg(call.getNode(), arg, apos) + ) +} + +private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { + // Dataflow nodes + class Node = DataFlowPublic::Node; + + // Content + class TypeTrackerContent = DataFlowPublic::ContentSet; + + class TypeTrackerContentFilter = ContentFilter; + + TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content) { none() } + + TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { none() } + + // Callables + class SummarizedCallable = FlowSummary::SummarizedCallable; + + // Summaries and their stacks + class SummaryComponent = FlowSummary::SummaryComponent; + + class SummaryComponentStack = FlowSummary::SummaryComponentStack; + + predicate singleton = FlowSummary::SummaryComponentStack::singleton/1; + + 
+ predicate push = FlowSummary::SummaryComponentStack::push/2; + + // Relating content to summaries + predicate content = FlowSummary::SummaryComponent::content/1; + + SummaryComponent withoutContent(TypeTrackerContent contents) { none() } + + SummaryComponent withContent(TypeTrackerContent contents) { none() } + + predicate return = FlowSummary::SummaryComponent::return/0; + + // Relating nodes to summaries + Node argumentOf(Node call, SummaryComponent arg) { + exists(DataFlowDispatch::ParameterPosition pos | + arg = FlowSummary::SummaryComponent::argument(pos) and + argumentPositionMatch(call, result, pos) + ) + } + + Node parameterOf(Node callable, SummaryComponent param) { + exists( + DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos, Parameter p + | + param = FlowSummary::SummaryComponent::parameter(apos) and + DataFlowDispatch::parameterMatch(ppos, apos) and + // pick the SsaNode rather than the CfgNode + result.asVar().getDefinition().(ParameterDefinition).getParameter() = p and + ( + exists(int i | ppos.isPositional(i) | + p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArg(i) + ) + or + exists(string name | ppos.isKeyword(name) | + p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArgByName(name) + ) + ) + ) + } + + Node returnOf(Node callable, SummaryComponent return) { + return = FlowSummary::SummaryComponent::return() and + // `result` should be the return value of a callable expression (lambda or function) referenced by `callable` + result.asCfgNode() = + callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode() + } + + // Relating callables to nodes + Node callTo(SummarizedCallable callable) { result = callable.getACallSimple() } +} + +private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow<SummaryTypeTrackerInput>; diff --git a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll 
b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll index 971a653c469..cdd61420bbb 100644 --- a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll +++ b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll @@ -60,7 +60,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable { } private class SummarizedCallableReversed extends SummarizedCallable { - SummarizedCallableReversed() { this = "reversed" } + SummarizedCallableReversed() { this = "list_reversed" } override DataFlow::CallCfgNode getACall() { result.getFunction().asCfgNode().(NameNode).getId() = this diff --git a/python/ql/test/experimental/dataflow/summaries/summaries.py b/python/ql/test/experimental/dataflow/summaries/summaries.py index 1532a5393d8..806b39f6dc8 100644 --- a/python/ql/test/experimental/dataflow/summaries/summaries.py +++ b/python/ql/test/experimental/dataflow/summaries/summaries.py @@ -66,3 +66,21 @@ SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]" from json import loads as json_loads tainted_resultlist = json_loads(SOURCE) SINK(tainted_resultlist[0]) # $ flow="SOURCE, l:-1 -> tainted_resultlist[0]" + + +# Class methods are not handled right now + +class MyClass: + @staticmethod + def foo(x): + return x + + def bar(self, x): + return x + +through_staticmethod = apply_lambda(MyClass.foo, SOURCE) +through_staticmethod # $ MISSING: flow + +mc = MyClass() +through_method = apply_lambda(mc.bar, SOURCE) +through_method # $ MISSING: flow diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll new file mode 100644 index 00000000000..8d626b332a3 --- /dev/null +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll @@ -0,0 +1,189 @@ +private import python +private import semmle.python.dataflow.new.FlowSummary +private import semmle.python.ApiGraphs + +/** + * This module ensures that 
the `callStep` predicate in + * our type tracker implementation does not refer to the + * `getACall` predicate on `SummarizedCallable`. + */ +module RecursionGuard { + private import semmle.python.dataflow.new.internal.TypeTrackerSpecific as TT + + private class RecursionGuard extends SummarizedCallable { + RecursionGuard() { this = "TypeTrackingSummariesRecursionGuard" } + + override DataFlow::CallCfgNode getACall() { + result.getFunction().asCfgNode().(NameNode).getId() = this and + (TT::callStep(_, _) implies any()) + } + + override DataFlow::CallCfgNode getACallSimple() { none() } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + } + + predicate test(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + TT::levelStepNoCall(nodeFrom, nodeTo) + } +} + +private class SummarizedCallableIdentity extends SummarizedCallable { + SummarizedCallableIdentity() { this = "TTS_identity" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = true + } +} + +// For lambda flow to work, implement lambdaCall and lambdaCreation +private class SummarizedCallableApplyLambda extends SummarizedCallable { + SummarizedCallableApplyLambda() { this = "TTS_apply_lambda" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[1]" and + output 
= "Argument[0].Parameter[0]" and + preservesValue = true + or + input = "Argument[0].ReturnValue" and + output = "ReturnValue" and + preservesValue = true + } +} + +private class SummarizedCallableReversed extends SummarizedCallable { + SummarizedCallableReversed() { this = "TTS_reversed" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[0].ListElement" and + output = "ReturnValue.ListElement" and + preservesValue = true + } +} + +private class SummarizedCallableMap extends SummarizedCallable { + SummarizedCallableMap() { this = "TTS_list_map" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[1].ListElement" and + output = "Argument[0].Parameter[0]" and + preservesValue = true + or + input = "Argument[0].ReturnValue" and + output = "ReturnValue.ListElement" and + preservesValue = true + } +} + +private class SummarizedCallableAppend extends SummarizedCallable { + SummarizedCallableAppend() { this = "TTS_append_to_list" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[0]" 
and + output = "ReturnValue" and + preservesValue = false + or + input = "Argument[1]" and + output = "ReturnValue.ListElement" and + preservesValue = true + } +} + +private class SummarizedCallableJsonLoads extends SummarizedCallable { + SummarizedCallableJsonLoads() { this = "TTS_json.loads" } + + override DataFlow::CallCfgNode getACall() { + result = API::moduleImport("json").getMember("loads").getACall() + } + + override DataFlow::CallCfgNode getACallSimple() { none() } + + override DataFlow::ArgumentNode getACallback() { + result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource() + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[0]" and + output = "ReturnValue.ListElement" and + preservesValue = true + } +} + +// read and store +private class SummarizedCallableReadSecret extends SummarizedCallable { + SummarizedCallableReadSecret() { this = "TTS_read_secret" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[0].Attribute[secret]" and + output = "ReturnValue" and + preservesValue = true + } +} + +private class SummarizedCallableSetSecret extends SummarizedCallable { + SummarizedCallableSetSecret() { this = "TTS_set_secret" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[1]" and + output = 
"Argument[0].Attribute[secret]" and + preservesValue = true + } +} diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py new file mode 100644 index 00000000000..f838032b063 --- /dev/null +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py @@ -0,0 +1,78 @@ +import sys +import os + +# Simple summary +tainted = TTS_identity(tracked) # $ tracked +tainted # $ tracked + +# Lambda summary +# I think the missing result is expected because type tracking +# is not allowed to flow back out of a call. +tainted_lambda = TTS_apply_lambda(lambda x: x, tracked) # $ tracked +tainted_lambda # $ MISSING: tracked + +# A lambda that directly introduces taint +bad_lambda = TTS_apply_lambda(lambda x: tracked, 1) # $ tracked +bad_lambda # $ tracked + +# A lambda that breaks the flow +untainted_lambda = TTS_apply_lambda(lambda x: 1, tracked) # $ tracked +untainted_lambda + +# Collection summaries +tainted_list = TTS_reversed([tracked]) # $ tracked +tl = tainted_list[0] +tl # $ MISSING: tracked + +# Complex summaries +def add_colon(x): + return x + ":" + +tainted_mapped = TTS_list_map(add_colon, [tracked]) # $ tracked +tm = tainted_mapped[0] +tm # $ MISSING: tracked + +def explicit_identity(x): + return x + +tainted_mapped_explicit = TTS_list_map(explicit_identity, [tracked]) # $ tracked +tainted_mapped_explicit[0] # $ MISSING: tracked + +tainted_mapped_summary = TTS_list_map(identity, [tracked]) # $ tracked +tms = tainted_mapped_summary[0] +tms # $ MISSING: tracked + +another_tainted_list = TTS_append_to_list([], tracked) # $ tracked +atl = another_tainted_list[0] +atl # $ MISSING: tracked + +# This will not work, as the call is not found by `getACallSimple`. 
+from json import loads as json_loads +tainted_resultlist = json_loads(tracked) # $ tracked +tr = tainted_resultlist[0] +tr # $ MISSING: tracked + +x.secret = tracked # $ tracked=secret tracked +r = TTS_read_secret(x) # $ tracked=secret tracked +r # $ tracked + +y # $ tracked=secret +TTS_set_secret(y, tracked) # $ tracked tracked=secret +y.secret # $ tracked tracked=secret + +# Class methods are not handled right now + +class MyClass: + @staticmethod + def foo(x): + return x + + def bar(self, x): + return x + +through_staticmethod = TTS_apply_lambda(MyClass.foo, tracked) # $ tracked +through_staticmethod # $ MISSING: tracked + +mc = MyClass() +through_method = TTS_apply_lambda(mc.bar, tracked) # $ tracked +through_method # $ MISSING: tracked diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.expected b/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.ql b/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.ql new file mode 100644 index 00000000000..e5bf62053a0 --- /dev/null +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.ql @@ -0,0 +1,36 @@ +import python +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.TypeTracker +import TestUtilities.InlineExpectationsTest +import semmle.python.ApiGraphs +import TestSummaries + +// ----------------------------------------------------------------------------- +// tracked +// ----------------------------------------------------------------------------- +private DataFlow::TypeTrackingNode tracked(TypeTracker t) { + t.start() and + result.asCfgNode() = any(NameNode n | n.getId() = "tracked") + or + exists(TypeTracker t2 | result = tracked(t2).track(t2, t)) +} + +class TrackedTest extends InlineExpectationsTest { + TrackedTest() { this = "TrackedTest" } + + override string 
getARelevantTag() { result = "tracked" } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(DataFlow::Node e, TypeTracker t | + exists(e.getLocation().getFile().getRelativePath()) and + e.getLocation().getStartLine() > 0 and + tracked(t).flowsTo(e) and + // Module variables have no sensible location, and hence can't be annotated. + not e instanceof DataFlow::ModuleVariableNode and + tag = "tracked" and + location = e.getLocation() and + value = t.getAttr() and + element = e.toString() + ) + } +} diff --git a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll index 7725d201fba..d14c182d127 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll @@ -112,15 +112,7 @@ predicate levelStepCall(Node nodeFrom, Node nodeTo) { /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. 
*/ pragma[nomagic] predicate levelStepNoCall(Node nodeFrom, Node nodeTo) { - exists( - SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input, - SummaryComponentStack output - | - callable.propagatesFlow(input, output, true) and - call.asExpr().getExpr() = callable.getACallSimple() and - nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and - nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) - ) + TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo) or localFieldStep(nodeFrom, nodeTo) } @@ -276,16 +268,7 @@ predicate returnStep(Node nodeFrom, Node nodeTo) { predicate basicStoreStep(Node nodeFrom, Node nodeTo, DataFlow::ContentSet contents) { storeStepIntoSourceNode(nodeFrom, nodeTo, contents) or - exists( - SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input, - SummaryComponentStack output - | - hasStoreSummary(callable, contents, pragma[only_bind_into](input), - pragma[only_bind_into](output)) and - call.asExpr().getExpr() = callable.getACallSimple() and - nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and - nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) - ) + TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents) } /** @@ -319,15 +302,7 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, DataFlow::ContentSet content nodeTo.asExpr() = call ) or - exists( - SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input, - SummaryComponentStack output - | - hasLoadSummary(callable, contents, pragma[only_bind_into](input), pragma[only_bind_into](output)) and - call.asExpr().getExpr() = callable.getACallSimple() and - nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and - nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) - ) + TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents) } /** @@ -336,48 +311,21 @@ predicate 
basicLoadStep(Node nodeFrom, Node nodeTo, DataFlow::ContentSet content predicate basicLoadStoreStep( Node nodeFrom, Node nodeTo, DataFlow::ContentSet loadContent, DataFlow::ContentSet storeContent ) { - exists( - SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input, - SummaryComponentStack output - | - hasLoadStoreSummary(callable, loadContent, storeContent, pragma[only_bind_into](input), - pragma[only_bind_into](output)) and - call.asExpr().getExpr() = callable.getACallSimple() and - nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and - nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) - ) + TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContent, storeContent) } /** * Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here. */ predicate basicWithoutContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) { - exists( - SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input, - SummaryComponentStack output - | - hasWithoutContentSummary(callable, filter, pragma[only_bind_into](input), - pragma[only_bind_into](output)) and - call.asExpr().getExpr() = callable.getACallSimple() and - nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and - nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) - ) + TypeTrackerSummaryFlow::basicWithoutContentStep(nodeFrom, nodeTo, filter) } /** * Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`. 
*/ predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) { - exists( - SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input, - SummaryComponentStack output - | - hasWithContentSummary(callable, filter, pragma[only_bind_into](input), - pragma[only_bind_into](output)) and - call.asExpr().getExpr() = callable.getACallSimple() and - nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and - nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) - ) + TypeTrackerSummaryFlow::basicWithContentStep(nodeFrom, nodeTo, filter) } /** @@ -389,121 +337,6 @@ class Boolean extends boolean { private import SummaryComponentStack -pragma[nomagic] -private predicate hasStoreSummary( - SummarizedCallable callable, DataFlow::ContentSet contents, SummaryComponentStack input, - SummaryComponentStack output -) { - not isNonLocal(input.head()) and - not isNonLocal(output.head()) and - ( - callable.propagatesFlow(input, push(SummaryComponent::content(contents), output), true) - or - // Allow the input to start with an arbitrary WithoutContent[X]. - // Since type-tracking only tracks one content deep, and we're about to store into another content, - // we're already preventing the input from being in a content. 
- callable - .propagatesFlow(push(SummaryComponent::withoutContent(_), input), - push(SummaryComponent::content(contents), output), true) - ) -} - -pragma[nomagic] -private predicate hasLoadSummary( - SummarizedCallable callable, DataFlow::ContentSet contents, SummaryComponentStack input, - SummaryComponentStack output -) { - callable.propagatesFlow(push(SummaryComponent::content(contents), input), output, true) and - not isNonLocal(input.head()) and - not isNonLocal(output.head()) -} - -pragma[nomagic] -private predicate hasLoadStoreSummary( - SummarizedCallable callable, DataFlow::ContentSet loadContents, - DataFlow::ContentSet storeContents, SummaryComponentStack input, SummaryComponentStack output -) { - callable - .propagatesFlow(push(SummaryComponent::content(loadContents), input), - push(SummaryComponent::content(storeContents), output), true) and - not isNonLocal(input.head()) and - not isNonLocal(output.head()) -} - -/** - * Gets a content filter to use for a `WithoutContent[content]` step, or has no result if - * the step should be treated as ordinary flow. - * - * `WithoutContent` is often used to perform strong updates on individual collection elements, but for - * type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful - * for restricting the type of an object, and in these cases we translate it to a filter. 
- */ -private ContentFilter getFilterFromWithoutContentStep(DataFlow::ContentSet content) { - ( - content.isAnyElement() - or - content.isElementLowerBoundOrUnknown(_) - or - content.isElementOfTypeOrUnknown(_) - or - content.isSingleton(any(DataFlow::Content::UnknownElementContent c)) - ) and - result = MkElementFilter() -} - -pragma[nomagic] -private predicate hasWithoutContentSummary( - SummarizedCallable callable, ContentFilter filter, SummaryComponentStack input, - SummaryComponentStack output -) { - exists(DataFlow::ContentSet content | - callable.propagatesFlow(push(SummaryComponent::withoutContent(content), input), output, true) and - filter = getFilterFromWithoutContentStep(content) and - not isNonLocal(input.head()) and - not isNonLocal(output.head()) and - input != output - ) -} - -/** - * Gets a content filter to use for a `WithContent[content]` step, or has no result if - * the step cannot be handled by type-tracking. - * - * `WithContent` is often used to perform strong updates on individual collection elements (or rather - * to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive. - * However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter. 
- */ -private ContentFilter getFilterFromWithContentStep(DataFlow::ContentSet content) { - ( - content.isAnyElement() - or - content.isElementLowerBound(_) - or - content.isElementLowerBoundOrUnknown(_) - or - content.isElementOfType(_) - or - content.isElementOfTypeOrUnknown(_) - or - content.isSingleton(any(DataFlow::Content::ElementContent c)) - ) and - result = MkElementFilter() -} - -pragma[nomagic] -private predicate hasWithContentSummary( - SummarizedCallable callable, ContentFilter filter, SummaryComponentStack input, - SummaryComponentStack output -) { - exists(DataFlow::ContentSet content | - callable.propagatesFlow(push(SummaryComponent::withContent(content), input), output, true) and - filter = getFilterFromWithContentStep(content) and - not isNonLocal(input.head()) and - not isNonLocal(output.head()) and - input != output - ) -} - /** * Holds if the given component can't be evaluated by `evaluateSummaryComponentStackLocal`. */ @@ -514,101 +347,95 @@ predicate isNonLocal(SummaryComponent component) { component = SC::withContent(_) } -/** - * Gets a data flow node corresponding an argument or return value of `call`, - * as specified by `component`. - */ -bindingset[call, component] -private DataFlow::Node evaluateSummaryComponentLocal( - DataFlow::CallNode call, SummaryComponent component -) { - exists(DataFlowDispatch::ParameterPosition pos | - component = SummaryComponent::argument(pos) and - argumentPositionMatch(call.asExpr(), result, pos) - ) - or - component = SummaryComponent::return() and - result = call -} +private import internal.SummaryTypeTracker as SummaryTypeTracker +private import codeql.ruby.dataflow.FlowSummary as FlowSummary -/** - * Holds if `callable` is relevant for type-tracking and we therefore want `stack` to - * be evaluated locally at its call sites. 
- */ -pragma[nomagic] -private predicate dependsOnSummaryComponentStack( - SummarizedCallable callable, SummaryComponentStack stack -) { - exists(callable.getACallSimple()) and - ( - callable.propagatesFlow(stack, _, true) - or - callable.propagatesFlow(_, stack, true) - or - // include store summaries as they may skip an initial step at the input - hasStoreSummary(callable, _, stack, _) - ) - or - dependsOnSummaryComponentStackCons(callable, _, stack) -} +private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { + // Dataflow nodes + class Node = DataFlow::Node; -pragma[nomagic] -private predicate dependsOnSummaryComponentStackCons( - SummarizedCallable callable, SummaryComponent head, SummaryComponentStack tail -) { - dependsOnSummaryComponentStack(callable, SCS::push(head, tail)) -} + // Content + class TypeTrackerContent = DataFlowPublic::ContentSet; -pragma[nomagic] -private predicate dependsOnSummaryComponentStackConsLocal( - SummarizedCallable callable, SummaryComponent head, SummaryComponentStack tail -) { - dependsOnSummaryComponentStackCons(callable, head, tail) and - not isNonLocal(head) -} + class TypeTrackerContentFilter = ContentFilter; -pragma[nomagic] -private predicate dependsOnSummaryComponentStackLeaf( - SummarizedCallable callable, SummaryComponent leaf -) { - dependsOnSummaryComponentStack(callable, SCS::singleton(leaf)) -} + TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content) { + ( + content.isAnyElement() + or + content.isElementLowerBoundOrUnknown(_) + or + content.isElementOfTypeOrUnknown(_) + or + content.isSingleton(any(DataFlow::Content::UnknownElementContent c)) + ) and + result = MkElementFilter() + } -/** - * Gets a data flow node corresponding to the local input or output of `call` - * identified by `stack`, if possible. 
- */ -pragma[nomagic] -private DataFlow::Node evaluateSummaryComponentStackLocal( - SummarizedCallable callable, DataFlow::CallNode call, SummaryComponentStack stack -) { - exists(SummaryComponent component | - dependsOnSummaryComponentStackLeaf(callable, component) and - stack = SCS::singleton(component) and - call.asExpr().getExpr() = callable.getACallSimple() and - result = evaluateSummaryComponentLocal(call, component) - ) - or - exists(DataFlow::Node prev, SummaryComponent head, SummaryComponentStack tail | - prev = evaluateSummaryComponentStackLocal(callable, call, tail) and - dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head), - pragma[only_bind_out](tail)) and - stack = SCS::push(pragma[only_bind_out](head), pragma[only_bind_out](tail)) - | + TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { + ( + content.isAnyElement() + or + content.isElementLowerBound(_) + or + content.isElementLowerBoundOrUnknown(_) + or + content.isElementOfType(_) + or + content.isElementOfTypeOrUnknown(_) + or + content.isSingleton(any(DataFlow::Content::ElementContent c)) + ) and + result = MkElementFilter() + } + + // Summaries and their stacks + class SummaryComponent = FlowSummary::SummaryComponent; + + class SummaryComponentStack = FlowSummary::SummaryComponentStack; + + predicate singleton = FlowSummary::SummaryComponentStack::singleton/1; + + predicate push = FlowSummary::SummaryComponentStack::push/2; + + // Relating content to summaries + predicate content = FlowSummary::SummaryComponent::content/1; + + predicate withoutContent = FlowSummary::SummaryComponent::withoutContent/1; + + predicate withContent = FlowSummary::SummaryComponent::withContent/1; + + predicate return = FlowSummary::SummaryComponent::return/0; + + // Callables + class SummarizedCallable = FlowSummary::SummarizedCallable; + + // Relating nodes to summaries + Node argumentOf(Node call, SummaryComponent arg) { + 
exists(DataFlowDispatch::ParameterPosition pos | + arg = SummaryComponent::argument(pos) and + argumentPositionMatch(call.asExpr(), result, pos) + ) + } + + Node parameterOf(Node callable, SummaryComponent param) { exists(DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos | - head = SummaryComponent::parameter(apos) and + param = SummaryComponent::parameter(apos) and DataFlowDispatch::parameterMatch(ppos, apos) and - result.(DataFlowPrivate::ParameterNodeImpl).isSourceParameterOf(prev.asExpr().getExpr(), ppos) + result + .(DataFlowPrivate::ParameterNodeImpl) + .isSourceParameterOf(callable.asExpr().getExpr(), ppos) ) - or - head = SummaryComponent::return() and + } + + Node returnOf(Node callable, SummaryComponent return) { + return = SummaryComponent::return() and result.(DataFlowPrivate::ReturnNode).(DataFlowPrivate::NodeImpl).getCfgScope() = - prev.asExpr().getExpr() - or - exists(DataFlow::ContentSet content | - head = SummaryComponent::withoutContent(content) and - not exists(getFilterFromWithoutContentStep(content)) and - result = prev - ) - ) + callable.asExpr().getExpr() + } + + // Relating callables to nodes + Node callTo(SummarizedCallable callable) { result.asExpr().getExpr() = callable.getACallSimple() } } + +private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow; diff --git a/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll b/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll new file mode 100644 index 00000000000..9c6f841651d --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll @@ -0,0 +1,391 @@ +/** + * Provides the implementation of type tracking steps through flow summaries. + * To use this, you must implement the `Input` signature. You can then use the predicates in the `Output` + * signature to implement the predicates of the same names inside `TypeTrackerSpecific.qll`. 
+ */ + +/** The classes and predicates needed to generate type-tracking steps from summaries. */ +signature module Input { + // Dataflow nodes + class Node; + + // Content + class TypeTrackerContent; + + class TypeTrackerContentFilter; + + // Relating content and filters + /** + * Gets a content filter to use for a `WithoutContent[content]` step, (data is not allowed to be stored in `content`) + * or has no result if + * the step should be treated as ordinary flow. + * + * `WithoutContent` is often used to perform strong updates on individual collection elements, but for + * type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful + * for restricting the type of an object, and in these cases we translate it to a filter. + */ + TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content); + + /** + * Gets a content filter to use for a `WithContent[content]` step, (data must be stored in `content`) + * or has no result if + * the step cannot be handled by type-tracking. + * + * `WithContent` is often used to perform strong updates on individual collection elements (or rather + * to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive. + * However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter. + */ + TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content); + + // Summaries and their stacks + class SummaryComponent; + + class SummaryComponentStack { + SummaryComponent head(); + } + + /** Gets a singleton stack containing `component`. */ + SummaryComponentStack singleton(SummaryComponent component); + + /** + * Gets the stack obtained by pushing `head` onto `tail`. + */ + SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail); + + /** Gets a singleton stack representing a return. 
*/ + SummaryComponent return(); + + // Relating content to summaries + /** Gets a summary component for content `c`. */ + SummaryComponent content(TypeTrackerContent contents); + + /** Gets a summary component where data is not allowed to be stored in `contents`. */ + SummaryComponent withoutContent(TypeTrackerContent contents); + + /** Gets a summary component where data must be stored in `contents`. */ + SummaryComponent withContent(TypeTrackerContent contents); + + // Callables + class SummarizedCallable { + predicate propagatesFlow( + SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + ); + } + + // Relating nodes to summaries + /** Gets a dataflow node respresenting the argument of `call` indicated by `arg`. */ + Node argumentOf(Node call, SummaryComponent arg); + + /** Gets a dataflow node respresenting the parameter of `callable` indicated by `param`. */ + Node parameterOf(Node callable, SummaryComponent param); + + /** Gets a dataflow node respresenting the return of `callable` indicated by `return`. */ + Node returnOf(Node callable, SummaryComponent return); + + // Relating callables to nodes + /** Gets a dataflow node respresenting a call to `callable`. */ + Node callTo(SummarizedCallable callable); +} + +/** + * The predicates provided by a summary type tracker. + * These are meant to be used in `TypeTrackerSpecific.qll` + * inside the predicates of the same names. + */ +signature module Output { + /** + * Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. + */ + predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo); + + /** + * Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`. + */ + predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content); + + /** + * Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`. 
+ */ + predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content); + + /** + * Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`. + */ + predicate basicLoadStoreStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent, + I::TypeTrackerContent storeContent + ); + + /** + * Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here. + */ + predicate basicWithoutContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ); + + /** + * Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`. + */ + predicate basicWithContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ); +} + +/** + * Implementation of the summary type tracker, that is type tracking through flow summaries. + */ +module SummaryFlow implements Output { + pragma[nomagic] + private predicate isNonLocal(I::SummaryComponent component) { + component = I::content(_) + or + component = I::withContent(_) + } + + pragma[nomagic] + private predicate hasLoadSummary( + I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input, + I::SummaryComponentStack output + ) { + callable.propagatesFlow(I::push(I::content(contents), input), output, true) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) + } + + pragma[nomagic] + private predicate hasStoreSummary( + I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input, + I::SummaryComponentStack output + ) { + not isNonLocal(input.head()) and + not isNonLocal(output.head()) and + ( + callable.propagatesFlow(input, I::push(I::content(contents), output), true) + or + // Allow the input to start with an arbitrary WithoutContent[X]. 
+ // Since type-tracking only tracks one content deep, and we're about to store into another content, + // we're already preventing the input from being in a content. + callable + .propagatesFlow(I::push(I::withoutContent(_), input), + I::push(I::content(contents), output), true) + ) + } + + pragma[nomagic] + private predicate hasLoadStoreSummary( + I::SummarizedCallable callable, I::TypeTrackerContent loadContents, + I::TypeTrackerContent storeContents, I::SummaryComponentStack input, + I::SummaryComponentStack output + ) { + callable + .propagatesFlow(I::push(I::content(loadContents), input), + I::push(I::content(storeContents), output), true) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) + } + + pragma[nomagic] + private predicate hasWithoutContentSummary( + I::SummarizedCallable callable, I::TypeTrackerContentFilter filter, + I::SummaryComponentStack input, I::SummaryComponentStack output + ) { + exists(I::TypeTrackerContent content | + callable.propagatesFlow(I::push(I::withoutContent(content), input), output, true) and + filter = I::getFilterFromWithoutContentStep(content) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) and + input != output + ) + } + + pragma[nomagic] + private predicate hasWithContentSummary( + I::SummarizedCallable callable, I::TypeTrackerContentFilter filter, + I::SummaryComponentStack input, I::SummaryComponentStack output + ) { + exists(I::TypeTrackerContent content | + callable.propagatesFlow(I::push(I::withContent(content), input), output, true) and + filter = I::getFilterFromWithContentStep(content) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) and + input != output + ) + } + + private predicate componentLevelStep(I::SummaryComponent component) { + exists(I::TypeTrackerContent content | + component = I::withoutContent(content) and + not exists(I::getFilterFromWithoutContentStep(content)) + ) + } + + /** + * Gets a data flow `I::Node` corresponding an argument or 
return value of `call`, + * as specified by `component`. + */ + bindingset[call, component] + private I::Node evaluateSummaryComponentLocal(I::Node call, I::SummaryComponent component) { + result = I::argumentOf(call, component) + or + component = I::return() and + result = call + } + + /** + * Holds if `callable` is relevant for type-tracking and we therefore want `stack` to + * be evaluated locally at its call sites. + */ + pragma[nomagic] + private predicate dependsOnSummaryComponentStack( + I::SummarizedCallable callable, I::SummaryComponentStack stack + ) { + exists(I::callTo(callable)) and + ( + callable.propagatesFlow(stack, _, true) + or + callable.propagatesFlow(_, stack, true) + or + // include store summaries as they may skip an initial step at the input + hasStoreSummary(callable, _, stack, _) + ) + or + dependsOnSummaryComponentStackCons(callable, _, stack) + } + + pragma[nomagic] + private predicate dependsOnSummaryComponentStackCons( + I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail + ) { + dependsOnSummaryComponentStack(callable, I::push(head, tail)) + } + + pragma[nomagic] + private predicate dependsOnSummaryComponentStackConsLocal( + I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail + ) { + dependsOnSummaryComponentStackCons(callable, head, tail) and + not isNonLocal(head) + } + + pragma[nomagic] + private predicate dependsOnSummaryComponentStackLeaf( + I::SummarizedCallable callable, I::SummaryComponent leaf + ) { + dependsOnSummaryComponentStack(callable, I::singleton(leaf)) + } + + /** + * Gets a data flow I::Node corresponding to the local input or output of `call` + * identified by `stack`, if possible. 
+ */ + pragma[nomagic] + private I::Node evaluateSummaryComponentStackLocal( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack stack + ) { + exists(I::SummaryComponent component | + dependsOnSummaryComponentStackLeaf(callable, component) and + stack = I::singleton(component) and + call = I::callTo(callable) and + result = evaluateSummaryComponentLocal(call, component) + ) + or + exists(I::Node prev, I::SummaryComponent head, I::SummaryComponentStack tail | + prev = evaluateSummaryComponentStackLocal(callable, call, tail) and + dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head), + pragma[only_bind_out](tail)) and + stack = I::push(pragma[only_bind_out](head), pragma[only_bind_out](tail)) + | + result = I::parameterOf(prev, head) + or + result = I::returnOf(prev, head) + or + componentLevelStep(head) and + result = prev + ) + } + + // Implement Output + predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + callable.propagatesFlow(input, output, true) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasLoadSummary(callable, content, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) { + exists( + I::SummarizedCallable callable, 
I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasStoreSummary(callable, content, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicLoadStoreStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent, + I::TypeTrackerContent storeContent + ) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasLoadStoreSummary(callable, loadContent, storeContent, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicWithoutContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasWithoutContentSummary(callable, filter, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicWithContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasWithContentSummary(callable, filter, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = 
evaluateSummaryComponentStackLocal(callable, call, output) + ) + } +}