From 47b2d48da25ed1cd49c4e3dd5647b362477f2682 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 17 May 2023 23:16:59 +0200 Subject: [PATCH 01/21] python: add tests - add `getACallSimple` to `SummarizedCallable` (by adding it to `LibraryCallable`) --- .../new/internal/DataFlowDispatch.qll | 3 + .../typetracking-summaries/TestSummaries.qll | 189 ++++++++++++++++++ .../typetracking-summaries/summaries.py | 60 ++++++ .../typetracking-summaries/tracked.expected | 0 .../typetracking-summaries/tracked.ql | 36 ++++ 5 files changed, 288 insertions(+) create mode 100644 python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll create mode 100644 python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py create mode 100644 python/ql/test/experimental/dataflow/typetracking-summaries/tracked.expected create mode 100644 python/ql/test/experimental/dataflow/typetracking-summaries/tracked.ql diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index cdca96cc4ac..8dad2e8a032 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -250,6 +250,9 @@ abstract class LibraryCallable extends string { /** Gets a call to this library callable. */ abstract CallCfgNode getACall(); + /** Same as `getACall` but without referring to the call graph or API graph. */ + CallCfgNode getACallSimple() { none() } + /** Gets a data-flow node, where this library callable is used as a call-back. */ abstract ArgumentNode getACallback(); } diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll new file mode 100644 index 00000000000..1a6a504e1ee --- /dev/null +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll @@ -0,0 +1,189 @@ +private import python +private import semmle.python.dataflow.new.FlowSummary +private import semmle.python.ApiGraphs + +/** + * This module ensures that the `callStep` predicate in + * our type tracker implelemtation does not refer to the + * `getACall` predicate on `SummarizedCallable`. + */ +module RecursionGuard { + private import semmle.python.dataflow.new.internal.TypeTrackerSpecific as TT + + private class RecursionGuard extends SummarizedCallable { + RecursionGuard() { this = "RecursionGuard" } + + override DataFlow::CallCfgNode getACall() { + result.getFunction().asCfgNode().(NameNode).getId() = this and + (TT::callStep(_, _) implies any()) + } + + override DataFlow::CallCfgNode getACallSimple() { none() } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + } + + predicate test(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + TT::levelStepNoCall(nodeFrom, nodeTo) + } +} + +private class SummarizedCallableIdentity extends SummarizedCallable { + SummarizedCallableIdentity() { this = "identity" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = true + } +} + +// For lambda flow to work, implement lambdaCall and lambdaCreation +private class SummarizedCallableApplyLambda extends SummarizedCallable { + SummarizedCallableApplyLambda() { this = "apply_lambda" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[1]" and + output = "Argument[0].Parameter[0]" and + preservesValue = true + or + input = "Argument[0].ReturnValue" and + output = "ReturnValue" and + preservesValue = true + } +} + +private class SummarizedCallableReversed extends SummarizedCallable { + SummarizedCallableReversed() { this = "reversed" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[0].ListElement" and + output = "ReturnValue.ListElement" and + preservesValue = true + } +} + +private class SummarizedCallableMap extends SummarizedCallable { + SummarizedCallableMap() { this = "list_map" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[1].ListElement" and + output = "Argument[0].Parameter[0]" and + preservesValue = true + or + input = "Argument[0].ReturnValue" and + output = "ReturnValue.ListElement" and + preservesValue = true + } +} + +private class SummarizedCallableAppend extends SummarizedCallable { + SummarizedCallableAppend() { this = "append_to_list" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = false + or + input = "Argument[1]" and + output = "ReturnValue.ListElement" and + preservesValue = true + } +} + +private class SummarizedCallableJsonLoads extends SummarizedCallable { + SummarizedCallableJsonLoads() { this = "json.loads" } + + override DataFlow::CallCfgNode getACall() { + result = API::moduleImport("json").getMember("loads").getACall() + } + + override DataFlow::CallCfgNode getACallSimple() { none() } + + override DataFlow::ArgumentNode getACallback() { + result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource() + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[0]" and + output = "ReturnValue.ListElement" and + preservesValue = true + } +} + +// read and store +private class SummarizedCallableReadSecret extends SummarizedCallable { + SummarizedCallableReadSecret() { this = "read_secret" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[0].Attribute[secret]" and + output = "ReturnValue" and + preservesValue = true + } +} + +private class SummarizedCallableSetSecret extends SummarizedCallable { + SummarizedCallableSetSecret() { this = "set_secret" } + + override DataFlow::CallCfgNode getACall() { none() } + + override DataFlow::CallCfgNode getACallSimple() { + result.getFunction().asCfgNode().(NameNode).getId() = this + } + + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + input = "Argument[1]" and + output = "Argument[0].Attribute[secret]" and + preservesValue = true + } +} diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py new file mode 100644 index 00000000000..f7affa6036f --- /dev/null +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py @@ -0,0 +1,60 @@ +import sys +import os + +# Simple summary +tainted = identity(tracked) # $ tracked +tainted # $ MISSING: tracked + +# Lambda summary +# I think the missing result is expected because type tracking +# is not allowed to flow back out of a call. +tainted_lambda = apply_lambda(lambda x: x, tracked) # $ tracked +tainted_lambda # $ MISSING: tracked + +# A lambda that directly introduces taint +bad_lambda = apply_lambda(lambda x: tracked, 1) # $ tracked +bad_lambda # $ MISSING: tracked + +# A lambda that breaks the flow +untainted_lambda = apply_lambda(lambda x: 1, tracked) # $ tracked +untainted_lambda + +# Collection summaries +tainted_list = reversed([tracked]) # $ tracked +tl = tainted_list[0] +tl # $ MISSING: tracked + +# Complex summaries +def add_colon(x): + return x + ":" + +tainted_mapped = list_map(add_colon, [tracked]) # $ tracked +tm = tainted_mapped[0] +tm # $ MISSING: tracked + +def explicit_identity(x): + return x + +tainted_mapped_explicit = list_map(explicit_identity, [tracked]) # $ tracked +tainted_mapped_explicit[0] # $ MISSING: tracked + +tainted_mapped_summary = list_map(identity, [tracked]) # $ tracked +tms = tainted_mapped_summary[0] +tms # $ MISSING: tracked + +another_tainted_list = append_to_list([], tracked) # $ tracked +atl = another_tainted_list[0] +atl # $ MISSING: tracked + +from json import loads as json_loads +tainted_resultlist = json_loads(tracked) # $ tracked +tr = tainted_resultlist[0] +tr # $ MISSING: tracked + +x.secret = tracked # $ tracked=secret tracked +r = read_secret(x) # $ tracked=secret MISSING: tracked +r # $ MISSING: tracked + +y # $ MISSING: tracked=secret +set_secret(y, tracked) # $ tracked MISSING: tracked=secret +y.secret # $ MISSING: tracked tracked=secret \ No newline at end of file diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.expected b/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.ql b/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.ql new file mode 100644 index 00000000000..e5bf62053a0 --- /dev/null +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/tracked.ql @@ -0,0 +1,36 @@ +import python +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.TypeTracker +import TestUtilities.InlineExpectationsTest +import semmle.python.ApiGraphs +import TestSummaries + +// ----------------------------------------------------------------------------- +// tracked +// ----------------------------------------------------------------------------- +private DataFlow::TypeTrackingNode tracked(TypeTracker t) { + t.start() and + result.asCfgNode() = any(NameNode n | n.getId() = "tracked") + or + exists(TypeTracker t2 | result = tracked(t2).track(t2, t)) +} + +class TrackedTest extends InlineExpectationsTest { + TrackedTest() { this = "TrackedTest" } + + override string getARelevantTag() { result = "tracked" } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(DataFlow::Node e, TypeTracker t | + exists(e.getLocation().getFile().getRelativePath()) and + e.getLocation().getStartLine() > 0 and + tracked(t).flowsTo(e) and + // Module variables have no sensible location, and hence can't be annotated. + not e instanceof DataFlow::ModuleVariableNode and + tag = "tracked" and + location = e.getLocation() and + value = t.getAttr() and + element = e.toString() + ) + } +} From 2daa9577bbbcdf762fb00c96dcd20849245e75b0 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 16 May 2023 14:20:29 +0200 Subject: [PATCH 02/21] ruby/python: implement shared module ruby: - create new shared file `SummaryTypeTracker.qll` - move much logic into the module - instantiate the module - remove old logic, now provided by module python: - clone shared file - instantiate module - use (some of the) steps provided by the module --- config/identical-files.json | 4 + .../new/internal/SummaryTypeTracker.qll | 382 ++++++++++++++++++ .../new/internal/TypeTrackerSpecific.qll | 128 +++++- .../typetracking-summaries/summaries.py | 14 +- .../ruby/typetracking/SummaryTypeTracker.qll | 382 ++++++++++++++++++ .../ruby/typetracking/TypeTrackerSpecific.qll | 370 +++++------------ 6 files changed, 1011 insertions(+), 269 deletions(-) create mode 100644 python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll create mode 100644 ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll diff --git a/config/identical-files.json b/config/identical-files.json index 29fae2d3855..3d84e84dc9c 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -522,6 +522,10 @@ "python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll", "ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll" ], + "SummaryTypeTracker": [ + "python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll", + "ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll" + ], "AccessPathSyntax": [ "csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll", "go/ql/lib/semmle/go/dataflow/internal/AccessPathSyntax.qll", diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll new file mode 100644 index 00000000000..310dec3aadf --- /dev/null +++ b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll @@ -0,0 +1,382 @@ +/** + * Provides the implementation of a summary type tracker, that is type tracking through flow summaries. + * To use this, you must implement the `Input` signature. You can then use the predicates in the `Output` + * signature to implement the predicates of the same names inside `TypeTrackerSpecific.qll`. + */ + +/** The classes and predicates needed to generate a summary type tracker. */ +signature module Input { + // Dataflow nodes + class Node; + + // Content + class TypeTrackerContent; + + class TypeTrackerContentFilter; + + // Relating content and filters + /** + * Gets a content filter to use for a `WithoutContent[content]` step, or has no result if + * the step should be treated as ordinary flow. + * + * `WithoutContent` is often used to perform strong updates on individual collection elements, but for + * type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful + * for restricting the type of an object, and in these cases we translate it to a filter. + */ + TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content); + + /** + * Gets a content filter to use for a `WithContent[content]` step, or has no result if + * the step cannot be handled by type-tracking. + * + * `WithContent` is often used to perform strong updates on individual collection elements (or rather + * to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive. + * However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter. + */ + TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content); + + // Summaries and their stacks + class SummaryComponent; + + class SummaryComponentStack { + SummaryComponent head(); + } + + /** Gets a singleton stack containing `component`. */ + SummaryComponentStack singleton(SummaryComponent component); + + /** + * Gets the stack obtained by pushing `head` onto `tail`. + */ + SummaryComponentStack push(SummaryComponent component, SummaryComponentStack stack); + + /** Gets a singleton stack representing a return. */ + SummaryComponent return(); + + // Relating content to summaries + /** Gets a summary component for content `c`. */ + SummaryComponent content(TypeTrackerContent contents); + + /** Gets a summary component where data is not allowed to be stored in `c`. */ + SummaryComponent withoutContent(TypeTrackerContent contents); + + /** Gets a summary component where data must be stored in `c`. */ + SummaryComponent withContent(TypeTrackerContent contents); + + // Callables + class SummarizedCallable { + predicate propagatesFlow( + SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + ); + } + + // Relating nodes to summaries + /** Gets a dataflow node respresenting the argument of `call` indicated by `arg`. */ + Node argumentOf(Node call, SummaryComponent arg); + + /** Gets a dataflow node respresenting the parameter of `callable` indicated by `param`. */ + Node parameterOf(Node callable, SummaryComponent param); + + /** Gets a dataflow node respresenting the return of `callable` indicated by `return`. */ + Node returnOf(Node callable, SummaryComponent return); + + // Specific summary handling + /** Holds if component should be treated as a level step by type tracking. */ + predicate componentLevelStep(SummaryComponent component); + + /** Holds if the given component can't be evaluated by `evaluateSummaryComponentStackLocal`. */ + predicate isNonLocal(SummaryComponent component); + + // Relating callables to nodes + /** Gets a dataflow node respresenting a call to `callable`. */ + Node callTo(SummarizedCallable callable); +} + +/** + * The predicates provided by a summary type tracker. + * These are meant to be used in `TypeTrackerSpecific.qll` + * inside the predicates of the same names. + */ +signature module Output { + /** + * Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. + */ + predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo); + + /** + * Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`. + */ + predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content); + + /** + * Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`. + */ + predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content); + + /** + * Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`. + */ + predicate basicLoadStoreStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent, + I::TypeTrackerContent storeContent + ); + + /** + * Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here. + */ + predicate basicWithoutContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ); + + /** + * Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`. + */ + predicate basicWithContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ); +} + +/** + * Implementation of the summary type tracker, that is type tracking through flow summaries. + */ +module SummaryFlow implements Output { + pragma[nomagic] + private predicate hasLoadSummary( + I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input, + I::SummaryComponentStack output + ) { + callable.propagatesFlow(I::push(I::content(contents), input), output, true) and + not I::isNonLocal(input.head()) and + not I::isNonLocal(output.head()) + } + + pragma[nomagic] + private predicate hasStoreSummary( + I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input, + I::SummaryComponentStack output + ) { + not I::isNonLocal(input.head()) and + not I::isNonLocal(output.head()) and + ( + callable.propagatesFlow(input, I::push(I::content(contents), output), true) + or + // Allow the input to start with an arbitrary WithoutContent[X]. + // Since type-tracking only tracks one content deep, and we're about to store into another content, + // we're already preventing the input from being in a content. + callable + .propagatesFlow(I::push(I::withoutContent(_), input), + I::push(I::content(contents), output), true) + ) + } + + pragma[nomagic] + private predicate hasLoadStoreSummary( + I::SummarizedCallable callable, I::TypeTrackerContent loadContents, + I::TypeTrackerContent storeContents, I::SummaryComponentStack input, + I::SummaryComponentStack output + ) { + callable + .propagatesFlow(I::push(I::content(loadContents), input), + I::push(I::content(storeContents), output), true) and + not I::isNonLocal(input.head()) and + not I::isNonLocal(output.head()) + } + + pragma[nomagic] + private predicate hasWithoutContentSummary( + I::SummarizedCallable callable, I::TypeTrackerContentFilter filter, + I::SummaryComponentStack input, I::SummaryComponentStack output + ) { + exists(I::TypeTrackerContent content | + callable.propagatesFlow(I::push(I::withoutContent(content), input), output, true) and + filter = I::getFilterFromWithoutContentStep(content) and + not I::isNonLocal(input.head()) and + not I::isNonLocal(output.head()) and + input != output + ) + } + + pragma[nomagic] + private predicate hasWithContentSummary( + I::SummarizedCallable callable, I::TypeTrackerContentFilter filter, + I::SummaryComponentStack input, I::SummaryComponentStack output + ) { + exists(I::TypeTrackerContent content | + callable.propagatesFlow(I::push(I::withContent(content), input), output, true) and + filter = I::getFilterFromWithContentStep(content) and + not I::isNonLocal(input.head()) and + not I::isNonLocal(output.head()) and + input != output + ) + } + + /** + * Gets a data flow I::Node corresponding an argument or return value of `call`, + * as specified by `component`. + */ + bindingset[call, component] + private I::Node evaluateSummaryComponentLocal(I::Node call, I::SummaryComponent component) { + result = I::argumentOf(call, component) + or + component = I::return() and + result = call + } + + /** + * Holds if `callable` is relevant for type-tracking and we therefore want `stack` to + * be evaluated locally at its call sites. + */ + pragma[nomagic] + private predicate dependsOnSummaryComponentStack( + I::SummarizedCallable callable, I::SummaryComponentStack stack + ) { + exists(I::callTo(callable)) and + ( + callable.propagatesFlow(stack, _, true) + or + callable.propagatesFlow(_, stack, true) + or + // include store summaries as they may skip an initial step at the input + hasStoreSummary(callable, _, stack, _) + ) + or + dependsOnSummaryComponentStackCons(callable, _, stack) + } + + pragma[nomagic] + private predicate dependsOnSummaryComponentStackCons( + I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail + ) { + dependsOnSummaryComponentStack(callable, I::push(head, tail)) + } + + pragma[nomagic] + private predicate dependsOnSummaryComponentStackConsLocal( + I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail + ) { + dependsOnSummaryComponentStackCons(callable, head, tail) and + not I::isNonLocal(head) + } + + pragma[nomagic] + private predicate dependsOnSummaryComponentStackLeaf( + I::SummarizedCallable callable, I::SummaryComponent leaf + ) { + dependsOnSummaryComponentStack(callable, I::singleton(leaf)) + } + + /** + * Gets a data flow I::Node corresponding to the local input or output of `call` + * identified by `stack`, if possible. + */ + pragma[nomagic] + private I::Node evaluateSummaryComponentStackLocal( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack stack + ) { + exists(I::SummaryComponent component | + dependsOnSummaryComponentStackLeaf(callable, component) and + stack = I::singleton(component) and + call = I::callTo(callable) and + result = evaluateSummaryComponentLocal(call, component) + ) + or + exists(I::Node prev, I::SummaryComponent head, I::SummaryComponentStack tail | + prev = evaluateSummaryComponentStackLocal(callable, call, tail) and + dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head), + pragma[only_bind_out](tail)) and + stack = I::push(pragma[only_bind_out](head), pragma[only_bind_out](tail)) + | + result = I::parameterOf(prev, head) + or + result = I::returnOf(prev, head) + or + I::componentLevelStep(head) and + result = prev + ) + } + + // Implement Output + predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + callable.propagatesFlow(input, output, true) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasLoadSummary(callable, content, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasStoreSummary(callable, content, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicLoadStoreStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent, + I::TypeTrackerContent storeContent + ) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasLoadStoreSummary(callable, loadContent, storeContent, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicWithoutContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasWithoutContentSummary(callable, filter, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicWithContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasWithContentSummary(callable, filter, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } +} diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll index 9e05b7869c5..81e673dc30b 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll @@ -61,7 +61,9 @@ predicate capturedJumpStep(Node nodeFrom, Node nodeTo) { predicate levelStepCall(Node nodeFrom, Node nodeTo) { none() } /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */ -predicate levelStepNoCall(Node nodeFrom, Node nodeTo) { none() } +predicate levelStepNoCall(Node nodeFrom, Node nodeTo) { + TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo) +} /** * Gets the name of a possible piece of content. For Python, this is currently only attribute names, @@ -108,6 +110,12 @@ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) { nodeFrom = a.getValue() and nodeTo = a.getObject() ) + or + exists(DataFlowPublic::ContentSet contents | + contents.(DataFlowPublic::AttributeContent).getAttribute() = content + | + TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents) + ) } /** @@ -119,13 +127,24 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) { nodeFrom = a.getObject() and nodeTo = a ) + or + exists(DataFlowPublic::ContentSet contents | + contents.(DataFlowPublic::AttributeContent).getAttribute() = content + | + TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents) + ) } /** * Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`. */ predicate basicLoadStoreStep(Node nodeFrom, Node nodeTo, string loadContent, string storeContent) { - none() + exists(DataFlowPublic::ContentSet loadContents, DataFlowPublic::ContentSet storeContents | + loadContents.(DataFlowPublic::AttributeContent).getAttribute() = loadContent and + storeContents.(DataFlowPublic::AttributeContent).getAttribute() = storeContent + | + TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContents, storeContents) + ) } /** @@ -144,3 +163,108 @@ predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) class Boolean extends boolean { Boolean() { this = true or this = false } } + +private import SummaryTypeTracker as SummaryTypeTracker +private import semmle.python.dataflow.new.FlowSummary as FlowSummary +private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch + +pragma[noinline] +private predicate argumentPositionMatch( + DataFlowPublic::CallCfgNode call, DataFlowPublic::ArgumentNode arg, + DataFlowDispatch::ParameterPosition ppos +) { + exists(DataFlowDispatch::ArgumentPosition apos, DataFlowPrivate::DataFlowCall c | + c.getNode() = call.asCfgNode() and + arg.argumentOf(c, apos) and + DataFlowDispatch::parameterMatch(ppos, apos) + ) +} + +module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { + // Dataflow nodes + class Node = DataFlowPublic::Node; + + // Content + class TypeTrackerContent = DataFlowPublic::ContentSet; + + class TypeTrackerContentFilter = ContentFilter; + + TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content) { none() } + + TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { none() } + + // Callables + class SummarizedCallable = FlowSummary::SummarizedCallable; + + // Summaries and their stacks + class SummaryComponent = FlowSummary::SummaryComponent; + + class SummaryComponentStack = FlowSummary::SummaryComponentStack; + + SummaryComponentStack singleton(SummaryComponent component) { + result = FlowSummary::SummaryComponentStack::singleton(component) + } + + SummaryComponentStack push(SummaryComponent component, SummaryComponentStack stack) { + result = FlowSummary::SummaryComponentStack::push(component, stack) + } + + // Relating content to summaries + SummaryComponent content(TypeTrackerContent contents) { + result = FlowSummary::SummaryComponent::content(contents) + } + + SummaryComponent withoutContent(TypeTrackerContent contents) { none() } + + SummaryComponent withContent(TypeTrackerContent contents) { none() } + + SummaryComponent return() { result = FlowSummary::SummaryComponent::return() } + + // Relating nodes to summaries + Node argumentOf(Node call, SummaryComponent arg) { + exists(DataFlowDispatch::ParameterPosition pos | + arg = FlowSummary::SummaryComponent::argument(pos) and + argumentPositionMatch(call, result, pos) + ) + } + + Node parameterOf(Node callable, SummaryComponent param) { + exists( + DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos, Parameter p + | + param = FlowSummary::SummaryComponent::parameter(apos) and + DataFlowDispatch::parameterMatch(ppos, apos) and + // pick the SsaNode rather than the CfgNode + result.asVar().getDefinition().(ParameterDefinition).getParameter() = p and + ( + exists(int i | ppos.isPositional(i) | + p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArg(i) + ) + or + exists(string name | ppos.isKeyword(name) | + p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArgByName(name) + ) + ) + ) + } + + Node returnOf(Node callable, SummaryComponent return) { + return = FlowSummary::SummaryComponent::return() and + // result should be return value of callable which should be a lambda + result.asCfgNode() = + callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode() + } + + // Specific summary handling + predicate componentLevelStep(SummaryComponent component) { none() } + + pragma[nomagic] + predicate isNonLocal(SummaryComponent component) { + component = FlowSummary::SummaryComponent::content(_) + } + + // Relating callables to nodes + Node callTo(SummarizedCallable callable) { result = callable.getACallSimple() } +} + +module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow; diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py index f7affa6036f..728456cc711 100644 --- a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py @@ -3,7 +3,7 @@ import os # Simple summary tainted = identity(tracked) # $ tracked -tainted # $ MISSING: tracked +tainted # $ tracked # Lambda summary # I think the missing result is expected because type tracking @@ -13,7 +13,7 @@ tainted_lambda # $ MISSING: tracked # A lambda that directly introduces taint bad_lambda = apply_lambda(lambda x: tracked, 1) # $ tracked -bad_lambda # $ MISSING: tracked +bad_lambda # $ tracked # A lambda that breaks the flow untainted_lambda = apply_lambda(lambda x: 1, tracked) # $ tracked @@ -52,9 +52,9 @@ tr = tainted_resultlist[0] tr # $ MISSING: tracked x.secret = tracked # $ tracked=secret tracked -r = read_secret(x) # $ tracked=secret MISSING: tracked -r # $ MISSING: tracked +r = read_secret(x) # $ tracked=secret tracked +r # $ tracked -y # $ MISSING: tracked=secret -set_secret(y, tracked) # $ tracked MISSING: tracked=secret -y.secret # $ MISSING: tracked tracked=secret \ No newline at end of file +y # $ tracked=secret +set_secret(y, tracked) # $ tracked tracked=secret +y.secret # $ tracked tracked=secret \ No newline at end of file diff --git a/ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll b/ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll new file mode 100644 index 00000000000..310dec3aadf --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll @@ -0,0 +1,382 @@ +/** + * Provides the implementation of a summary type tracker, that is type tracking through flow summaries. + * To use this, you must implement the `Input` signature. You can then use the predicates in the `Output` + * signature to implement the predicates of the same names inside `TypeTrackerSpecific.qll`. + */ + +/** The classes and predicates needed to generate a summary type tracker. */ +signature module Input { + // Dataflow nodes + class Node; + + // Content + class TypeTrackerContent; + + class TypeTrackerContentFilter; + + // Relating content and filters + /** + * Gets a content filter to use for a `WithoutContent[content]` step, or has no result if + * the step should be treated as ordinary flow. + * + * `WithoutContent` is often used to perform strong updates on individual collection elements, but for + * type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful + * for restricting the type of an object, and in these cases we translate it to a filter. + */ + TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content); + + /** + * Gets a content filter to use for a `WithContent[content]` step, or has no result if + * the step cannot be handled by type-tracking. + * + * `WithContent` is often used to perform strong updates on individual collection elements (or rather + * to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive. + * However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter. + */ + TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content); + + // Summaries and their stacks + class SummaryComponent; + + class SummaryComponentStack { + SummaryComponent head(); + } + + /** Gets a singleton stack containing `component`. */ + SummaryComponentStack singleton(SummaryComponent component); + + /** + * Gets the stack obtained by pushing `head` onto `tail`. + */ + SummaryComponentStack push(SummaryComponent component, SummaryComponentStack stack); + + /** Gets a singleton stack representing a return. */ + SummaryComponent return(); + + // Relating content to summaries + /** Gets a summary component for content `c`. */ + SummaryComponent content(TypeTrackerContent contents); + + /** Gets a summary component where data is not allowed to be stored in `c`. */ + SummaryComponent withoutContent(TypeTrackerContent contents); + + /** Gets a summary component where data must be stored in `c`. */ + SummaryComponent withContent(TypeTrackerContent contents); + + // Callables + class SummarizedCallable { + predicate propagatesFlow( + SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + ); + } + + // Relating nodes to summaries + /** Gets a dataflow node respresenting the argument of `call` indicated by `arg`. */ + Node argumentOf(Node call, SummaryComponent arg); + + /** Gets a dataflow node respresenting the parameter of `callable` indicated by `param`. */ + Node parameterOf(Node callable, SummaryComponent param); + + /** Gets a dataflow node respresenting the return of `callable` indicated by `return`. */ + Node returnOf(Node callable, SummaryComponent return); + + // Specific summary handling + /** Holds if component should be treated as a level step by type tracking. */ + predicate componentLevelStep(SummaryComponent component); + + /** Holds if the given component can't be evaluated by `evaluateSummaryComponentStackLocal`. */ + predicate isNonLocal(SummaryComponent component); + + // Relating callables to nodes + /** Gets a dataflow node respresenting a call to `callable`. */ + Node callTo(SummarizedCallable callable); +} + +/** + * The predicates provided by a summary type tracker. + * These are meant to be used in `TypeTrackerSpecific.qll` + * inside the predicates of the same names. + */ +signature module Output { + /** + * Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. + */ + predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo); + + /** + * Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`. + */ + predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content); + + /** + * Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`. + */ + predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content); + + /** + * Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`. + */ + predicate basicLoadStoreStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent, + I::TypeTrackerContent storeContent + ); + + /** + * Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here. + */ + predicate basicWithoutContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ); + + /** + * Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`. + */ + predicate basicWithContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ); +} + +/** + * Implementation of the summary type tracker, that is type tracking through flow summaries. + */ +module SummaryFlow implements Output { + pragma[nomagic] + private predicate hasLoadSummary( + I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input, + I::SummaryComponentStack output + ) { + callable.propagatesFlow(I::push(I::content(contents), input), output, true) and + not I::isNonLocal(input.head()) and + not I::isNonLocal(output.head()) + } + + pragma[nomagic] + private predicate hasStoreSummary( + I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input, + I::SummaryComponentStack output + ) { + not I::isNonLocal(input.head()) and + not I::isNonLocal(output.head()) and + ( + callable.propagatesFlow(input, I::push(I::content(contents), output), true) + or + // Allow the input to start with an arbitrary WithoutContent[X]. + // Since type-tracking only tracks one content deep, and we're about to store into another content, + // we're already preventing the input from being in a content. + callable + .propagatesFlow(I::push(I::withoutContent(_), input), + I::push(I::content(contents), output), true) + ) + } + + pragma[nomagic] + private predicate hasLoadStoreSummary( + I::SummarizedCallable callable, I::TypeTrackerContent loadContents, + I::TypeTrackerContent storeContents, I::SummaryComponentStack input, + I::SummaryComponentStack output + ) { + callable + .propagatesFlow(I::push(I::content(loadContents), input), + I::push(I::content(storeContents), output), true) and + not I::isNonLocal(input.head()) and + not I::isNonLocal(output.head()) + } + + pragma[nomagic] + private predicate hasWithoutContentSummary( + I::SummarizedCallable callable, I::TypeTrackerContentFilter filter, + I::SummaryComponentStack input, I::SummaryComponentStack output + ) { + exists(I::TypeTrackerContent content | + callable.propagatesFlow(I::push(I::withoutContent(content), input), output, true) and + filter = I::getFilterFromWithoutContentStep(content) and + not I::isNonLocal(input.head()) and + not I::isNonLocal(output.head()) and + input != output + ) + } + + pragma[nomagic] + private predicate hasWithContentSummary( + I::SummarizedCallable callable, I::TypeTrackerContentFilter filter, + I::SummaryComponentStack input, I::SummaryComponentStack output + ) { + exists(I::TypeTrackerContent content | + callable.propagatesFlow(I::push(I::withContent(content), input), output, true) and + filter = I::getFilterFromWithContentStep(content) and + not I::isNonLocal(input.head()) and + not I::isNonLocal(output.head()) and + input != output + ) + } + + /** + * Gets a data flow I::Node corresponding an argument or return value of `call`, + * as specified by `component`. + */ + bindingset[call, component] + private I::Node evaluateSummaryComponentLocal(I::Node call, I::SummaryComponent component) { + result = I::argumentOf(call, component) + or + component = I::return() and + result = call + } + + /** + * Holds if `callable` is relevant for type-tracking and we therefore want `stack` to + * be evaluated locally at its call sites. + */ + pragma[nomagic] + private predicate dependsOnSummaryComponentStack( + I::SummarizedCallable callable, I::SummaryComponentStack stack + ) { + exists(I::callTo(callable)) and + ( + callable.propagatesFlow(stack, _, true) + or + callable.propagatesFlow(_, stack, true) + or + // include store summaries as they may skip an initial step at the input + hasStoreSummary(callable, _, stack, _) + ) + or + dependsOnSummaryComponentStackCons(callable, _, stack) + } + + pragma[nomagic] + private predicate dependsOnSummaryComponentStackCons( + I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail + ) { + dependsOnSummaryComponentStack(callable, I::push(head, tail)) + } + + pragma[nomagic] + private predicate dependsOnSummaryComponentStackConsLocal( + I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail + ) { + dependsOnSummaryComponentStackCons(callable, head, tail) and + not I::isNonLocal(head) + } + + pragma[nomagic] + private predicate dependsOnSummaryComponentStackLeaf( + I::SummarizedCallable callable, I::SummaryComponent leaf + ) { + dependsOnSummaryComponentStack(callable, I::singleton(leaf)) + } + + /** + * Gets a data flow I::Node corresponding to the local input or output of `call` + * identified by `stack`, if possible. + */ + pragma[nomagic] + private I::Node evaluateSummaryComponentStackLocal( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack stack + ) { + exists(I::SummaryComponent component | + dependsOnSummaryComponentStackLeaf(callable, component) and + stack = I::singleton(component) and + call = I::callTo(callable) and + result = evaluateSummaryComponentLocal(call, component) + ) + or + exists(I::Node prev, I::SummaryComponent head, I::SummaryComponentStack tail | + prev = evaluateSummaryComponentStackLocal(callable, call, tail) and + dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head), + pragma[only_bind_out](tail)) and + stack = I::push(pragma[only_bind_out](head), pragma[only_bind_out](tail)) + | + result = I::parameterOf(prev, head) + or + result = I::returnOf(prev, head) + or + I::componentLevelStep(head) and + result = prev + ) + } + + // Implement Output + predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + callable.propagatesFlow(input, output, true) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasLoadSummary(callable, content, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasStoreSummary(callable, content, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicLoadStoreStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent, + I::TypeTrackerContent storeContent + ) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasLoadStoreSummary(callable, loadContent, storeContent, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicWithoutContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasWithoutContentSummary(callable, filter, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } + + predicate basicWithContentStep( + I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter + ) { + exists( + I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input, + I::SummaryComponentStack output + | + hasWithContentSummary(callable, filter, pragma[only_bind_into](input), + pragma[only_bind_into](output)) and + call = I::callTo(callable) and + nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and + nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) + ) + } +} diff --git a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll index 55ec26258d6..4f17302e72b 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll @@ -112,15 +112,7 @@ predicate levelStepCall(Node nodeFrom, Node nodeTo) { /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */ pragma[nomagic] predicate levelStepNoCall(Node nodeFrom, Node nodeTo) { - exists( - SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input, - SummaryComponentStack output - | - callable.propagatesFlow(input, output, true) and - call.asExpr().getExpr() = callable.getACallSimple() and - nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and - nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) - ) + TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo) or localFieldStep(nodeFrom, nodeTo) } @@ -290,16 +282,7 @@ predicate returnStep(Node nodeFrom, Node nodeTo) { predicate basicStoreStep(Node nodeFrom, Node nodeTo, DataFlow::ContentSet contents) { storeStepIntoSourceNode(nodeFrom, nodeTo, contents) or - exists( - SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input, - SummaryComponentStack output - | - hasStoreSummary(callable, contents, pragma[only_bind_into](input), - pragma[only_bind_into](output)) and - call.asExpr().getExpr() = callable.getACallSimple() and - nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and - nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) - ) + TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents) } /** @@ -333,15 +316,7 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, DataFlow::ContentSet content nodeTo.asExpr() = call ) or - exists( - SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input, - SummaryComponentStack output - | - hasLoadSummary(callable, contents, pragma[only_bind_into](input), pragma[only_bind_into](output)) and - call.asExpr().getExpr() = callable.getACallSimple() and - nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and - nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) - ) + TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents) } /** @@ -350,48 +325,21 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, DataFlow::ContentSet content predicate basicLoadStoreStep( Node nodeFrom, Node nodeTo, DataFlow::ContentSet loadContent, DataFlow::ContentSet storeContent ) { - exists( - SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input, - SummaryComponentStack output - | - hasLoadStoreSummary(callable, loadContent, storeContent, pragma[only_bind_into](input), - pragma[only_bind_into](output)) and - call.asExpr().getExpr() = callable.getACallSimple() and - nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and - nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) - ) + TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContent, storeContent) } /** * Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here. */ predicate basicWithoutContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) { - exists( - SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input, - SummaryComponentStack output - | - hasWithoutContentSummary(callable, filter, pragma[only_bind_into](input), - pragma[only_bind_into](output)) and - call.asExpr().getExpr() = callable.getACallSimple() and - nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and - nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) - ) + TypeTrackerSummaryFlow::basicWithoutContentStep(nodeFrom, nodeTo, filter) } /** * Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`. */ predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) { - exists( - SummarizedCallable callable, DataFlowPublic::CallNode call, SummaryComponentStack input, - SummaryComponentStack output - | - hasWithContentSummary(callable, filter, pragma[only_bind_into](input), - pragma[only_bind_into](output)) and - call.asExpr().getExpr() = callable.getACallSimple() and - nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and - nodeTo = evaluateSummaryComponentStackLocal(callable, call, output) - ) + TypeTrackerSummaryFlow::basicWithContentStep(nodeFrom, nodeTo, filter) } /** @@ -403,121 +351,6 @@ class Boolean extends boolean { private import SummaryComponentStack -pragma[nomagic] -private predicate hasStoreSummary( - SummarizedCallable callable, DataFlow::ContentSet contents, SummaryComponentStack input, - SummaryComponentStack output -) { - not isNonLocal(input.head()) and - not isNonLocal(output.head()) and - ( - callable.propagatesFlow(input, push(SummaryComponent::content(contents), output), true) - or - // Allow the input to start with an arbitrary WithoutContent[X]. - // Since type-tracking only tracks one content deep, and we're about to store into another content, - // we're already preventing the input from being in a content. - callable - .propagatesFlow(push(SummaryComponent::withoutContent(_), input), - push(SummaryComponent::content(contents), output), true) - ) -} - -pragma[nomagic] -private predicate hasLoadSummary( - SummarizedCallable callable, DataFlow::ContentSet contents, SummaryComponentStack input, - SummaryComponentStack output -) { - callable.propagatesFlow(push(SummaryComponent::content(contents), input), output, true) and - not isNonLocal(input.head()) and - not isNonLocal(output.head()) -} - -pragma[nomagic] -private predicate hasLoadStoreSummary( - SummarizedCallable callable, DataFlow::ContentSet loadContents, - DataFlow::ContentSet storeContents, SummaryComponentStack input, SummaryComponentStack output -) { - callable - .propagatesFlow(push(SummaryComponent::content(loadContents), input), - push(SummaryComponent::content(storeContents), output), true) and - not isNonLocal(input.head()) and - not isNonLocal(output.head()) -} - -/** - * Gets a content filter to use for a `WithoutContent[content]` step, or has no result if - * the step should be treated as ordinary flow. - * - * `WithoutContent` is often used to perform strong updates on individual collection elements, but for - * type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful - * for restricting the type of an object, and in these cases we translate it to a filter. - */ -private ContentFilter getFilterFromWithoutContentStep(DataFlow::ContentSet content) { - ( - content.isAnyElement() - or - content.isElementLowerBoundOrUnknown(_) - or - content.isElementOfTypeOrUnknown(_) - or - content.isSingleton(any(DataFlow::Content::UnknownElementContent c)) - ) and - result = MkElementFilter() -} - -pragma[nomagic] -private predicate hasWithoutContentSummary( - SummarizedCallable callable, ContentFilter filter, SummaryComponentStack input, - SummaryComponentStack output -) { - exists(DataFlow::ContentSet content | - callable.propagatesFlow(push(SummaryComponent::withoutContent(content), input), output, true) and - filter = getFilterFromWithoutContentStep(content) and - not isNonLocal(input.head()) and - not isNonLocal(output.head()) and - input != output - ) -} - -/** - * Gets a content filter to use for a `WithContent[content]` step, or has no result if - * the step cannot be handled by type-tracking. - * - * `WithContent` is often used to perform strong updates on individual collection elements (or rather - * to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive. - * However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter. - */ -private ContentFilter getFilterFromWithContentStep(DataFlow::ContentSet content) { - ( - content.isAnyElement() - or - content.isElementLowerBound(_) - or - content.isElementLowerBoundOrUnknown(_) - or - content.isElementOfType(_) - or - content.isElementOfTypeOrUnknown(_) - or - content.isSingleton(any(DataFlow::Content::ElementContent c)) - ) and - result = MkElementFilter() -} - -pragma[nomagic] -private predicate hasWithContentSummary( - SummarizedCallable callable, ContentFilter filter, SummaryComponentStack input, - SummaryComponentStack output -) { - exists(DataFlow::ContentSet content | - callable.propagatesFlow(push(SummaryComponent::withContent(content), input), output, true) and - filter = getFilterFromWithContentStep(content) and - not isNonLocal(input.head()) and - not isNonLocal(output.head()) and - input != output - ) -} - /** * Holds if the given component can't be evaluated by `evaluateSummaryComponentStackLocal`. */ @@ -528,112 +361,129 @@ predicate isNonLocal(SummaryComponent component) { component = SC::withContent(_) } -/** - * Gets a data flow node corresponding an argument or return value of `call`, - * as specified by `component`. - */ -bindingset[call, component] -private DataFlow::Node evaluateSummaryComponentLocal( - DataFlow::CallNode call, SummaryComponent component -) { - exists(DataFlowDispatch::ParameterPosition pos | - component = SummaryComponent::argument(pos) and - argumentPositionMatch(call.asExpr(), result, pos) - ) - or - component = SummaryComponent::return() and - result = call -} +private import SummaryTypeTracker as SummaryTypeTracker +private import codeql.ruby.dataflow.FlowSummary as FlowSummary -/** - * Holds if `callable` is relevant for type-tracking and we therefore want `stack` to - * be evaluated locally at its call sites. - */ -pragma[nomagic] -private predicate dependsOnSummaryComponentStack( - SummarizedCallable callable, SummaryComponentStack stack -) { - exists(callable.getACallSimple()) and - ( - callable.propagatesFlow(stack, _, true) - or - callable.propagatesFlow(_, stack, true) - or - // include store summaries as they may skip an initial step at the input - hasStoreSummary(callable, _, stack, _) - ) - or - dependsOnSummaryComponentStackCons(callable, _, stack) -} +module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { + // Dataflow nodes + class Node = DataFlow::Node; -pragma[nomagic] -private predicate dependsOnSummaryComponentStackCons( - SummarizedCallable callable, SummaryComponent head, SummaryComponentStack tail -) { - dependsOnSummaryComponentStack(callable, SCS::push(head, tail)) -} + // Content + class TypeTrackerContent = DataFlowPublic::ContentSet; -pragma[nomagic] -private predicate dependsOnSummaryComponentStackConsLocal( - SummarizedCallable callable, SummaryComponent head, SummaryComponentStack tail -) { - dependsOnSummaryComponentStackCons(callable, head, tail) and - not isNonLocal(head) -} + class TypeTrackerContentFilter = ContentFilter; -pragma[nomagic] -private predicate dependsOnSummaryComponentStackLeaf( - SummarizedCallable callable, SummaryComponent leaf -) { - dependsOnSummaryComponentStack(callable, SCS::singleton(leaf)) -} + TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content) { + ( + content.isAnyElement() + or + content.isElementLowerBoundOrUnknown(_) + or + content.isElementOfTypeOrUnknown(_) + or + content.isSingleton(any(DataFlow::Content::UnknownElementContent c)) + ) and + result = MkElementFilter() + } -/** - * Gets a data flow node corresponding to the local input or output of `call` - * identified by `stack`, if possible. - */ -pragma[nomagic] -private DataFlow::Node evaluateSummaryComponentStackLocal( - SummarizedCallable callable, DataFlow::CallNode call, SummaryComponentStack stack -) { - exists(SummaryComponent component | - dependsOnSummaryComponentStackLeaf(callable, component) and - stack = SCS::singleton(component) and - call.asExpr().getExpr() = callable.getACallSimple() and - result = evaluateSummaryComponentLocal(call, component) - ) - or - exists(DataFlow::Node prev, SummaryComponent head, SummaryComponentStack tail | - prev = evaluateSummaryComponentStackLocal(callable, call, tail) and - dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head), - pragma[only_bind_out](tail)) and - stack = SCS::push(pragma[only_bind_out](head), pragma[only_bind_out](tail)) - | + TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { + ( + content.isAnyElement() + or + content.isElementLowerBound(_) + or + content.isElementLowerBoundOrUnknown(_) + or + content.isElementOfType(_) + or + content.isElementOfTypeOrUnknown(_) + or + content.isSingleton(any(DataFlow::Content::ElementContent c)) + ) and + result = MkElementFilter() + } + + // Summaries and their stacks + class SummaryComponent = FlowSummary::SummaryComponent; + + class SummaryComponentStack = FlowSummary::SummaryComponentStack; + + SummaryComponentStack singleton(SummaryComponent component) { + result = FlowSummary::SummaryComponentStack::singleton(component) + } + + SummaryComponentStack push(SummaryComponent component, SummaryComponentStack stack) { + result = FlowSummary::SummaryComponentStack::push(component, stack) + } + + // Relating content to summaries + SummaryComponent content(TypeTrackerContent contents) { + result = FlowSummary::SummaryComponent::content(contents) + } + + SummaryComponent withoutContent(TypeTrackerContent contents) { + result = FlowSummary::SummaryComponent::withoutContent(contents) + } + + SummaryComponent withContent(TypeTrackerContent contents) { + result = FlowSummary::SummaryComponent::withContent(contents) + } + + SummaryComponent return() { result = FlowSummary::SummaryComponent::return() } + + // Callables + class SummarizedCallable = FlowSummary::SummarizedCallable; + + // Relating nodes to summaries + Node argumentOf(Node call, SummaryComponent arg) { + exists(DataFlowDispatch::ParameterPosition pos | + arg = SummaryComponent::argument(pos) and + argumentPositionMatch(call.asExpr(), result, pos) + ) + } + + Node parameterOf(Node callable, SummaryComponent param) { exists( DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos, DataFlowPrivate::ParameterNodeImpl p | - head = SummaryComponent::parameter(apos) and + param = SummaryComponent::parameter(apos) and DataFlowDispatch::parameterMatch(ppos, apos) and - p.isSourceParameterOf(prev.asExpr().getExpr(), ppos) and + p.isSourceParameterOf(callable.asExpr().getExpr(), ppos) and // We need to include both `p` and the SSA definition for `p`, since in type-tracking // the step from `p` to the SSA definition is considered a call step. result = [p.(DataFlow::Node), DataFlowPrivate::LocalFlow::getParameterDefNode(p.getParameter())] ) - or + } + + Node returnOf(Node callable, SummaryComponent return) { exists(DataFlowPrivate::SynthReturnNode ret | - head = SummaryComponent::return() and - ret.getCfgScope() = prev.asExpr().getExpr() and + return = SummaryComponent::return() and + ret.getCfgScope() = callable.asExpr().getExpr() and // We need to include both `ret` and `ret.getAnInput()`, since in type-tracking // the step from `ret.getAnInput()` to `ret` is considered a return step. result = [ret.(DataFlow::Node), ret.getAnInput()] ) - or - exists(DataFlow::ContentSet content | - head = SummaryComponent::withoutContent(content) and - not exists(getFilterFromWithoutContentStep(content)) and - result = prev + } + + // Specific summary handling + predicate componentLevelStep(SummaryComponent component) { + exists(TypeTrackerContent content | + component = SummaryComponent::withoutContent(content) and + not exists(getFilterFromWithoutContentStep(content)) ) - ) + } + + pragma[nomagic] + predicate isNonLocal(SummaryComponent component) { + component = SC::content(_) + or + component = SC::withContent(_) + } + + // Relating callables to nodes + Node callTo(SummarizedCallable callable) { result.asExpr().getExpr() = callable.getACallSimple() } } + +module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow; From 820b5f235eba6c92251dde37ad2f052092648c75 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 30 May 2023 13:36:10 +0200 Subject: [PATCH 03/21] python: add change note --- .../2023-05-30-typetracking-via-flow-summaries.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2023-05-30-typetracking-via-flow-summaries.md diff --git a/python/ql/lib/change-notes/2023-05-30-typetracking-via-flow-summaries.md b/python/ql/lib/change-notes/2023-05-30-typetracking-via-flow-summaries.md new file mode 100644 index 00000000000..11c01629987 --- /dev/null +++ b/python/ql/lib/change-notes/2023-05-30-typetracking-via-flow-summaries.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Type tracking is now aware of flow summaries. This leads to a richer API graph, and may lead to more results in some queries. From 7ab3cde3aac29624605a3727f17e8da641f2e86b Mon Sep 17 00:00:00 2001 From: yoff Date: Wed, 7 Jun 2023 13:54:31 +0200 Subject: [PATCH 04/21] Apply suggestions from code review Co-authored-by: Asger F --- ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll b/ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll index 310dec3aadf..1968f6db4e8 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll @@ -1,10 +1,10 @@ /** - * Provides the implementation of a summary type tracker, that is type tracking through flow summaries. + * Provides the implementation of type tracking steps through flow summaries. * To use this, you must implement the `Input` signature. You can then use the predicates in the `Output` * signature to implement the predicates of the same names inside `TypeTrackerSpecific.qll`. */ -/** The classes and predicates needed to generate a summary type tracker. */ +/** The classes and predicates needed to generate type-tracking steps from summaries. */ signature module Input { // Dataflow nodes class Node; From 6ddf1f7eaf4eef6b36b63e6b0d93e7531603bf6c Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 7 Jun 2023 14:07:08 +0200 Subject: [PATCH 05/21] ruby/python: remove predicates from interface --- .../new/internal/SummaryTypeTracker.qll | 49 +++++++++++-------- .../new/internal/TypeTrackerSpecific.qll | 8 --- .../ruby/typetracking/SummaryTypeTracker.qll | 45 ++++++++++------- .../ruby/typetracking/TypeTrackerSpecific.qll | 15 ------ 4 files changed, 54 insertions(+), 63 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll index 310dec3aadf..4e0636826b8 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll @@ -1,10 +1,10 @@ /** - * Provides the implementation of a summary type tracker, that is type tracking through flow summaries. + * Provides the implementation of type tracking steps through flow summaries. * To use this, you must implement the `Input` signature. You can then use the predicates in the `Output` * signature to implement the predicates of the same names inside `TypeTrackerSpecific.qll`. */ -/** The classes and predicates needed to generate a summary type tracker. */ +/** The classes and predicates needed to generate type-tracking steps from summaries. */ signature module Input { // Dataflow nodes class Node; @@ -80,13 +80,6 @@ signature module Input { /** Gets a dataflow node respresenting the return of `callable` indicated by `return`. */ Node returnOf(Node callable, SummaryComponent return); - // Specific summary handling - /** Holds if component should be treated as a level step by type tracking. */ - predicate componentLevelStep(SummaryComponent component); - - /** Holds if the given component can't be evaluated by `evaluateSummaryComponentStackLocal`. */ - predicate isNonLocal(SummaryComponent component); - // Relating callables to nodes /** Gets a dataflow node respresenting a call to `callable`. */ Node callTo(SummarizedCallable callable); @@ -146,8 +139,8 @@ module SummaryFlow implements Output { I::SummaryComponentStack output ) { callable.propagatesFlow(I::push(I::content(contents), input), output, true) and - not I::isNonLocal(input.head()) and - not I::isNonLocal(output.head()) + not isNonLocal(input.head()) and + not isNonLocal(output.head()) } pragma[nomagic] @@ -155,8 +148,8 @@ module SummaryFlow implements Output { I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input, I::SummaryComponentStack output ) { - not I::isNonLocal(input.head()) and - not I::isNonLocal(output.head()) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) and ( callable.propagatesFlow(input, I::push(I::content(contents), output), true) or @@ -178,8 +171,8 @@ module SummaryFlow implements Output { callable .propagatesFlow(I::push(I::content(loadContents), input), I::push(I::content(storeContents), output), true) and - not I::isNonLocal(input.head()) and - not I::isNonLocal(output.head()) + not isNonLocal(input.head()) and + not isNonLocal(output.head()) } pragma[nomagic] @@ -190,8 +183,8 @@ module SummaryFlow implements Output { exists(I::TypeTrackerContent content | callable.propagatesFlow(I::push(I::withoutContent(content), input), output, true) and filter = I::getFilterFromWithoutContentStep(content) and - not I::isNonLocal(input.head()) and - not I::isNonLocal(output.head()) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) and input != output ) } @@ -204,12 +197,26 @@ module SummaryFlow implements Output { exists(I::TypeTrackerContent content | callable.propagatesFlow(I::push(I::withContent(content), input), output, true) and filter = I::getFilterFromWithContentStep(content) and - not I::isNonLocal(input.head()) and - not I::isNonLocal(output.head()) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) and input != output ) } + private predicate componentLevelStep(I::SummaryComponent component) { + exists(I::TypeTrackerContent content | + component = I::withoutContent(content) and + not exists(I::getFilterFromWithoutContentStep(content)) + ) + } + + pragma[nomagic] + private predicate isNonLocal(I::SummaryComponent component) { + component = I::content(_) + or + component = I::withContent(_) + } + /** * Gets a data flow I::Node corresponding an argument or return value of `call`, * as specified by `component`. @@ -255,7 +262,7 @@ module SummaryFlow implements Output { I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail ) { dependsOnSummaryComponentStackCons(callable, head, tail) and - not I::isNonLocal(head) + not isNonLocal(head) } pragma[nomagic] @@ -290,7 +297,7 @@ module SummaryFlow implements Output { or result = I::returnOf(prev, head) or - I::componentLevelStep(head) and + componentLevelStep(head) and result = prev ) } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll index 81e673dc30b..f3f9b1b52b9 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll @@ -255,14 +255,6 @@ module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode() } - // Specific summary handling - predicate componentLevelStep(SummaryComponent component) { none() } - - pragma[nomagic] - predicate isNonLocal(SummaryComponent component) { - component = FlowSummary::SummaryComponent::content(_) - } - // Relating callables to nodes Node callTo(SummarizedCallable callable) { result = callable.getACallSimple() } } diff --git a/ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll b/ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll index 1968f6db4e8..4e0636826b8 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll @@ -80,13 +80,6 @@ signature module Input { /** Gets a dataflow node respresenting the return of `callable` indicated by `return`. */ Node returnOf(Node callable, SummaryComponent return); - // Specific summary handling - /** Holds if component should be treated as a level step by type tracking. */ - predicate componentLevelStep(SummaryComponent component); - - /** Holds if the given component can't be evaluated by `evaluateSummaryComponentStackLocal`. */ - predicate isNonLocal(SummaryComponent component); - // Relating callables to nodes /** Gets a dataflow node respresenting a call to `callable`. */ Node callTo(SummarizedCallable callable); @@ -146,8 +139,8 @@ module SummaryFlow implements Output { I::SummaryComponentStack output ) { callable.propagatesFlow(I::push(I::content(contents), input), output, true) and - not I::isNonLocal(input.head()) and - not I::isNonLocal(output.head()) + not isNonLocal(input.head()) and + not isNonLocal(output.head()) } pragma[nomagic] @@ -155,8 +148,8 @@ module SummaryFlow implements Output { I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input, I::SummaryComponentStack output ) { - not I::isNonLocal(input.head()) and - not I::isNonLocal(output.head()) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) and ( callable.propagatesFlow(input, I::push(I::content(contents), output), true) or @@ -178,8 +171,8 @@ module SummaryFlow implements Output { callable .propagatesFlow(I::push(I::content(loadContents), input), I::push(I::content(storeContents), output), true) and - not I::isNonLocal(input.head()) and - not I::isNonLocal(output.head()) + not isNonLocal(input.head()) and + not isNonLocal(output.head()) } pragma[nomagic] @@ -190,8 +183,8 @@ module SummaryFlow implements Output { exists(I::TypeTrackerContent content | callable.propagatesFlow(I::push(I::withoutContent(content), input), output, true) and filter = I::getFilterFromWithoutContentStep(content) and - not I::isNonLocal(input.head()) and - not I::isNonLocal(output.head()) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) and input != output ) } @@ -204,12 +197,26 @@ module SummaryFlow implements Output { exists(I::TypeTrackerContent content | callable.propagatesFlow(I::push(I::withContent(content), input), output, true) and filter = I::getFilterFromWithContentStep(content) and - not I::isNonLocal(input.head()) and - not I::isNonLocal(output.head()) and + not isNonLocal(input.head()) and + not isNonLocal(output.head()) and input != output ) } + private predicate componentLevelStep(I::SummaryComponent component) { + exists(I::TypeTrackerContent content | + component = I::withoutContent(content) and + not exists(I::getFilterFromWithoutContentStep(content)) + ) + } + + pragma[nomagic] + private predicate isNonLocal(I::SummaryComponent component) { + component = I::content(_) + or + component = I::withContent(_) + } + /** * Gets a data flow I::Node corresponding an argument or return value of `call`, * as specified by `component`. @@ -255,7 +262,7 @@ module SummaryFlow implements Output { I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail ) { dependsOnSummaryComponentStackCons(callable, head, tail) and - not I::isNonLocal(head) + not isNonLocal(head) } pragma[nomagic] @@ -290,7 +297,7 @@ module SummaryFlow implements Output { or result = I::returnOf(prev, head) or - I::componentLevelStep(head) and + componentLevelStep(head) and result = prev ) } diff --git a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll index 4f17302e72b..4283521afc5 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll @@ -467,21 +467,6 @@ module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { ) } - // Specific summary handling - predicate componentLevelStep(SummaryComponent component) { - exists(TypeTrackerContent content | - component = SummaryComponent::withoutContent(content) and - not exists(getFilterFromWithoutContentStep(content)) - ) - } - - pragma[nomagic] - predicate isNonLocal(SummaryComponent component) { - component = SC::content(_) - or - component = SC::withContent(_) - } - // Relating callables to nodes Node callTo(SummarizedCallable callable) { result.asExpr().getExpr() = callable.getACallSimple() } } From 7e87a7c1f726d3033016d45de6edbc642ace5c04 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 9 Jun 2023 15:29:13 +0200 Subject: [PATCH 06/21] python: rewrite `argumentPositionMatch` to not use the call graph. --- .../python/dataflow/new/internal/TypeTrackerSpecific.qll | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll index f3f9b1b52b9..11e49f5f510 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll @@ -170,13 +170,12 @@ private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowD pragma[noinline] private predicate argumentPositionMatch( - DataFlowPublic::CallCfgNode call, DataFlowPublic::ArgumentNode arg, + DataFlowPublic::CallCfgNode call, DataFlowPublic::Node arg, DataFlowDispatch::ParameterPosition ppos ) { - exists(DataFlowDispatch::ArgumentPosition apos, DataFlowPrivate::DataFlowCall c | - c.getNode() = call.asCfgNode() and - arg.argumentOf(c, apos) and - DataFlowDispatch::parameterMatch(ppos, apos) + exists(DataFlowDispatch::ArgumentPosition apos | + DataFlowDispatch::parameterMatch(ppos, apos) and + DataFlowDispatch::normalCallArg(call.getNode(), arg, apos) ) } From 8cae151883999562eb07308557f2d848c53e8db5 Mon Sep 17 00:00:00 2001 From: yoff Date: Tue, 13 Jun 2023 11:22:54 +0200 Subject: [PATCH 07/21] Update python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll Co-authored-by: Asger F --- .../dataflow/typetracking-summaries/TestSummaries.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll index 1a6a504e1ee..c139308c00e 100644 --- a/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll @@ -4,7 +4,7 @@ private import semmle.python.ApiGraphs /** * This module ensures that the `callStep` predicate in - * our type tracker implelemtation does not refer to the + * our type tracker implementation does not refer to the * `getACall` predicate on `SummarizedCallable`. */ module RecursionGuard { From 203f8226cb29d183d7b5bb6a2daa65819058ff22 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 13 Jun 2023 11:32:06 +0200 Subject: [PATCH 08/21] ruby/python: make `SummaryTypeTracker` private --- .../python/dataflow/new/internal/TypeTrackerSpecific.qll | 4 ++-- ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll index 11e49f5f510..6acc4036c16 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll @@ -179,7 +179,7 @@ private predicate argumentPositionMatch( ) } -module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { +private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { // Dataflow nodes class Node = DataFlowPublic::Node; @@ -258,4 +258,4 @@ module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { Node callTo(SummarizedCallable callable) { result = callable.getACallSimple() } } -module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow; +private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow; diff --git a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll index a096f33bd5c..b344fcf127f 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll @@ -350,7 +350,7 @@ predicate isNonLocal(SummaryComponent component) { private import SummaryTypeTracker as SummaryTypeTracker private import codeql.ruby.dataflow.FlowSummary as FlowSummary -module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { +private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { // Dataflow nodes class Node = DataFlow::Node; @@ -448,4 +448,4 @@ module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { Node callTo(SummarizedCallable callable) { result.asExpr().getExpr() = callable.getACallSimple() } } -module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow; +private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow; From b5961c7f6be954beb91b8ecd6cedb4e7dbfe0a15 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 13 Jun 2023 11:37:19 +0200 Subject: [PATCH 09/21] ruby: move to internal folder --- config/identical-files.json | 4 ++-- ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll | 2 +- .../ruby/typetracking/{ => internal}/SummaryTypeTracker.qll | 0 3 files changed, 3 insertions(+), 3 deletions(-) rename ruby/ql/lib/codeql/ruby/typetracking/{ => internal}/SummaryTypeTracker.qll (100%) diff --git a/config/identical-files.json b/config/identical-files.json index 7a66b2d52f4..39dadd75ab3 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -525,7 +525,7 @@ ], "SummaryTypeTracker": [ "python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll", - "ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll" + "ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll" ], "AccessPathSyntax": [ "csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll", @@ -603,4 +603,4 @@ "python/ql/lib/semmle/python/security/internal/EncryptionKeySizes.qll", "java/ql/lib/semmle/code/java/security/internal/EncryptionKeySizes.qll" ] -} +} \ No newline at end of file diff --git a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll index b344fcf127f..484dbdd4197 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll @@ -347,7 +347,7 @@ predicate isNonLocal(SummaryComponent component) { component = SC::withContent(_) } -private import SummaryTypeTracker as SummaryTypeTracker +private import internal.SummaryTypeTracker as SummaryTypeTracker private import codeql.ruby.dataflow.FlowSummary as FlowSummary private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { diff --git a/ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll b/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll similarity index 100% rename from ruby/ql/lib/codeql/ruby/typetracking/SummaryTypeTracker.qll rename to ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll From e11f6b5107f80a167e5fbe069114ca0fd211a264 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 13 Jun 2023 11:42:53 +0200 Subject: [PATCH 10/21] ruby/python: adjust shared file - move `isNonLocal` to the top - missing backtics --- .../dataflow/new/internal/SummaryTypeTracker.qll | 16 ++++++++-------- .../typetracking/internal/SummaryTypeTracker.qll | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll index 4e0636826b8..d1a65e1c3e5 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll @@ -133,6 +133,13 @@ signature module Output { * Implementation of the summary type tracker, that is type tracking through flow summaries. */ module SummaryFlow implements Output { + pragma[nomagic] + private predicate isNonLocal(I::SummaryComponent component) { + component = I::content(_) + or + component = I::withContent(_) + } + pragma[nomagic] private predicate hasLoadSummary( I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input, @@ -210,15 +217,8 @@ module SummaryFlow implements Output { ) } - pragma[nomagic] - private predicate isNonLocal(I::SummaryComponent component) { - component = I::content(_) - or - component = I::withContent(_) - } - /** - * Gets a data flow I::Node corresponding an argument or return value of `call`, + * Gets a data flow `I::Node` corresponding an argument or return value of `call`, * as specified by `component`. */ bindingset[call, component] diff --git a/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll b/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll index 4e0636826b8..d1a65e1c3e5 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll @@ -133,6 +133,13 @@ signature module Output { * Implementation of the summary type tracker, that is type tracking through flow summaries. */ module SummaryFlow implements Output { + pragma[nomagic] + private predicate isNonLocal(I::SummaryComponent component) { + component = I::content(_) + or + component = I::withContent(_) + } + pragma[nomagic] private predicate hasLoadSummary( I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input, @@ -210,15 +217,8 @@ module SummaryFlow implements Output { ) } - pragma[nomagic] - private predicate isNonLocal(I::SummaryComponent component) { - component = I::content(_) - or - component = I::withContent(_) - } - /** - * Gets a data flow I::Node corresponding an argument or return value of `call`, + * Gets a data flow `I::Node` corresponding an argument or return value of `call`, * as specified by `component`. */ bindingset[call, component] From 33ad15e989f2db19b9ba3c4298b5a536581c0bc7 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 13 Jun 2023 11:48:06 +0200 Subject: [PATCH 11/21] ruby: use aliases --- .../ruby/typetracking/TypeTrackerSpecific.qll | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll index 484dbdd4197..d14c182d127 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll @@ -394,28 +394,18 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { class SummaryComponentStack = FlowSummary::SummaryComponentStack; - SummaryComponentStack singleton(SummaryComponent component) { - result = FlowSummary::SummaryComponentStack::singleton(component) - } + predicate singleton = FlowSummary::SummaryComponentStack::singleton/1; - SummaryComponentStack push(SummaryComponent component, SummaryComponentStack stack) { - result = FlowSummary::SummaryComponentStack::push(component, stack) - } + predicate push = FlowSummary::SummaryComponentStack::push/2; // Relating content to summaries - SummaryComponent content(TypeTrackerContent contents) { - result = FlowSummary::SummaryComponent::content(contents) - } + predicate content = FlowSummary::SummaryComponent::content/1; - SummaryComponent withoutContent(TypeTrackerContent contents) { - result = FlowSummary::SummaryComponent::withoutContent(contents) - } + predicate withoutContent = FlowSummary::SummaryComponent::withoutContent/1; - SummaryComponent withContent(TypeTrackerContent contents) { - result = FlowSummary::SummaryComponent::withContent(contents) - } + predicate withContent = FlowSummary::SummaryComponent::withContent/1; - SummaryComponent return() { result = FlowSummary::SummaryComponent::return() } + predicate return = FlowSummary::SummaryComponent::return/0; // Callables class SummarizedCallable = FlowSummary::SummarizedCallable; From 2ae5dae474f821e20b9b036d0a158556d65756ae Mon Sep 17 00:00:00 2001 From: yoff Date: Wed, 14 Jun 2023 20:55:45 +0200 Subject: [PATCH 12/21] Apply suggestions from code review Co-authored-by: Rasmus Wriedt Larsen --- .../dataflow/new/internal/SummaryTypeTracker.qll | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll index d1a65e1c3e5..9ecc1b5ea1d 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll @@ -16,7 +16,8 @@ signature module Input { // Relating content and filters /** - * Gets a content filter to use for a `WithoutContent[content]` step, or has no result if + * Gets a content filter to use for a `WithoutContent[content]` step, (data is not allowed to be stored in `content`) + * or has no result if * the step should be treated as ordinary flow. * * `WithoutContent` is often used to perform strong updates on individual collection elements, but for @@ -26,7 +27,8 @@ signature module Input { TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content); /** - * Gets a content filter to use for a `WithContent[content]` step, or has no result if + * Gets a content filter to use for a `WithContent[content]` step, (data must be stored in `content`) + * or has no result if * the step cannot be handled by type-tracking. * * `WithContent` is often used to perform strong updates on individual collection elements (or rather @@ -57,10 +59,10 @@ signature module Input { /** Gets a summary component for content `c`. */ SummaryComponent content(TypeTrackerContent contents); - /** Gets a summary component where data is not allowed to be stored in `c`. */ + /** Gets a summary component where data is not allowed to be stored in `contents`. */ SummaryComponent withoutContent(TypeTrackerContent contents); - /** Gets a summary component where data must be stored in `c`. */ + /** Gets a summary component where data must be stored in `contents`. */ SummaryComponent withContent(TypeTrackerContent contents); // Callables From af72509ce6bd61462cde7f3fd1b24b154f8f3bcd Mon Sep 17 00:00:00 2001 From: yoff Date: Wed, 14 Jun 2023 20:57:14 +0200 Subject: [PATCH 13/21] Update python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll Co-authored-by: Rasmus Wriedt Larsen --- .../new/internal/TypeTrackerSpecific.qll | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll index 6acc4036c16..48881d37285 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll @@ -200,24 +200,18 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { class SummaryComponentStack = FlowSummary::SummaryComponentStack; - SummaryComponentStack singleton(SummaryComponent component) { - result = FlowSummary::SummaryComponentStack::singleton(component) - } + predicate singleton = FlowSummary::SummaryComponentStack::singleton/1; - SummaryComponentStack push(SummaryComponent component, SummaryComponentStack stack) { - result = FlowSummary::SummaryComponentStack::push(component, stack) - } + predicate push = FlowSummary::SummaryComponentStack::push/2; // Relating content to summaries - SummaryComponent content(TypeTrackerContent contents) { - result = FlowSummary::SummaryComponent::content(contents) - } + predicate content = FlowSummary::SummaryComponent::content/1; - SummaryComponent withoutContent(TypeTrackerContent contents) { none() } + predicate withoutContent = FlowSummary::SummaryComponent::withoutContent/1; - SummaryComponent withContent(TypeTrackerContent contents) { none() } + predicate withContent = FlowSummary::SummaryComponent::withContent/1; - SummaryComponent return() { result = FlowSummary::SummaryComponent::return() } + predicate return = FlowSummary::SummaryComponent::return/0; // Relating nodes to summaries Node argumentOf(Node call, SummaryComponent arg) { From 0e713e6fc10f57ce13ab49c8ae5738347527180d Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 14 Jun 2023 21:02:42 +0200 Subject: [PATCH 14/21] ruby/python: more consistent naming of parameters --- .../dataflow/new/internal/SummaryTypeTracker.qll | 2 +- .../typetracking/internal/SummaryTypeTracker.qll | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll index 9ecc1b5ea1d..9c6f841651d 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll @@ -50,7 +50,7 @@ signature module Input { /** * Gets the stack obtained by pushing `head` onto `tail`. */ - SummaryComponentStack push(SummaryComponent component, SummaryComponentStack stack); + SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail); /** Gets a singleton stack representing a return. */ SummaryComponent return(); diff --git a/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll b/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll index d1a65e1c3e5..9c6f841651d 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll @@ -16,7 +16,8 @@ signature module Input { // Relating content and filters /** - * Gets a content filter to use for a `WithoutContent[content]` step, or has no result if + * Gets a content filter to use for a `WithoutContent[content]` step, (data is not allowed to be stored in `content`) + * or has no result if * the step should be treated as ordinary flow. * * `WithoutContent` is often used to perform strong updates on individual collection elements, but for @@ -26,7 +27,8 @@ signature module Input { TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content); /** - * Gets a content filter to use for a `WithContent[content]` step, or has no result if + * Gets a content filter to use for a `WithContent[content]` step, (data must be stored in `content`) + * or has no result if * the step cannot be handled by type-tracking. * * `WithContent` is often used to perform strong updates on individual collection elements (or rather @@ -48,7 +50,7 @@ signature module Input { /** * Gets the stack obtained by pushing `head` onto `tail`. */ - SummaryComponentStack push(SummaryComponent component, SummaryComponentStack stack); + SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail); /** Gets a singleton stack representing a return. */ SummaryComponent return(); @@ -57,10 +59,10 @@ signature module Input { /** Gets a summary component for content `c`. */ SummaryComponent content(TypeTrackerContent contents); - /** Gets a summary component where data is not allowed to be stored in `c`. */ + /** Gets a summary component where data is not allowed to be stored in `contents`. */ SummaryComponent withoutContent(TypeTrackerContent contents); - /** Gets a summary component where data must be stored in `c`. */ + /** Gets a summary component where data must be stored in `contents`. */ SummaryComponent withContent(TypeTrackerContent contents); // Callables From 6521a51d93e6cf034e8727e281e10191928f8124 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 14 Jun 2023 21:14:50 +0200 Subject: [PATCH 15/21] python: unique strings in tests --- .../dataflow/summaries/TestSummaries.qll | 2 +- .../typetracking-summaries/TestSummaries.qll | 18 +++++++++--------- .../typetracking-summaries/summaries.py | 3 ++- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll index 971a653c469..cdd61420bbb 100644 --- a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll +++ b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll @@ -60,7 +60,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable { } private class SummarizedCallableReversed extends SummarizedCallable { - SummarizedCallableReversed() { this = "reversed" } + SummarizedCallableReversed() { this = "list_reversed" } override DataFlow::CallCfgNode getACall() { result.getFunction().asCfgNode().(NameNode).getId() = this diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll index c139308c00e..8d626b332a3 100644 --- a/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll @@ -11,7 +11,7 @@ module RecursionGuard { private import semmle.python.dataflow.new.internal.TypeTrackerSpecific as TT private class RecursionGuard extends SummarizedCallable { - RecursionGuard() { this = "RecursionGuard" } + RecursionGuard() { this = "TypeTrackingSummariesRecursionGuard" } override DataFlow::CallCfgNode getACall() { result.getFunction().asCfgNode().(NameNode).getId() = this and @@ -29,7 +29,7 @@ module RecursionGuard { } private class SummarizedCallableIdentity extends SummarizedCallable { - SummarizedCallableIdentity() { this = "identity" } + SummarizedCallableIdentity() { this = "TTS_identity" } override DataFlow::CallCfgNode getACall() { none() } @@ -48,7 +48,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable { // For lambda flow to work, implement lambdaCall and lambdaCreation private class SummarizedCallableApplyLambda extends SummarizedCallable { - SummarizedCallableApplyLambda() { this = "apply_lambda" } + SummarizedCallableApplyLambda() { this = "TTS_apply_lambda" } override DataFlow::CallCfgNode getACall() { none() } @@ -70,7 +70,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable { } private class SummarizedCallableReversed extends SummarizedCallable { - SummarizedCallableReversed() { this = "reversed" } + SummarizedCallableReversed() { this = "TTS_reversed" } override DataFlow::CallCfgNode getACall() { none() } @@ -88,7 +88,7 @@ private class SummarizedCallableReversed extends SummarizedCallable { } private class SummarizedCallableMap extends SummarizedCallable { - SummarizedCallableMap() { this = "list_map" } + SummarizedCallableMap() { this = "TTS_list_map" } override DataFlow::CallCfgNode getACall() { none() } @@ -110,7 +110,7 @@ private class SummarizedCallableMap extends SummarizedCallable { } private class SummarizedCallableAppend extends SummarizedCallable { - SummarizedCallableAppend() { this = "append_to_list" } + SummarizedCallableAppend() { this = "TTS_append_to_list" } override DataFlow::CallCfgNode getACall() { none() } @@ -132,7 +132,7 @@ private class SummarizedCallableAppend extends SummarizedCallable { } private class SummarizedCallableJsonLoads extends SummarizedCallable { - SummarizedCallableJsonLoads() { this = "json.loads" } + SummarizedCallableJsonLoads() { this = "TTS_json.loads" } override DataFlow::CallCfgNode getACall() { result = API::moduleImport("json").getMember("loads").getACall() @@ -153,7 +153,7 @@ private class SummarizedCallableJsonLoads extends SummarizedCallable { // read and store private class SummarizedCallableReadSecret extends SummarizedCallable { - SummarizedCallableReadSecret() { this = "read_secret" } + SummarizedCallableReadSecret() { this = "TTS_read_secret" } override DataFlow::CallCfgNode getACall() { none() } @@ -171,7 +171,7 @@ private class SummarizedCallableReadSecret extends SummarizedCallable { } private class SummarizedCallableSetSecret extends SummarizedCallable { - SummarizedCallableSetSecret() { this = "set_secret" } + SummarizedCallableSetSecret() { this = "TTS_set_secret" } override DataFlow::CallCfgNode getACall() { none() } diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py index 728456cc711..d4195787185 100644 --- a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py @@ -46,6 +46,7 @@ another_tainted_list = append_to_list([], tracked) # $ tracked atl = another_tainted_list[0] atl # $ MISSING: tracked +# This will not work, as the call is not found by `getACallSimple`. from json import loads as json_loads tainted_resultlist = json_loads(tracked) # $ tracked tr = tainted_resultlist[0] @@ -57,4 +58,4 @@ r # $ tracked y # $ tracked=secret set_secret(y, tracked) # $ tracked tracked=secret -y.secret # $ tracked tracked=secret \ No newline at end of file +y.secret # $ tracked tracked=secret From 2491fda58e7a014ed41f383955e8f0b232ba3b17 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 14 Jun 2023 21:16:39 +0200 Subject: [PATCH 16/21] python: update comment --- .../semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll index 48881d37285..823e26688da 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll @@ -243,7 +243,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { Node returnOf(Node callable, SummaryComponent return) { return = FlowSummary::SummaryComponent::return() and - // result should be return value of callable which should be a lambda + // `result` should be the return value of a callable expresion (lambda or function) referenced by `callable` result.asCfgNode() = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode() } From 0267b329040171f11255231f6d36bc1ed257cb62 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 14 Jun 2023 21:17:12 +0200 Subject: [PATCH 17/21] fix eol --- config/identical-files.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/identical-files.json b/config/identical-files.json index 39dadd75ab3..ceed02ba5d6 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -603,4 +603,4 @@ "python/ql/lib/semmle/python/security/internal/EncryptionKeySizes.qll", "java/ql/lib/semmle/code/java/security/internal/EncryptionKeySizes.qll" ] -} \ No newline at end of file +} From 4fded84a49cbc07d69d5c8659a96fb25dc6c07bd Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 14 Jun 2023 21:30:58 +0200 Subject: [PATCH 18/21] python: implement missing predicates --- .../python/dataflow/new/internal/TypeTrackerSpecific.qll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll index 823e26688da..8817cdf21ce 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll @@ -207,9 +207,9 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { // Relating content to summaries predicate content = FlowSummary::SummaryComponent::content/1; - predicate withoutContent = FlowSummary::SummaryComponent::withoutContent/1; + SummaryComponent withoutContent(TypeTrackerContent contents) { none() } - predicate withContent = FlowSummary::SummaryComponent::withContent/1; + SummaryComponent withContent(TypeTrackerContent contents) { none() } predicate return = FlowSummary::SummaryComponent::return/0; From b7bf750174f5494776799b63288c9a3cc6ad979b Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 14 Jun 2023 22:23:21 +0200 Subject: [PATCH 19/21] python: use updated names in test --- .../typetracking-summaries/summaries.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py index d4195787185..5801b8e7961 100644 --- a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py @@ -2,25 +2,25 @@ import sys import os # Simple summary -tainted = identity(tracked) # $ tracked +tainted = TTS_identity(tracked) # $ tracked tainted # $ tracked # Lambda summary # I think the missing result is expected because type tracking # is not allowed to flow back out of a call. -tainted_lambda = apply_lambda(lambda x: x, tracked) # $ tracked +tainted_lambda = TTS_apply_lambda(lambda x: x, tracked) # $ tracked tainted_lambda # $ MISSING: tracked # A lambda that directly introduces taint -bad_lambda = apply_lambda(lambda x: tracked, 1) # $ tracked +bad_lambda = TTS_apply_lambda(lambda x: tracked, 1) # $ tracked bad_lambda # $ tracked # A lambda that breaks the flow -untainted_lambda = apply_lambda(lambda x: 1, tracked) # $ tracked +untainted_lambda = TTS_apply_lambda(lambda x: 1, tracked) # $ tracked untainted_lambda # Collection summaries -tainted_list = reversed([tracked]) # $ tracked +tainted_list = TTS_reversed([tracked]) # $ tracked tl = tainted_list[0] tl # $ MISSING: tracked @@ -28,21 +28,21 @@ tl # $ MISSING: tracked def add_colon(x): return x + ":" -tainted_mapped = list_map(add_colon, [tracked]) # $ tracked +tainted_mapped = TTS_list_map(add_colon, [tracked]) # $ tracked tm = tainted_mapped[0] tm # $ MISSING: tracked def explicit_identity(x): return x -tainted_mapped_explicit = list_map(explicit_identity, [tracked]) # $ tracked +tainted_mapped_explicit = TTS_list_map(explicit_identity, [tracked]) # $ tracked tainted_mapped_explicit[0] # $ MISSING: tracked -tainted_mapped_summary = list_map(identity, [tracked]) # $ tracked +tainted_mapped_summary = TTS_list_map(identity, [tracked]) # $ tracked tms = tainted_mapped_summary[0] tms # $ MISSING: tracked -another_tainted_list = append_to_list([], tracked) # $ tracked +another_tainted_list = TTS_append_to_list([], tracked) # $ tracked atl = another_tainted_list[0] atl # $ MISSING: tracked @@ -53,9 +53,9 @@ tr = tainted_resultlist[0] tr # $ MISSING: tracked x.secret = tracked # $ tracked=secret tracked -r = read_secret(x) # $ tracked=secret tracked +r = TTS_read_secret(x) # $ tracked=secret tracked r # $ tracked y # $ tracked=secret -set_secret(y, tracked) # $ tracked tracked=secret +TTS_set_secret(y, tracked) # $ tracked tracked=secret y.secret # $ tracked tracked=secret From afafaac0d785c0f7b31345aa4a28e25f99c58d80 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 16 Jun 2023 14:41:36 +0200 Subject: [PATCH 20/21] Python: Fix typo --- .../semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll index 8817cdf21ce..bac194aae9e 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll @@ -243,7 +243,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { Node returnOf(Node callable, SummaryComponent return) { return = FlowSummary::SummaryComponent::return() and - // `result` should be the return value of a callable expresion (lambda or function) referenced by `callable` + // `result` should be the return value of a callable expression (lambda or function) referenced by `callable` result.asCfgNode() = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode() } From fb6955edf98bb51054650b3e9187d7451d9ab901 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 16 Jun 2023 14:43:45 +0200 Subject: [PATCH 21/21] Python: Add tests of methods in summaries --- .../dataflow/summaries/summaries.py | 18 ++++++++++++++++++ .../typetracking-summaries/summaries.py | 17 +++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/python/ql/test/experimental/dataflow/summaries/summaries.py b/python/ql/test/experimental/dataflow/summaries/summaries.py index 1532a5393d8..806b39f6dc8 100644 --- a/python/ql/test/experimental/dataflow/summaries/summaries.py +++ b/python/ql/test/experimental/dataflow/summaries/summaries.py @@ -66,3 +66,21 @@ SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]" from json import loads as json_loads tainted_resultlist = json_loads(SOURCE) SINK(tainted_resultlist[0]) # $ flow="SOURCE, l:-1 -> tainted_resultlist[0]" + + +# Class methods are not handled right now + +class MyClass: + @staticmethod + def foo(x): + return x + + def bar(self, x): + return x + +through_staticmethod = apply_lambda(MyClass.foo, SOURCE) +through_staticmethod # $ MISSING: flow + +mc = MyClass() +through_method = apply_lambda(mc.bar, SOURCE) +through_method # $ MISSING: flow diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py index 5801b8e7961..f838032b063 100644 --- a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py @@ -59,3 +59,20 @@ r # $ tracked y # $ tracked=secret TTS_set_secret(y, tracked) # $ tracked tracked=secret y.secret # $ tracked tracked=secret + +# Class methods are not handled right now + +class MyClass: + @staticmethod + def foo(x): + return x + + def bar(self, x): + return x + +through_staticmethod = TTS_apply_lambda(MyClass.foo, tracked) # $ tracked +through_staticmethod # $ MISSING: tracked + +mc = MyClass() +through_method = TTS_apply_lambda(mc.bar, tracked) # $ tracked +through_method # $ MISSING: tracked