From faaa558ed90be76b41f0fa537f8391ee8f18dd3c Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Fri, 24 Nov 2023 11:48:08 +0100 Subject: [PATCH] Python: Use `FlowSummaryImpl` from `dataflow` pack --- config/identical-files.json | 2 - .../python/dataflow/new/FlowSummary.qll | 74 +- .../new/internal/AccessPathSyntax.qll | 182 -- .../new/internal/DataFlowDispatch.qll | 26 +- .../dataflow/new/internal/DataFlowPrivate.qll | 5 +- .../dataflow/new/internal/FlowSummaryImpl.qll | 1645 ++--------------- .../new/internal/FlowSummaryImplSpecific.qll | 324 ---- .../new/internal/TypeTrackerSpecific.qll | 26 +- .../ql/lib/semmle/python/frameworks/Flask.qll | 4 +- .../lib/semmle/python/frameworks/Stdlib.qll | 56 +- .../data/internal/ApiGraphModels.qll | 42 +- .../data/internal/ApiGraphModelsSpecific.qll | 16 +- .../dataflow/summaries-checks/invalid-spec.ql | 2 +- .../missing-attribute-content.ql | 2 +- .../dataflow/summaries/TestSummaries.qll | 16 +- .../dataflow/summaries/summaries.ql | 2 +- .../typetracking-summaries/TestSummaries.qll | 20 +- .../library-tests/frameworks/data/test.ql | 4 +- .../library-tests/frameworks/data/warnings.ql | 1 - 19 files changed, 305 insertions(+), 2144 deletions(-) delete mode 100644 python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll delete mode 100644 python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll diff --git a/config/identical-files.json b/config/identical-files.json index a5d2e2e3f5a..104e7de4dc0 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -56,7 +56,6 @@ "DataFlow Java/C#/Go/Ruby/Python/Swift Flow Summaries": [ "java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll", "go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll", - "python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll", "swift/ql/lib/codeql/swift/dataflow/internal/FlowSummaryImpl.qll" ], "SsaReadPosition Java/C#": [ @@ -467,7 +466,6 @@ "AccessPathSyntax": [ "go/ql/lib/semmle/go/dataflow/internal/AccessPathSyntax.qll", "java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll", - "python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll", "swift/ql/lib/codeql/swift/dataflow/internal/AccessPathSyntax.qll" ], "IncompleteUrlSubstringSanitization": [ diff --git a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll index 8b80e13d06d..800c9592dcc 100644 --- a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll +++ b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll @@ -13,61 +13,14 @@ private module Summaries { private import semmle.python.Frameworks } -class SummaryComponent = Impl::Public::SummaryComponent; +deprecated class SummaryComponent = Impl::Private::SummaryComponent; /** Provides predicates for constructing summary components. */ -module SummaryComponent { - private import Impl::Public::SummaryComponent as SC +deprecated module SummaryComponent = Impl::Private::SummaryComponent; - predicate parameter = SC::parameter/1; +deprecated class SummaryComponentStack = Impl::Private::SummaryComponentStack; - predicate argument = SC::argument/1; - - predicate content = SC::content/1; - - /** Gets a summary component that represents a list element. */ - SummaryComponent listElement() { result = content(any(ListElementContent c)) } - - /** Gets a summary component that represents a set element. */ - SummaryComponent setElement() { result = content(any(SetElementContent c)) } - - /** Gets a summary component that represents a tuple element. */ - SummaryComponent tupleElement(int index) { - exists(TupleElementContent c | c.getIndex() = index and result = content(c)) - } - - /** Gets a summary component that represents a dictionary element. */ - SummaryComponent dictionaryElement(string key) { - exists(DictionaryElementContent c | c.getKey() = key and result = content(c)) - } - - /** Gets a summary component that represents a dictionary element at any key. */ - SummaryComponent dictionaryElementAny() { result = content(any(DictionaryElementAnyContent c)) } - - /** Gets a summary component that represents an attribute element. */ - SummaryComponent attribute(string attr) { - exists(AttributeContent c | c.getAttribute() = attr and result = content(c)) - } - - /** Gets a summary component that represents the return value of a call. */ - SummaryComponent return() { result = SC::return(any(ReturnKind rk)) } -} - -class SummaryComponentStack = Impl::Public::SummaryComponentStack; - -/** Provides predicates for constructing stacks of summary components. */ -module SummaryComponentStack { - private import Impl::Public::SummaryComponentStack as SCS - - predicate singleton = SCS::singleton/1; - - predicate push = SCS::push/2; - - predicate argument = SCS::argument/1; - - /** Gets a singleton stack representing the return value of a call. */ - SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } -} +deprecated module SummaryComponentStack = Impl::Private::SummaryComponentStack; /** A callable with a flow summary, identified by a unique string. */ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::SummarizedCallable { @@ -75,21 +28,14 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari SummarizedCallable() { any() } /** - * Same as - * - * ```ql - * propagatesFlow( - * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - * ) - * ``` - * - * but uses an external (string) representation of the input and output stacks. + * DEPRECATED: Use `propagatesFlow` instead. */ - pragma[nomagic] - predicate propagatesFlowExt(string input, string output, boolean preservesValue) { none() } + deprecated predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + this.propagatesFlow(input, output, preservesValue) + } } -class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack; +deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack; private class SummarizedCallableFromModel extends SummarizedCallable { string type; @@ -109,7 +55,7 @@ private class SummarizedCallableFromModel extends SummarizedCallable { ) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) | kind = "value" and preservesValue = true diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll b/python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll deleted file mode 100644 index 0c3dc8427b2..00000000000 --- a/python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll +++ /dev/null @@ -1,182 +0,0 @@ -/** - * Module for parsing access paths from MaD models, both the identifying access path used - * by dynamic languages, and the input/output specifications for summary steps. - * - * This file is used by the shared data flow library and by the JavaScript libraries - * (which does not use the shared data flow libraries). - */ - -/** - * Convenience-predicate for extracting two capture groups at once. - */ -bindingset[input, regexp] -private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) { - capture1 = input.regexpCapture(regexp, 1) and - capture2 = input.regexpCapture(regexp, 2) -} - -/** Companion module to the `AccessPath` class. */ -module AccessPath { - /** A string that should be parsed as an access path. */ - abstract class Range extends string { - bindingset[this] - Range() { any() } - } - - /** - * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value - * of the constant or any value contained in the interval. - */ - bindingset[arg] - int parseInt(string arg) { - result = arg.toInt() - or - // Match "n1..n2" - exists(string lo, string hi | - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and - result = [lo.toInt() .. hi.toInt()] - ) - } - - /** - * Parses a lower-bounded interval `n..` and gets the lower bound. - */ - bindingset[arg] - int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() } - - /** - * Parses an integer constant or interval (bounded or unbounded) that explicitly - * references the arity, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - private int parseIntWithExplicitArity(string arg, int arity) { - result >= 0 and // do not allow N-1 to resolve to a negative index - exists(string lo | - // N-x - lo = arg.regexpCapture("N-(\\d+)", 1) and - result = arity - lo.toInt() - or - // N-x.. - lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and - result = [arity - lo.toInt(), arity - 1] - ) - or - exists(string lo, string hi | - // x..N-y - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [lo.toInt() .. arity - hi.toInt()] - or - // N-x..N-y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. arity - hi.toInt()] and - result >= 0 - or - // N-x..y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. hi.toInt()] and - result >= 0 - ) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) and gets any - * of the integers contained within (of which there may be infinitely many). - * - * Has no result for arguments involving an explicit arity, such as `N-1`. - */ - bindingset[arg, result] - int parseIntUnbounded(string arg) { - result = parseInt(arg) - or - result >= parseLowerBound(arg) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) that - * may reference the arity of a call, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - int parseIntWithArity(string arg, int arity) { - result = parseInt(arg) - or - result in [parseLowerBound(arg) .. arity - 1] - or - result = parseIntWithExplicitArity(arg, arity) - } -} - -/** Gets the `n`th token on the access path as a string. */ -private string getRawToken(AccessPath path, int n) { - // Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`. - // Instead use regexpFind to match valid tokens, and supplement with a final length - // check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token. - result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _) -} - -/** - * A string that occurs as an access path (either identifying or input/output spec) - * which might be relevant for this database. - */ -class AccessPath extends string instanceof AccessPath::Range { - /** Holds if this string is not a syntactically valid access path. */ - predicate hasSyntaxError() { - // If the lengths match, all characters must haven been included in a token - // or seen by the `.` lookahead pattern. - this != "" and - not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1 - } - - /** Gets the `n`th token on the access path (if there are no syntax errors). */ - AccessPathToken getToken(int n) { - result = getRawToken(this, n) and - not this.hasSyntaxError() - } - - /** Gets the number of tokens on the path (if there are no syntax errors). */ - int getNumToken() { - result = count(int n | exists(getRawToken(this, n))) and - not this.hasSyntaxError() - } -} - -/** - * An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths. - */ -class AccessPathToken extends string { - AccessPathToken() { this = getRawToken(_, _) } - - private string getPart(int part) { - result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) - } - - /** Gets the name of the token, such as `Member` from `Member[x]` */ - string getName() { result = this.getPart(1) } - - /** - * Gets the argument list, such as `1,2` from `Member[1,2]`, - * or has no result if there are no arguments. - */ - string getArgumentList() { result = this.getPart(2) } - - /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() } - - /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - pragma[nomagic] - string getArgument(string name, int n) { name = this.getName() and result = this.getArgument(n) } - - /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument() { result = this.getArgument(_) } - - /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument(string name) { result = this.getArgument(name, _) } - - /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */ - int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } -} diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 6e2a462cc5a..da0bd5c8b7c 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -36,7 +36,6 @@ private import python private import DataFlowPublic private import DataFlowPrivate private import FlowSummaryImpl as FlowSummaryImpl -private import FlowSummaryImplSpecific as FlowSummaryImplSpecific private import semmle.python.internal.CachedStages private import semmle.python.dataflow.new.internal.TypeTracker::CallGraphConstruction as CallGraphConstruction @@ -49,13 +48,13 @@ newtype TParameterPosition = // since synthetic parameters are made for a synthetic summary callable, based on // what Argument positions they have flow for, we need to make sure we have such // parameter positions available. - FlowSummaryImplSpecific::ParsePositions::isParsedPositionalArgumentPosition(_, index) + FlowSummaryImpl::ParsePositions::isParsedPositionalArgumentPosition(_, index) } or TKeywordParameterPosition(string name) { name = any(Parameter p).getName() or // see comment for TPositionalParameterPosition - FlowSummaryImplSpecific::ParsePositions::isParsedKeywordArgumentPosition(_, name) + FlowSummaryImpl::ParsePositions::isParsedKeywordArgumentPosition(_, name) } or TStarArgsParameterPosition(int index) { // since `.getPosition` does not work for `*args`, we need *args parameter positions @@ -136,13 +135,13 @@ newtype TArgumentPosition = // since synthetic calls within a summarized callable could use a unique argument // position, we need to ensure we make these available (these are specified as // parameters in the flow-summary spec) - FlowSummaryImplSpecific::ParsePositions::isParsedPositionalParameterPosition(_, index) + FlowSummaryImpl::ParsePositions::isParsedPositionalParameterPosition(_, index) } or TKeywordArgumentPosition(string name) { exists(any(CallNode c).getArgByName(name)) or // see comment for TPositionalArgumentPosition - FlowSummaryImplSpecific::ParsePositions::isParsedKeywordParameterPosition(_, name) + FlowSummaryImpl::ParsePositions::isParsedKeywordParameterPosition(_, name) } or TStarArgsArgumentPosition(int index) { exists(Call c | c.getPositionalArg(index) instanceof Starred) @@ -1559,12 +1558,15 @@ private class SummaryReturnNode extends FlowSummaryNode, ReturnNode { } private class SummaryArgumentNode extends FlowSummaryNode, ArgumentNode { + private SummaryCall call_; + private ArgumentPosition pos_; + SummaryArgumentNode() { - FlowSummaryImpl::Private::summaryArgumentNode(_, this.getSummaryNode(), _) + FlowSummaryImpl::Private::summaryArgumentNode(call_.getReceiver(), this.getSummaryNode(), _) } override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { - FlowSummaryImpl::Private::summaryArgumentNode(call, this.getSummaryNode(), pos) + call = call_ and pos = pos_ } } @@ -1662,10 +1664,16 @@ private module OutNodes { } private class SummaryOutNode extends FlowSummaryNode, OutNode { - SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this.getSummaryNode(), _) } + private SummaryCall call; + private ReturnKind kind_; + + SummaryOutNode() { + FlowSummaryImpl::Private::summaryOutNode(call.getReceiver(), this.getSummaryNode(), kind_) + } override DataFlowCall getCall(ReturnKind kind) { - FlowSummaryImpl::Private::summaryOutNode(result, this.getSummaryNode(), kind) + result = call and + kind = kind_ } } } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 839f147411e..f1f9668e856 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -1028,7 +1028,10 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves * by default as a heuristic. */ predicate allowParameterReturnInSelf(ParameterNode p) { - FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p) + exists(DataFlowCallable c, ParameterPosition pos | + p.(ParameterNodeImpl).isParameterOf(c, pos) and + FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(c.asLibraryCallable(), pos) + ) } /** An approximated `Content`. */ diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll index 0aa17c521b4..055a7cee03b 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll @@ -1,1491 +1,196 @@ /** * Provides classes and predicates for defining flow summaries. - * - * The definitions in this file are language-independent, and language-specific - * definitions are passed in via the `DataFlowImplSpecific` and - * `FlowSummaryImplSpecific` modules. */ -private import FlowSummaryImplSpecific +private import python +private import codeql.dataflow.internal.FlowSummaryImpl +private import codeql.dataflow.internal.AccessPathSyntax as AccessPath +private import DataFlowImplSpecific as DataFlowImplSpecific private import DataFlowImplSpecific::Private private import DataFlowImplSpecific::Public -private import DataFlowImplCommon -private import codeql.util.Unit -/** Provides classes and predicates for defining flow summaries. */ -module Public { +module Input implements InputSig { + class SummarizedCallableBase = string; + + ArgumentPosition callbackSelfParameterPosition() { none() } + + ReturnKind getStandardReturnValueKind() { any() } + + string encodeParameterPosition(ParameterPosition pos) { + pos.isSelf() and result = "self" + or + exists(int i | + pos.isPositional(i) and + result = i.toString() + ) + or + exists(string name | + pos.isKeyword(name) and + result = name + ":" + ) + } + + string encodeArgumentPosition(ArgumentPosition pos) { + pos.isSelf() and result = "self" + or + exists(int i | + pos.isPositional(i) and + result = i.toString() + ) + or + exists(string name | + pos.isKeyword(name) and + result = name + ":" + ) + } + + string encodeContent(ContentSet cs, string arg) { + cs = TListElementContent() and result = "ListElement" and arg = "" + or + cs = TSetElementContent() and result = "SetElement" and arg = "" + or + exists(int index | + cs = TTupleElementContent(index) and result = "TupleElement" and arg = index.toString() + ) + or + exists(string key | + cs = TDictionaryElementContent(key) and result = "DictionaryElement" and arg = key + ) + or + cs = TDictionaryElementAnyContent() and result = "DictionaryElementAny" and arg = "" + or + exists(string attr | cs = TAttributeContent(attr) and result = "Attribute" and arg = attr) + } + + bindingset[token] + ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Argument[x..y]` ranges + token.getName() = "Argument" and + result.isPositional(AccessPath::parseInt(token.getAnArgument())) + } + + bindingset[token] + ArgumentPosition decodeUnknownArgumentPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Parameter[x..y]` ranges + token.getName() = "Parameter" and + result.isPositional(AccessPath::parseInt(token.getAnArgument())) + } +} + +private import Make as Impl + +private module StepsInput implements Impl::Private::StepsInputSig { + DataFlowCall getACall(Public::SummarizedCallable sc) { + result = + TPotentialLibraryCall([ + sc.(LibraryCallable).getACall().asCfgNode(), + sc.(LibraryCallable).getACallSimple().asCfgNode() + ]) + } +} + +module Private { + import Impl::Private + + module Steps = Impl::Private::Steps; + + /** + * Provides predicates for constructing summary components. + */ + module SummaryComponent { + private import Impl::Private::SummaryComponent as SC + + predicate parameter = SC::parameter/1; + + predicate argument = SC::argument/1; + + predicate content = SC::content/1; + + predicate withoutContent = SC::withoutContent/1; + + predicate withContent = SC::withContent/1; + + /** Gets a summary component that represents a list element. */ + SummaryComponent listElement() { result = content(any(ListElementContent c)) } + + /** Gets a summary component that represents a set element. */ + SummaryComponent setElement() { result = content(any(SetElementContent c)) } + + /** Gets a summary component that represents a tuple element. */ + SummaryComponent tupleElement(int index) { + exists(TupleElementContent c | c.getIndex() = index and result = content(c)) + } + + /** Gets a summary component that represents a dictionary element. */ + SummaryComponent dictionaryElement(string key) { + exists(DictionaryElementContent c | c.getKey() = key and result = content(c)) + } + + /** Gets a summary component that represents a dictionary element at any key. */ + SummaryComponent dictionaryElementAny() { result = content(any(DictionaryElementAnyContent c)) } + + /** Gets a summary component that represents an attribute element. */ + SummaryComponent attribute(string attr) { + exists(AttributeContent c | c.getAttribute() = attr and result = content(c)) + } + + /** Gets a summary component that represents the return value of a call. */ + SummaryComponent return() { result = SC::return(any(ReturnKind rk)) } + } + + /** + * Provides predicates for constructing stacks of summary components. + */ + module SummaryComponentStack { + private import Impl::Private::SummaryComponentStack as SCS + + predicate singleton = SCS::singleton/1; + + predicate push = SCS::push/2; + + predicate argument = SCS::argument/1; + + /** Gets a singleton stack representing the return value of a call. */ + SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } + } +} + +module Public = Impl::Public; + +module ParsePositions { private import Private - /** - * A component used in a flow summary. - * - * Either a parameter or an argument at a given position, a specific - * content type, or a return kind. - */ - class SummaryComponent extends TSummaryComponent { - /** Gets a textual representation of this component used for MaD models. */ - string getMadRepresentation() { - result = getMadRepresentationSpecific(this) - or - exists(ArgumentPosition pos | - this = TParameterSummaryComponent(pos) and - result = "Parameter[" + getArgumentPosition(pos) + "]" - ) - or - exists(ParameterPosition pos | - this = TArgumentSummaryComponent(pos) and - result = "Argument[" + getParameterPosition(pos) + "]" - ) - or - exists(string synthetic | - this = TSyntheticGlobalSummaryComponent(synthetic) and - result = "SyntheticGlobal[" + synthetic + "]" - ) - or - this = TReturnSummaryComponent(getReturnValueKind()) and result = "ReturnValue" - } - - /** Gets a textual representation of this summary component. */ - string toString() { result = this.getMadRepresentation() } + private predicate isParamBody(string body) { + exists(AccessPathToken tok | + tok.getName() = "Parameter" and + body = tok.getAnArgument() + ) } - /** Provides predicates for constructing summary components. */ - module SummaryComponent { - /** Gets a summary component for content `c`. */ - SummaryComponent content(ContentSet c) { result = TContentSummaryComponent(c) } - - /** Gets a summary component where data is not allowed to be stored in `c`. */ - SummaryComponent withoutContent(ContentSet c) { result = TWithoutContentSummaryComponent(c) } - - /** Gets a summary component where data must be stored in `c`. */ - SummaryComponent withContent(ContentSet c) { result = TWithContentSummaryComponent(c) } - - /** Gets a summary component for a parameter at position `pos`. */ - SummaryComponent parameter(ArgumentPosition pos) { result = TParameterSummaryComponent(pos) } - - /** Gets a summary component for an argument at position `pos`. */ - SummaryComponent argument(ParameterPosition pos) { result = TArgumentSummaryComponent(pos) } - - /** Gets a summary component for a return of kind `rk`. */ - SummaryComponent return(ReturnKind rk) { result = TReturnSummaryComponent(rk) } - - /** Gets a summary component for synthetic global `sg`. */ - SummaryComponent syntheticGlobal(SyntheticGlobal sg) { - result = TSyntheticGlobalSummaryComponent(sg) - } - - /** - * A synthetic global. This represents some form of global state, which - * summaries can read and write individually. - */ - abstract class SyntheticGlobal extends string { - bindingset[this] - SyntheticGlobal() { any() } - } + private predicate isArgBody(string body) { + exists(AccessPathToken tok | + tok.getName() = "Argument" and + body = tok.getAnArgument() + ) } - /** - * A (non-empty) stack of summary components. - * - * A stack is used to represent where data is read from (input) or where it - * is written to (output). For example, an input stack `[Field f, Argument 0]` - * means that data is read from field `f` from the `0`th argument, while an - * output stack `[Field g, Return]` means that data is written to the field - * `g` of the returned object. - */ - class SummaryComponentStack extends TSummaryComponentStack { - /** Gets the head of this stack. */ - SummaryComponent head() { - this = TSingletonSummaryComponentStack(result) or - this = TConsSummaryComponentStack(result, _) - } - - /** Gets the tail of this stack, if any. */ - SummaryComponentStack tail() { this = TConsSummaryComponentStack(_, result) } - - /** Gets the length of this stack. */ - int length() { - this = TSingletonSummaryComponentStack(_) and result = 1 - or - result = 1 + this.tail().length() - } - - /** Gets the stack obtained by dropping the first `i` elements, if any. */ - SummaryComponentStack drop(int i) { - i = 0 and result = this - or - result = this.tail().drop(i - 1) - } - - /** Holds if this stack contains summary component `c`. */ - predicate contains(SummaryComponent c) { c = this.drop(_).head() } - - /** Gets the bottom element of this stack. */ - SummaryComponent bottom() { - this = TSingletonSummaryComponentStack(result) or result = this.tail().bottom() - } - - /** Gets a textual representation of this stack used for MaD models. */ - string getMadRepresentation() { - exists(SummaryComponent head, SummaryComponentStack tail | - head = this.head() and - tail = this.tail() and - result = tail.getMadRepresentation() + "." + head.getMadRepresentation() - ) - or - exists(SummaryComponent c | - this = TSingletonSummaryComponentStack(c) and - result = c.getMadRepresentation() - ) - } - - /** Gets a textual representation of this stack. */ - string toString() { result = this.getMadRepresentation() } + predicate isParsedPositionalParameterPosition(string c, int i) { + isParamBody(c) and + i = AccessPath::parseInt(c) } - /** Provides predicates for constructing stacks of summary components. */ - module SummaryComponentStack { - /** Gets a singleton stack containing `c`. */ - SummaryComponentStack singleton(SummaryComponent c) { - result = TSingletonSummaryComponentStack(c) - } - - /** - * Gets the stack obtained by pushing `head` onto `tail`. - * - * Make sure to override `RequiredSummaryComponentStack::required()` in order - * to ensure that the constructed stack exists. - */ - SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) { - result = TConsSummaryComponentStack(head, tail) - } - - /** Gets a singleton stack for an argument at position `pos`. */ - SummaryComponentStack argument(ParameterPosition pos) { - result = singleton(SummaryComponent::argument(pos)) - } - - /** Gets a singleton stack representing a return of kind `rk`. */ - SummaryComponentStack return(ReturnKind rk) { result = singleton(SummaryComponent::return(rk)) } + predicate isParsedKeywordParameterPosition(string c, string paramName) { + isParamBody(c) and + c = paramName + ":" } - /** - * A class that exists for QL technical reasons only (the IPA type used - * to represent component stacks needs to be bounded). - */ - class RequiredSummaryComponentStack extends Unit { - /** - * Holds if the stack obtained by pushing `head` onto `tail` is required. - */ - abstract predicate required(SummaryComponent head, SummaryComponentStack tail); + predicate isParsedPositionalArgumentPosition(string c, int i) { + isArgBody(c) and + i = AccessPath::parseInt(c) } - /** - * Gets the valid model origin values. - */ - private string getValidModelOrigin() { - result = - [ - "ai", // AI (machine learning) - "df", // Dataflow (model generator) - "tb", // Type based (model generator) - "hq", // Heuristic query - ] - } - - /** - * A class used to represent provenance values for MaD models. - * - * The provenance value is a string of the form `origin-verification` - * (or just `manual`), where `origin` is a value indicating the - * origin of the model, and `verification` is a value indicating, how - * the model was verified. - * - * Examples could be: - * - `df-generated`: A model produced by the model generator, but not verified by a human. - * - `ai-manual`: A model produced by AI, but verified by a human. - */ - class Provenance extends string { - private string verification; - - Provenance() { - exists(string origin | origin = getValidModelOrigin() | - this = origin + "-" + verification and - verification = ["manual", "generated"] - ) - or - this = verification and verification = "manual" - } - - /** - * Holds if this is a valid generated provenance value. - */ - predicate isGenerated() { verification = "generated" } - - /** - * Holds if this is a valid manual provenance value. - */ - predicate isManual() { verification = "manual" } - } - - /** A callable with a flow summary. */ - abstract class SummarizedCallable extends SummarizedCallableBase { - bindingset[this] - SummarizedCallable() { any() } - - /** - * Holds if data may flow from `input` to `output` through this callable. - * - * `preservesValue` indicates whether this is a value-preserving step - * or a taint-step. - * - * Input specifications are restricted to stacks that end with - * `SummaryComponent::argument(_)`, preceded by zero or more - * `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components. - * - * Output specifications are restricted to stacks that end with - * `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`. - * - * Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero - * or more `SummaryComponent::content(_)` components. - * - * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an - * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded - * by zero or more `SummaryComponent::content(_)` components. - */ - pragma[nomagic] - predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - none() - } - - /** - * Holds if there exists a generated summary that applies to this callable. - */ - final predicate hasGeneratedModel() { - exists(Provenance p | p.isGenerated() and this.hasProvenance(p)) - } - - /** - * Holds if all the summaries that apply to this callable are auto generated and not manually created. - * That is, only apply generated models, when there are no manual models. - */ - final predicate applyGeneratedModel() { - this.hasGeneratedModel() and - not this.hasManualModel() - } - - /** - * Holds if there exists a manual summary that applies to this callable. - */ - final predicate hasManualModel() { - exists(Provenance p | p.isManual() and this.hasProvenance(p)) - } - - /** - * Holds if there exists a manual summary that applies to this callable. - * Always apply manual models if they exist. - */ - final predicate applyManualModel() { this.hasManualModel() } - - /** - * Holds if there exists a summary that applies to this callable - * that has provenance `provenance`. - */ - predicate hasProvenance(Provenance provenance) { provenance = "manual" } - } - - /** - * A callable where there is no flow via the callable. - */ - class NeutralSummaryCallable extends NeutralCallable { - NeutralSummaryCallable() { this.getKind() = "summary" } - } - - /** - * A callable that has a neutral model. - */ - class NeutralCallable extends NeutralCallableBase { - private string kind; - private Provenance provenance; - - NeutralCallable() { neutralElement(this, kind, provenance) } - - /** - * Holds if the neutral is auto generated. - */ - final predicate hasGeneratedModel() { provenance.isGenerated() } - - /** - * Holds if there exists a manual neutral that applies to this callable. - */ - final predicate hasManualModel() { provenance.isManual() } - - /** - * Holds if the neutral has provenance `p`. - */ - predicate hasProvenance(Provenance p) { p = provenance } - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = kind } - } -} - -/** - * Provides predicates for compiling flow summaries down to atomic local steps, - * read steps, and store steps. - */ -module Private { - private import Public - import AccessPathSyntax - - newtype TSummaryComponent = - TContentSummaryComponent(ContentSet c) or - TParameterSummaryComponent(ArgumentPosition pos) or - TArgumentSummaryComponent(ParameterPosition pos) or - TReturnSummaryComponent(ReturnKind rk) or - TSyntheticGlobalSummaryComponent(SummaryComponent::SyntheticGlobal sg) or - TWithoutContentSummaryComponent(ContentSet c) or - TWithContentSummaryComponent(ContentSet c) - - private TParameterSummaryComponent callbackSelfParam() { - result = TParameterSummaryComponent(callbackSelfParameterPosition()) - } - - newtype TSummaryComponentStack = - TSingletonSummaryComponentStack(SummaryComponent c) or - TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) { - any(RequiredSummaryComponentStack x).required(head, tail) - or - any(RequiredSummaryComponentStack x).required(TParameterSummaryComponent(_), tail) and - head = callbackSelfParam() - or - derivedFluentFlowPush(_, _, _, head, tail, _) - } - - pragma[nomagic] - private predicate summary( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - ) { - c.propagatesFlow(input, output, preservesValue) - or - // observe side effects of callbacks on input arguments - c.propagatesFlow(output, input, preservesValue) and - preservesValue = true and - isCallbackParameter(input) and - isContentOfArgument(output, _) - or - // flow from the receiver of a callback into the instance-parameter - exists(SummaryComponentStack s, SummaryComponentStack callbackRef | - c.propagatesFlow(s, _, _) or c.propagatesFlow(_, s, _) - | - callbackRef = s.drop(_) and - (isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and - input = callbackRef.tail() and - output = TConsSummaryComponentStack(callbackSelfParam(), input) and - preservesValue = true - ) - or - exists(SummaryComponentStack arg, SummaryComponentStack return | - derivedFluentFlow(c, input, arg, return, preservesValue) - | - arg.length() = 1 and - output = return - or - exists(SummaryComponent head, SummaryComponentStack tail | - derivedFluentFlowPush(c, input, arg, head, tail, 0) and - output = SummaryComponentStack::push(head, tail) - ) - ) - or - // Chain together summaries where values get passed into callbacks along the way - exists(SummaryComponentStack mid, boolean preservesValue1, boolean preservesValue2 | - c.propagatesFlow(input, mid, preservesValue1) and - c.propagatesFlow(mid, output, preservesValue2) and - mid.drop(mid.length() - 2) = - SummaryComponentStack::push(TParameterSummaryComponent(_), - SummaryComponentStack::singleton(TArgumentSummaryComponent(_))) and - preservesValue = preservesValue1.booleanAnd(preservesValue2) - ) - } - - /** - * Holds if `c` has a flow summary from `input` to `arg`, where `arg` - * writes to (contents of) arguments at position `pos`, and `c` has a - * value-preserving flow summary from the arguments at position `pos` - * to a return value (`return`). - * - * In such a case, we derive flow from `input` to (contents of) the return - * value. - * - * As an example, this simplifies modeling of fluent methods: - * for `StringBuilder.append(x)` with a specified value flow from qualifier to - * return value and taint flow from argument 0 to the qualifier, then this - * allows us to infer taint flow from argument 0 to the return value. - */ - pragma[nomagic] - private predicate derivedFluentFlow( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponentStack return, boolean preservesValue - ) { - exists(ParameterPosition pos | - summary(c, input, arg, preservesValue) and - isContentOfArgument(arg, pos) and - summary(c, SummaryComponentStack::argument(pos), return, true) and - return.bottom() = TReturnSummaryComponent(_) - ) - } - - pragma[nomagic] - private predicate derivedFluentFlowPush( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponent head, SummaryComponentStack tail, int i - ) { - derivedFluentFlow(c, input, arg, tail, _) and - head = arg.drop(i).head() and - i = arg.length() - 2 - or - exists(SummaryComponent head0, SummaryComponentStack tail0 | - derivedFluentFlowPush(c, input, arg, head0, tail0, i + 1) and - head = arg.drop(i).head() and - tail = SummaryComponentStack::push(head0, tail0) - ) - } - - private predicate isCallbackParameter(SummaryComponentStack s) { - s.head() = TParameterSummaryComponent(_) and exists(s.tail()) - } - - private predicate isContentOfArgument(SummaryComponentStack s, ParameterPosition pos) { - s.head() = TContentSummaryComponent(_) and isContentOfArgument(s.tail(), pos) - or - s = SummaryComponentStack::argument(pos) - } - - private predicate outputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, _, s, _) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = TContentSummaryComponent(_) and - s = out.tail() - ) - or - // Add the argument node corresponding to the requested post-update node - inputState(c, s) and isCallbackParameter(s) - } - - private predicate inputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, s, _, _) - or - exists(SummaryComponentStack inp | inputState(c, inp) and s = inp.tail()) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = TParameterSummaryComponent(_) and - s = out.tail() - ) - or - // Add the post-update node corresponding to the requested argument node - outputState(c, s) and isCallbackParameter(s) - or - // Add the parameter node for parameter side-effects - outputState(c, s) and s = SummaryComponentStack::argument(_) - } - - private newtype TSummaryNodeState = - TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or - TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) } - - /** - * A state used to break up (complex) flow summaries into atomic flow steps. - * For a flow summary - * - * ```ql - * propagatesFlow( - * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - * ) - * ``` - * - * the following states are used: - * - * - `TSummaryNodeInputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _have been read_ from the - * input. - * - `TSummaryNodeOutputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _remain to be written_ to - * the output. - */ - private class SummaryNodeState extends TSummaryNodeState { - /** Holds if this state is a valid input state for `c`. */ - pragma[nomagic] - predicate isInputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeInputState(s) and - inputState(c, s) - } - - /** Holds if this state is a valid output state for `c`. */ - pragma[nomagic] - predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeOutputState(s) and - outputState(c, s) - } - - /** Gets a textual representation of this state. */ - string toString() { - exists(SummaryComponentStack s | - this = TSummaryNodeInputState(s) and - result = "read: " + s - ) - or - exists(SummaryComponentStack s | - this = TSummaryNodeOutputState(s) and - result = "to write: " + s - ) - } - } - - private newtype TSummaryNode = - TSummaryInternalNode(SummarizedCallable c, SummaryNodeState state) { - summaryNodeRange(c, state) - } or - TSummaryParameterNode(SummarizedCallable c, ParameterPosition pos) { - summaryParameterNodeRange(c, pos) - } - - abstract class SummaryNode extends TSummaryNode { - abstract string toString(); - - abstract SummarizedCallable getSummarizedCallable(); - } - - private class SummaryInternalNode extends SummaryNode, TSummaryInternalNode { - private SummarizedCallable c; - private SummaryNodeState state; - - SummaryInternalNode() { this = TSummaryInternalNode(c, state) } - - override string toString() { result = "[summary] " + state + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - private class SummaryParamNode extends SummaryNode, TSummaryParameterNode { - private SummarizedCallable c; - private ParameterPosition pos; - - SummaryParamNode() { this = TSummaryParameterNode(c, pos) } - - override string toString() { result = "[summary param] " + pos + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - /** - * Holds if `state` represents having read from a parameter at position - * `pos` in `c`. In this case we are not synthesizing a data-flow node, - * but instead assume that a relevant parameter node already exists. - */ - private predicate parameterReadState( - SummarizedCallable c, SummaryNodeState state, ParameterPosition pos - ) { - state.isInputState(c, SummaryComponentStack::argument(pos)) - } - - /** - * Holds if a synthesized summary node is needed for the state `state` in summarized - * callable `c`. - */ - private predicate summaryNodeRange(SummarizedCallable c, SummaryNodeState state) { - state.isInputState(c, _) and - not parameterReadState(c, state, _) - or - state.isOutputState(c, _) - } - - pragma[noinline] - private SummaryNode summaryNodeInputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | state.isInputState(c, s) | - result = TSummaryInternalNode(c, state) - or - exists(ParameterPosition pos | - parameterReadState(c, state, pos) and - result = TSummaryParameterNode(c, pos) - ) - ) - } - - pragma[noinline] - private SummaryNode summaryNodeOutputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | - state.isOutputState(c, s) and - result = TSummaryInternalNode(c, state) - ) - } - - /** - * Holds if a write targets `post`, which is a post-update node for a - * parameter at position `pos` in `c`. - */ - private predicate isParameterPostUpdate( - SummaryNode post, SummarizedCallable c, ParameterPosition pos - ) { - post = summaryNodeOutputState(c, SummaryComponentStack::argument(pos)) - } - - /** Holds if a parameter node at position `pos` is required for `c`. */ - private predicate summaryParameterNodeRange(SummarizedCallable c, ParameterPosition pos) { - parameterReadState(c, _, pos) - or - // Same as `isParameterPostUpdate(_, c, pos)`, but can be used in a negative context - any(SummaryNodeState state).isOutputState(c, SummaryComponentStack::argument(pos)) - } - - private predicate callbackOutput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ReturnKind rk - ) { - any(SummaryNodeState state).isInputState(c, s) and - s.head() = TReturnSummaryComponent(rk) and - receiver = summaryNodeInputState(c, s.tail()) - } - - private predicate callbackInput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ArgumentPosition pos - ) { - any(SummaryNodeState state).isOutputState(c, s) and - s.head() = TParameterSummaryComponent(pos) and - receiver = summaryNodeInputState(c, s.tail()) - } - - /** Holds if a call targeting `receiver` should be synthesized inside `c`. */ - predicate summaryCallbackRange(SummarizedCallable c, SummaryNode receiver) { - callbackOutput(c, _, receiver, _) - or - callbackInput(c, _, receiver, _) - } - - /** - * Gets the type of synthesized summary node `n`. - * - * The type is computed based on the language-specific predicates - * `getContentType()`, `getReturnType()`, `getCallbackParameterType()`, and - * `getCallbackReturnType()`. - */ - DataFlowType summaryNodeType(SummaryNode n) { - exists(SummaryNode pre | - summaryPostUpdateNode(n, pre) and - result = summaryNodeType(pre) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | head = s.head() | - n = summaryNodeInputState(c, s) and - ( - exists(ContentSet cont | result = getContentType(cont) | - head = TContentSummaryComponent(cont) or - head = TWithContentSummaryComponent(cont) - ) - or - head = TWithoutContentSummaryComponent(_) and - result = summaryNodeType(summaryNodeInputState(c, s.tail())) - or - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = - getCallbackReturnType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), rk) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - or - exists(ParameterPosition pos | - head = TArgumentSummaryComponent(pos) and - result = getParameterType(c, pos) - ) - ) - or - n = summaryNodeOutputState(c, s) and - ( - exists(ContentSet cont | - head = TContentSummaryComponent(cont) and result = getContentType(cont) - ) - or - s.length() = 1 and - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = getReturnType(c, rk) - ) - or - exists(ArgumentPosition pos | head = TParameterSummaryComponent(pos) | - result = - getCallbackParameterType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), pos) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - ) - ) - } - - /** Holds if summary node `p` is a parameter with position `pos`. */ - predicate summaryParameterNode(SummaryNode p, ParameterPosition pos) { - p = TSummaryParameterNode(_, pos) - } - - /** Holds if summary node `out` contains output of kind `rk` from call `c`. */ - predicate summaryOutNode(DataFlowCall c, SummaryNode out, ReturnKind rk) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackOutput(callable, s, receiver, rk) and - out = summaryNodeInputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `arg` is at position `pos` in the call `c`. */ - predicate summaryArgumentNode(DataFlowCall c, SummaryNode arg, ArgumentPosition pos) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackInput(callable, s, receiver, pos) and - arg = summaryNodeOutputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `post` is a post-update node with pre-update node `pre`. */ - predicate summaryPostUpdateNode(SummaryNode post, SummaryNode pre) { - exists(SummarizedCallable c, ParameterPosition pos | - isParameterPostUpdate(post, c, pos) and - pre = TSummaryParameterNode(c, pos) - ) - or - exists(SummarizedCallable callable, SummaryComponentStack s | - callbackInput(callable, s, _, _) and - pre = summaryNodeOutputState(callable, s) and - post = summaryNodeInputState(callable, s) - ) - } - - /** Holds if summary node `ret` is a return node of kind `rk`. */ - predicate summaryReturnNode(SummaryNode ret, ReturnKind rk) { - exists(SummaryComponentStack s | - ret = summaryNodeOutputState(_, s) and - s = TSingletonSummaryComponentStack(TReturnSummaryComponent(rk)) - ) - } - - /** - * Holds if flow is allowed to pass from parameter `p`, to a return - * node, and back out to `p`. - */ - predicate summaryAllowParameterReturnInSelf(ParamNode p) { - exists(SummarizedCallable c, ParameterPosition ppos | - p.isParameterOf(inject(c), pragma[only_bind_into](ppos)) - | - exists(SummaryComponentStack inputContents, SummaryComponentStack outputContents | - summary(c, inputContents, outputContents, _) and - inputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) and - outputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) - ) - ) - } - - /** Provides a compilation of flow summaries to atomic data-flow steps. */ - module Steps { - /** - * Holds if there is a local step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryLocalStep(SummaryNode pred, SummaryNode succ, boolean preservesValue) { - exists( - SummarizedCallable c, SummaryComponentStack inputContents, - SummaryComponentStack outputContents - | - summary(c, inputContents, outputContents, preservesValue) and - pred = summaryNodeInputState(c, inputContents) and - succ = summaryNodeOutputState(c, outputContents) - | - preservesValue = true - or - preservesValue = false and not summary(c, inputContents, outputContents, true) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s | - pred = summaryNodeInputState(c, s.tail()) and - succ = summaryNodeInputState(c, s) and - s.head() = [SummaryComponent::withContent(_), SummaryComponent::withoutContent(_)] and - preservesValue = true - ) - } - - /** - * Holds if there is a read step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. - */ - predicate summaryReadStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeInputState(sc, s.tail()) and - succ = summaryNodeInputState(sc, s) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a store step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. - */ - predicate summaryStoreStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeOutputState(sc, s) and - succ = summaryNodeOutputState(sc, s.tail()) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a jump step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryJumpStep(SummaryNode pred, SummaryNode succ) { - exists(SummaryComponentStack s | - s = SummaryComponentStack::singleton(SummaryComponent::syntheticGlobal(_)) and - pred = summaryNodeOutputState(_, s) and - succ = summaryNodeInputState(_, s) - ) - } - - /** - * Holds if values stored inside content `c` are cleared at `n`. `n` is a - * synthesized summary node, so in order for values to be cleared at calls - * to the relevant method, it is important that flow does not pass over - * the argument, either via use-use flow or def-use flow. - * - * Example: - * - * ``` - * a.b = taint; - * a.clearB(); // assume we have a flow summary for `clearB` that clears `b` on the qualifier - * sink(a.b); - * ``` - * - * In the above, flow should not pass from `a` on the first line (or the second - * line) to `a` on the third line. Instead, there will be synthesized flow from - * `a` on line 2 to the post-update node for `a` on that line (via an intermediate - * node where field `b` is cleared). - */ - predicate summaryClearsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withoutContent(c) - ) - } - - /** - * Holds if the value that is being tracked is expected to be stored inside - * content `c` at `n`. - */ - predicate summaryExpectsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withContent(c) - ) - } - - pragma[noinline] - private predicate viableParam( - DataFlowCall call, SummarizedCallable sc, ParameterPosition ppos, SummaryParamNode p - ) { - exists(DataFlowCallable c | - c = inject(sc) and - p = TSummaryParameterNode(sc, ppos) and - c = viableCallable(call) - ) - } - - pragma[nomagic] - private SummaryParamNode summaryArgParam(DataFlowCall call, ArgNode arg, SummarizedCallable sc) { - exists(ParameterPosition ppos | - argumentPositionMatch(call, arg, ppos) and - viableParam(call, sc, ppos, result) - ) - } - - /** - * Holds if `p` can reach `n` in a summarized callable, using only value-preserving - * local steps. `clearsOrExpects` records whether any node on the path from `p` to - * `n` either clears or expects contents. - */ - private predicate paramReachesLocal(SummaryParamNode p, SummaryNode n, boolean clearsOrExpects) { - viableParam(_, _, _, p) and - n = p and - clearsOrExpects = false - or - exists(SummaryNode mid, boolean clearsOrExpectsMid | - paramReachesLocal(p, mid, clearsOrExpectsMid) and - summaryLocalStep(mid, n, true) and - if - summaryClearsContent(n, _) or - summaryExpectsContent(n, _) - then clearsOrExpects = true - else clearsOrExpects = clearsOrExpectsMid - ) - } - - /** - * Holds if use-use flow starting from `arg` should be prohibited. - * - * This is the case when `arg` is the argument of a call that targets a - * flow summary where the corresponding parameter either clears contents - * or expects contents. - */ - pragma[nomagic] - predicate prohibitsUseUseFlow(ArgNode arg, SummarizedCallable sc) { - exists(SummaryParamNode p, ParameterPosition ppos, SummaryNode ret | - paramReachesLocal(p, ret, true) and - p = summaryArgParam(_, arg, sc) and - p = TSummaryParameterNode(_, pragma[only_bind_into](ppos)) and - isParameterPostUpdate(ret, _, pragma[only_bind_into](ppos)) - ) - } - - pragma[nomagic] - private predicate summaryReturnNodeExt(SummaryNode ret, ReturnKindExt rk) { - summaryReturnNode(ret, rk.(ValueReturnKind).getKind()) - or - exists(SummaryParamNode p, SummaryNode pre, ParameterPosition pos | - paramReachesLocal(p, pre, _) and - summaryPostUpdateNode(ret, pre) and - p = TSummaryParameterNode(_, pos) and - rk.(ParamUpdateReturnKind).getPosition() = pos - ) - } - - bindingset[ret] - private SummaryParamNode summaryArgParamRetOut( - ArgNode arg, SummaryNode ret, OutNodeExt out, SummarizedCallable sc - ) { - exists(DataFlowCall call, ReturnKindExt rk | - result = summaryArgParam(call, arg, sc) and - summaryReturnNodeExt(ret, pragma[only_bind_into](rk)) and - out = pragma[only_bind_into](rk).getAnOutNode(call) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple value-preserving flow - * summary, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryThroughStepValue(ArgNode arg, Node out, SummarizedCallable sc) { - exists(ReturnKind rk, SummaryNode ret, DataFlowCall call | - summaryLocalStep(summaryArgParam(call, arg, sc), ret, true) and - summaryReturnNode(ret, pragma[only_bind_into](rk)) and - out = getAnOutNode(call, pragma[only_bind_into](rk)) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple flow summary involving taint - * step, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryThroughStepTaint(ArgNode arg, Node out, SummarizedCallable sc) { - exists(SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), ret, false) - ) - } - - /** - * Holds if there is a read(+taint) of `c` from `arg` to `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryGetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryReadStep(summaryArgParamRetOut(arg, ret, out, sc), c, mid) and - summaryLocalStep(mid, ret, _) - ) - } - - /** - * Holds if there is a (taint+)store of `arg` into content `c` of `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summarySetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), mid, _) and - summaryStoreStep(mid, c, ret) - ) - } - } - - /** - * Provides a means of translating externally (e.g., MaD) defined flow - * summaries into a `SummarizedCallable`s. - */ - module External { - /** Holds if `spec` is a relevant external specification. */ - private predicate relevantSpec(string spec) { - summaryElement(_, spec, _, _, _) or - summaryElement(_, _, spec, _, _) or - sourceElement(_, spec, _, _) or - sinkElement(_, spec, _, _) - } - - private class AccessPathRange extends AccessPath::Range { - AccessPathRange() { relevantSpec(this) } - } - - /** Holds if specification component `token` parses as parameter `pos`. */ - predicate parseParam(AccessPathToken token, ArgumentPosition pos) { - token.getName() = "Parameter" and - pos = parseParamBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as argument `pos`. */ - predicate parseArg(AccessPathToken token, ParameterPosition pos) { - token.getName() = "Argument" and - pos = parseArgBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as synthetic global `sg`. */ - predicate parseSynthGlobal(AccessPathToken token, string sg) { - token.getName() = "SyntheticGlobal" and - sg = token.getAnArgument() - } - - private class SyntheticGlobalFromAccessPath extends SummaryComponent::SyntheticGlobal { - SyntheticGlobalFromAccessPath() { parseSynthGlobal(_, this) } - } - - private SummaryComponent interpretComponent(AccessPathToken token) { - exists(ParameterPosition pos | - parseArg(token, pos) and result = SummaryComponent::argument(pos) - ) - or - exists(ArgumentPosition pos | - parseParam(token, pos) and result = SummaryComponent::parameter(pos) - ) - or - token = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind()) - or - exists(string sg | - parseSynthGlobal(token, sg) and result = SummaryComponent::syntheticGlobal(sg) - ) - or - result = interpretComponentSpecific(token) - } - - /** - * Holds if `spec` specifies summary component stack `stack`. - */ - predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) { - interpretSpec(spec, spec.getNumToken(), stack) - } - - /** Holds if the first `n` tokens of `spec` resolves to `stack`. */ - private predicate interpretSpec(AccessPath spec, int n, SummaryComponentStack stack) { - n = 1 and - stack = SummaryComponentStack::singleton(interpretComponent(spec.getToken(0))) - or - exists(SummaryComponent head, SummaryComponentStack tail | - interpretSpec(spec, n, head, tail) and - stack = SummaryComponentStack::push(head, tail) - ) - } - - /** Holds if the first `n` tokens of `spec` resolves to `head` followed by `tail` */ - private predicate interpretSpec( - AccessPath spec, int n, SummaryComponent head, SummaryComponentStack tail - ) { - interpretSpec(spec, n - 1, tail) and - head = interpretComponent(spec.getToken(n - 1)) - } - - private class MkStack extends RequiredSummaryComponentStack { - override predicate required(SummaryComponent head, SummaryComponentStack tail) { - interpretSpec(_, _, head, tail) - } - } - - private class SummarizedCallableExternal extends SummarizedCallable { - SummarizedCallableExternal() { summaryElement(this, _, _, _, _) } - - private predicate relevantSummaryElementGenerated( - AccessPath inSpec, AccessPath outSpec, string kind - ) { - exists(Provenance provenance | - provenance.isGenerated() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) and - not this.applyManualModel() - } - - private predicate relevantSummaryElement(AccessPath inSpec, AccessPath outSpec, string kind) { - exists(Provenance provenance | - provenance.isManual() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) - or - this.relevantSummaryElementGenerated(inSpec, outSpec, kind) - } - - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - exists(AccessPath inSpec, AccessPath outSpec, string kind | - this.relevantSummaryElement(inSpec, outSpec, kind) and - interpretSpec(inSpec, input) and - interpretSpec(outSpec, output) - | - kind = "value" and preservesValue = true - or - kind = "taint" and preservesValue = false - ) - } - - override predicate hasProvenance(Provenance provenance) { - summaryElement(this, _, _, _, provenance) - } - } - - /** Holds if component `c` of specification `spec` cannot be parsed. */ - predicate invalidSpecComponent(AccessPath spec, string c) { - c = spec.getToken(_) and - not exists(interpretComponent(c)) - } - - /** Holds if `provenance` is not a valid provenance value. */ - bindingset[provenance] - predicate invalidProvenance(string provenance) { not provenance instanceof Provenance } - - /** - * Holds if token `part` of specification `spec` has an invalid index. - * E.g., `Argument[-1]`. - */ - predicate invalidIndexComponent(AccessPath spec, AccessPathToken part) { - part = spec.getToken(_) and - part.getName() = ["Parameter", "Argument"] and - AccessPath::parseInt(part.getArgumentList()) < 0 - } - - private predicate inputNeedsReference(AccessPathToken c) { - c.getName() = "Argument" or - inputNeedsReferenceSpecific(c) - } - - private predicate outputNeedsReference(AccessPathToken c) { - c.getName() = ["Argument", "ReturnValue"] or - outputNeedsReferenceSpecific(c) - } - - private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) { - exists(SourceOrSinkElement e | - sourceElement(e, output, kind, _) and - if outputNeedsReference(output.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) { - exists(SourceOrSinkElement e | - sinkElement(e, input, kind, _) and - if inputNeedsReference(input.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - /** Holds if the first `n` tokens of `output` resolve to the given interpretation. */ - private predicate interpretOutput( - AccessPath output, int n, InterpretNode ref, InterpretNode node - ) { - sourceElementRef(ref, output, _) and - n = 0 and - ( - if output = "" - then - // Allow language-specific interpretation of the empty access path - interpretOutputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretOutput(output, n - 1, ref, mid) and - c = output.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ParamNode).isParameterOf(mid.asCallable(), ppos) and - parameterMatch(ppos, apos) - | - c = "Parameter" or parseParam(c, apos) - ) - or - c = "ReturnValue" and - node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind())) - or - interpretOutputSpecific(c, mid, node) - ) - } - - /** Holds if the first `n` tokens of `input` resolve to the given interpretation. */ - private predicate interpretInput(AccessPath input, int n, InterpretNode ref, InterpretNode node) { - sinkElementRef(ref, input, _) and - n = 0 and - ( - if input = "" - then - // Allow language-specific interpretation of the empty access path - interpretInputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretInput(input, n - 1, ref, mid) and - c = input.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ReturnNodeExt ret | - c = "ReturnValue" and - ret = node.asNode() and - ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and - mid.asCallable() = getNodeEnclosingCallable(ret) - ) - or - interpretInputSpecific(c, mid, node) - ) - } - - /** - * Holds if `node` is specified as a source with the given kind in a MaD flow - * model. - */ - predicate isSourceNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath output | - sourceElementRef(ref, output, kind) and - interpretOutput(output, output.getNumToken(), ref, node) - ) - } - - /** - * Holds if `node` is specified as a sink with the given kind in a MaD flow - * model. - */ - predicate isSinkNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath input | - sinkElementRef(ref, input, kind) and - interpretInput(input, input.getNumToken(), ref, node) - ) - } - } - - /** Provides a query predicate for outputting a set of relevant flow summaries. */ - module TestOutput { - /** A flow summary to include in the `summary/1` query predicate. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - /** Gets the string representation of this callable used by `summary/1`. */ - abstract string getCallableCsv(); - - /** Holds if flow is propagated between `input` and `output`. */ - predicate relevantSummary( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - super.propagatesFlow(input, output, preservesValue) - } - - string toString() { result = super.toString() } - } - - /** A model to include in the `neutral/1` query predicate. */ - abstract class RelevantNeutralCallable instanceof NeutralCallable { - /** Gets the string representation of this callable used by `neutral/1`. */ - abstract string getCallableCsv(); - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = super.getKind() } - - string toString() { result = super.toString() } - } - - /** Render the kind in the format used in flow summaries. */ - private string renderKind(boolean preservesValue) { - preservesValue = true and result = "value" - or - preservesValue = false and result = "taint" - } - - private string renderProvenance(SummarizedCallable c) { - if c.applyManualModel() then result = "manual" else c.hasProvenance(result) - } - - private string renderProvenanceNeutral(NeutralCallable c) { - if c.hasManualModel() then result = "manual" else c.hasProvenance(result) - } - - /** - * A query predicate for outputting flow summaries in semi-colon separated format in QL tests. - * The syntax is: "namespace;type;overrides;name;signature;ext;inputspec;outputspec;kind;provenance", - * ext is hardcoded to empty. - */ - query predicate summary(string csv) { - exists( - RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - | - c.relevantSummary(input, output, preservesValue) and - csv = - c.getCallableCsv() // Callable information - + input.getMadRepresentation() + ";" // input - + output.getMadRepresentation() + ";" // output - + renderKind(preservesValue) + ";" // kind - + renderProvenance(c) // provenance - ) - } - - /** - * Holds if a neutral model `csv` exists (semi-colon separated format). Used for testing purposes. - * The syntax is: "namespace;type;name;signature;kind;provenance"", - */ - query predicate neutral(string csv) { - exists(RelevantNeutralCallable c | - csv = - c.getCallableCsv() // Callable information - + c.getKind() + ";" // kind - + renderProvenanceNeutral(c) // provenance - ) - } - } - - /** - * Provides query predicates for rendering the generated data flow graph for - * a summarized callable. - * - * Import this module into a `.ql` file of `@kind graph` to render the graph. - * The graph is restricted to callables from `RelevantSummarizedCallable`. - */ - module RenderSummarizedCallable { - /** A summarized callable to include in the graph. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - string toString() { result = super.toString() } - } - - private newtype TNodeOrCall = - MkNode(SummaryNode n) { - exists(RelevantSummarizedCallable c | - n = TSummaryInternalNode(c, _) - or - n = TSummaryParameterNode(c, _) - ) - } or - MkCall(DataFlowCall call) { - call = summaryDataFlowCall(_) and - call.getEnclosingCallable() = inject(any(RelevantSummarizedCallable c)) - } - - private class NodeOrCall extends TNodeOrCall { - SummaryNode asNode() { this = MkNode(result) } - - DataFlowCall asCall() { this = MkCall(result) } - - string toString() { - result = this.asNode().toString() - or - result = this.asCall().toString() - } - - /** - * Holds if this element is at the specified location. - * The location spans column `startcolumn` of line `startline` to - * column `endcolumn` of line `endline` in file `filepath`. - * For more information, see - * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). - */ - predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - filepath = "" and - startline = 0 and - startcolumn = 0 and - endline = 0 and - endcolumn = 0 - } - } - - query predicate nodes(NodeOrCall n, string key, string val) { - key = "semmle.label" and val = n.toString() - } - - private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) { - exists(boolean preservesValue | - Private::Steps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and - if preservesValue = true then value = "value" else value = "taint" - ) - or - exists(ContentSet c | - Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and - value = "read (" + c + ")" - or - Private::Steps::summaryStoreStep(a.asNode(), c, b.asNode()) and - value = "store (" + c + ")" - or - Private::Steps::summaryClearsContent(a.asNode(), c) and - b = a and - value = "clear (" + c + ")" - or - Private::Steps::summaryExpectsContent(a.asNode(), c) and - b = a and - value = "expect (" + c + ")" - ) - or - summaryPostUpdateNode(b.asNode(), a.asNode()) and - value = "post-update" - or - b.asCall() = summaryDataFlowCall(a.asNode()) and - value = "receiver" - or - exists(ArgumentPosition pos | - summaryArgumentNode(b.asCall(), a.asNode(), pos) and - value = "argument (" + pos + ")" - ) - } - - query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) { - key = "semmle.label" and - value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ") - } + predicate isParsedKeywordArgumentPosition(string c, string argName) { + isArgBody(c) and + c = argName + ":" } } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll deleted file mode 100644 index a29b97b72c2..00000000000 --- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll +++ /dev/null @@ -1,324 +0,0 @@ -/** - * Provides Python specific classes and predicates for defining flow summaries. - * - * Flow summaries are defined for callables that are not extracted. - * Such callables go by different names in different parts of our codebase: - * - * - in `FlowSummary.qll`, which is user facing, they are called `SummarizedCallable`s. - * These contain summaries, implemented by the user via the predicates `propagatesFlow` and `propagatesFlowExt`. - * - * - in the data flow layer, they are called `LibraryCallable`s (as in the Ruby codebase). - * These are identified by strings and has predicates for finding calls to them. - * - * Having both extracted and non-extracted callables means that we now have three types of calls: - * - Extracted calls to extracted callables, either `NormalCall` or `SpecialCall`. These are handled by standard data flow. - * - Extracted calls to non-extracted callables, `LibraryCall`. These are handled by looking up the relevant summary when the - * global data flow graph is connected up via `getViableCallable`. - * - Non-extracted calls, `SummaryCall`. These are synthesised by the flow summary framework. - * - * The first two can be referred to as `ExtractedDataFlowCall`. In fact, `LibraryCall` is a subclass of `NormalCall`, where - * `getCallable` is set to `none()`. The member predicate `ExtractedDataFlowCall::getCallable` is _not_ the mechanism for - * call resolution in global data flow. That mechanism is `getViableCallable`. - * Resolving a call to a non-extracted callable goes via `LibraryCallable::getACall`, which may involve type tracking. - * To avoid that type tracking becomes mutually recursive with data flow, type tracking must use a call graph not including summaries. - * Type tracking sees the callgraph given by `ExtractedDataFlowCall::getACallable`. - * - * We do not support summaries of special methods via the special methods framework, - * the summary would have to identify the call. - * - * We might, while we still extract the standard library, want to support flow summaries of - * extracted callables, so that we can model part of the standard library with flow summaries. - * For this to work, we have be careful with the enclosing callable predicate. - */ - -private import python -private import DataFlowPrivate -private import DataFlowPublic -private import DataFlowImplCommon -private import FlowSummaryImpl::Private -private import FlowSummaryImpl::Public -private import semmle.python.dataflow.new.FlowSummary as FlowSummary - -/** - * A class of callables that are candidates for flow summary modeling. - */ -class SummarizedCallableBase = string; - -/** - * A class of callables that are candidates for neutral modeling. - */ -class NeutralCallableBase = string; - -/** View a `SummarizedCallable` as a `DataFlowCallable`. */ -DataFlowCallable inject(SummarizedCallable c) { result.asLibraryCallable() = c } - -/** Gets the parameter position of the instance parameter. */ -ArgumentPosition callbackSelfParameterPosition() { none() } // disables implicit summary flow to `this` for callbacks - -/** Gets the synthesized data-flow call for `receiver`. */ -SummaryCall summaryDataFlowCall(SummaryNode receiver) { receiver = result.getReceiver() } - -/** Gets the type of content `c`. */ -DataFlowType getContentType(Content c) { any() } - -/** Gets the type of the parameter at the given position. */ -DataFlowType getParameterType(SummarizedCallable c, ParameterPosition pos) { any() } - -/** Gets the return type of kind `rk` for callable `c`. */ -bindingset[c, rk] -DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() } - -/** - * Gets the type of the parameter matching arguments at position `pos` in a - * synthesized call that targets a callback of type `t`. - */ -bindingset[t, pos] -DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() } - -/** - * Gets the return type of kind `rk` in a synthesized call that targets a - * callback of type `t`. - */ -DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() } - -/** Gets the type of synthetic global `sg`. */ -DataFlowType getSyntheticGlobalType(SummaryComponent::SyntheticGlobal sg) { any() } - -/** - * Holds if an external flow summary exists for `c` with input specification - * `input`, output specification `output`, kind `kind`, and provenance `provenance`. - */ -predicate summaryElement( - FlowSummary::SummarizedCallable c, string input, string output, string kind, string provenance -) { - exists(boolean preservesValue | - c.propagatesFlowExt(input, output, preservesValue) and - (if preservesValue = true then kind = "value" else kind = "taint") and - provenance = "manual" - ) -} - -/** - * Holds if a neutral model exists for `c` of kind `kind` - * and with provenance `provenance`. - * Note. Neutral models have not been implemented for Python. - */ -predicate neutralElement(NeutralCallableBase c, string kind, string provenance) { none() } - -/** - * Gets the summary component for specification component `c`, if any. - * - * This covers all the Python-specific components of a flow summary. - */ -SummaryComponent interpretComponentSpecific(AccessPathToken c) { - c = "ListElement" and - result = FlowSummary::SummaryComponent::listElement() - or - c = "SetElement" and - result = FlowSummary::SummaryComponent::setElement() - or - exists(int index | - c.getAnArgument("TupleElement") = index.toString() and - result = FlowSummary::SummaryComponent::tupleElement(index) - ) - or - exists(string key | - c.getAnArgument("DictionaryElement") = key and - result = FlowSummary::SummaryComponent::dictionaryElement(key) - ) - or - c = "DictionaryElementAny" and - result = FlowSummary::SummaryComponent::dictionaryElementAny() - or - exists(string attr | - c.getAnArgument("Attribute") = attr and - result = FlowSummary::SummaryComponent::attribute(attr) - ) -} - -private string getContentSpecific(Content cs) { - cs = TListElementContent() and result = "ListElement" - or - cs = TSetElementContent() and result = "SetElement" - or - exists(int index | - cs = TTupleElementContent(index) and result = "TupleElement[" + index.toString() + "]" - ) - or - exists(string key | - cs = TDictionaryElementContent(key) and result = "DictionaryElement[" + key + "]" - ) - or - cs = TDictionaryElementAnyContent() and result = "DictionaryElementAny" - or - exists(string attr | cs = TAttributeContent(attr) and result = "Attribute[" + attr + "]") -} - -/** Gets the textual representation of a summary component in the format used for MaD models. */ -string getMadRepresentationSpecific(SummaryComponent sc) { - exists(Content c | - sc = TContentSummaryComponent(c) and - result = getContentSpecific(c) - ) -} - -/** Gets the textual representation of a parameter position in the format used for flow summaries. */ -string getParameterPosition(ParameterPosition pos) { - pos.isSelf() and result = "self" - or - exists(int i | - pos.isPositional(i) and - result = i.toString() - ) - or - exists(string name | - pos.isKeyword(name) and - result = name + ":" - ) -} - -/** Gets the textual representation of an argument position in the format used for flow summaries. */ -string getArgumentPosition(ArgumentPosition pos) { - pos.isSelf() and result = "self" - or - exists(int i | - pos.isPositional(i) and - result = i.toString() - ) - or - exists(string name | - pos.isKeyword(name) and - result = name + ":" - ) -} - -/** Holds if input specification component `c` needs a reference. */ -predicate inputNeedsReferenceSpecific(string c) { none() } - -/** Holds if output specification component `c` needs a reference. */ -predicate outputNeedsReferenceSpecific(string c) { none() } - -/** Gets the return kind corresponding to specification `"ReturnValue"`. */ -ReturnKind getReturnValueKind() { any() } - -/** - * All definitions in this module are required by the shared implementation - * (for source/sink interpretation), but they are unused for Python, where - * we rely on API graphs instead. - */ -private module UnusedSourceSinkInterpretation { - /** - * Holds if an external source specification exists for `n` with output specification - * `output`, kind `kind`, and provenance `provenance`. - */ - predicate sourceElement(AstNode n, string output, string kind, string provenance) { none() } - - /** - * Holds if an external sink specification exists for `n` with input specification - * `input`, kind `kind` and provenance `provenance`. - */ - predicate sinkElement(AstNode n, string input, string kind, string provenance) { none() } - - class SourceOrSinkElement = AstNode; - - /** An entity used to interpret a source/sink specification. */ - class InterpretNode extends AstNode_ { - // InterpretNode is going away, this is just a dummy implementation. - // However, we have some old location tests picking them up, so we - // explicitly define them to not exist. - InterpretNode() { none() } - - /** Gets the element that this node corresponds to, if any. */ - SourceOrSinkElement asElement() { none() } - - /** Gets the data-flow node that this node corresponds to, if any. */ - Node asNode() { none() } - - /** Gets the call that this node corresponds to, if any. */ - DataFlowCall asCall() { none() } - - /** Gets the callable that this node corresponds to, if any. */ - DataFlowCallable asCallable() { none() } - - /** Gets the target of this call, if any. */ - SourceOrSinkElement getCallTarget() { none() } - } - - /** Provides additional sink specification logic. */ - predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { none() } - - /** Provides additional source specification logic. */ - predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode node) { none() } -} - -import UnusedSourceSinkInterpretation - -module ParsePositions { - private import FlowSummaryImpl - - private predicate isParamBody(string body) { - exists(AccessPathToken tok | - tok.getName() = "Parameter" and - body = tok.getAnArgument() - ) - } - - private predicate isArgBody(string body) { - exists(AccessPathToken tok | - tok.getName() = "Argument" and - body = tok.getAnArgument() - ) - } - - predicate isParsedPositionalParameterPosition(string c, int i) { - isParamBody(c) and - i = AccessPath::parseInt(c) - } - - predicate isParsedKeywordParameterPosition(string c, string paramName) { - isParamBody(c) and - c = paramName + ":" - } - - predicate isParsedPositionalArgumentPosition(string c, int i) { - isArgBody(c) and - i = AccessPath::parseInt(c) - } - - predicate isParsedKeywordArgumentPosition(string c, string argName) { - isArgBody(c) and - c = argName + ":" - } -} - -/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */ -ArgumentPosition parseParamBody(string s) { - exists(int i | - ParsePositions::isParsedPositionalParameterPosition(s, i) and - result.isPositional(i) - ) - or - exists(string name | - ParsePositions::isParsedKeywordParameterPosition(s, name) and - result.isKeyword(name) - ) - or - s = "self" and - result.isSelf() -} - -/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */ -ParameterPosition parseArgBody(string s) { - exists(int i | - ParsePositions::isParsedPositionalArgumentPosition(s, i) and - result.isPositional(i) - ) - or - exists(string name | - ParsePositions::isParsedKeywordArgumentPosition(s, name) and - result.isKeyword(name) - ) - or - s = "self" and - result.isSelf() -} diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll index 7b9d4f06f31..5fe6dc154a8 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll @@ -177,7 +177,7 @@ class Boolean extends boolean { } private import SummaryTypeTracker as SummaryTypeTracker -private import semmle.python.dataflow.new.FlowSummary as FlowSummary +private import semmle.python.dataflow.new.internal.FlowSummaryImpl as FlowSummaryImpl private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch pragma[noinline] @@ -205,30 +205,30 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { none() } // Callables - class SummarizedCallable = FlowSummary::SummarizedCallable; + class SummarizedCallable = FlowSummaryImpl::Private::SummarizedCallableImpl; // Summaries and their stacks - class SummaryComponent = FlowSummary::SummaryComponent; + class SummaryComponent = FlowSummaryImpl::Private::SummaryComponent; - class SummaryComponentStack = FlowSummary::SummaryComponentStack; + class SummaryComponentStack = FlowSummaryImpl::Private::SummaryComponentStack; - predicate singleton = FlowSummary::SummaryComponentStack::singleton/1; + predicate singleton = FlowSummaryImpl::Private::SummaryComponentStack::singleton/1; - predicate push = FlowSummary::SummaryComponentStack::push/2; + predicate push = FlowSummaryImpl::Private::SummaryComponentStack::push/2; // Relating content to summaries - predicate content = FlowSummary::SummaryComponent::content/1; + predicate content = FlowSummaryImpl::Private::SummaryComponent::content/1; SummaryComponent withoutContent(TypeTrackerContent contents) { none() } SummaryComponent withContent(TypeTrackerContent contents) { none() } - predicate return = FlowSummary::SummaryComponent::return/0; + predicate return = FlowSummaryImpl::Private::SummaryComponent::return/0; // Relating nodes to summaries Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate) { exists(DataFlowDispatch::ParameterPosition pos | - arg = FlowSummary::SummaryComponent::argument(pos) and + arg = FlowSummaryImpl::Private::SummaryComponent::argument(pos) and argumentPositionMatch(call, result, pos) and isPostUpdate = [false, true] // todo: implement when/if Python uses post-update nodes in type tracking ) @@ -238,7 +238,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { exists( DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos, Parameter p | - param = FlowSummary::SummaryComponent::parameter(apos) and + param = FlowSummaryImpl::Private::SummaryComponent::parameter(apos) and DataFlowDispatch::parameterMatch(ppos, apos) and result.asCfgNode().getNode() = p and ( @@ -254,14 +254,16 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { } Node returnOf(Node callable, SummaryComponent return) { - return = FlowSummary::SummaryComponent::return() and + return = FlowSummaryImpl::Private::SummaryComponent::return() and // `result` should be the return value of a callable expression (lambda or function) referenced by `callable` result.asCfgNode() = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode() } // Relating callables to nodes - Node callTo(SummarizedCallable callable) { result = callable.getACallSimple() } + Node callTo(SummarizedCallable callable) { + result = callable.(DataFlowDispatch::LibraryCallable).getACallSimple() + } } private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow; diff --git a/python/ql/lib/semmle/python/frameworks/Flask.qll b/python/ql/lib/semmle/python/frameworks/Flask.qll index 3feee0cf668..db1b40c8457 100644 --- a/python/ql/lib/semmle/python/frameworks/Flask.qll +++ b/python/ql/lib/semmle/python/frameworks/Flask.qll @@ -624,7 +624,7 @@ module Flask { .getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false @@ -650,7 +650,7 @@ module Flask { .getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and // Technically it's `Iterator[str]`, but list will do :) output = "ReturnValue.ListElement" and diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 11806ad3a59..9def059cdcf 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3085,7 +3085,7 @@ private module StdlibPrivate { result = API::moduleImport("re").getMember("compile").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input in ["Argument[0]", "Argument[pattern:]"] and output = "ReturnValue.Attribute[pattern]" and preservesValue = true @@ -3116,7 +3116,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(string arg | this = "re.Match" and arg = "Argument[1]" or @@ -3173,7 +3173,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { methodName = "expand" and preservesValue = false and ( @@ -3229,7 +3229,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(int offset | // for non-compiled regex the first argument is the pattern, so we need to // account for this difference @@ -4079,7 +4079,7 @@ private module StdlibPrivate { result = API::builtin("dict").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | input = "Argument[0].DictionaryElement[" + key + "]" and output = "ReturnValue.DictionaryElement[" + key + "]" and @@ -4108,7 +4108,7 @@ private module StdlibPrivate { result = API::builtin("list").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[0].ListElement" or @@ -4138,7 +4138,7 @@ private module StdlibPrivate { result = API::builtin("tuple").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | input = "Argument[0].TupleElement[" + i.toString() + "]" and output = "ReturnValue.TupleElement[" + i.toString() + "]" and @@ -4163,7 +4163,7 @@ private module StdlibPrivate { result = API::builtin("set").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[0].ListElement" or @@ -4193,8 +4193,8 @@ private module StdlibPrivate { result = API::builtin("frozenset").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - any(SetSummary s).propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + any(SetSummary s).propagatesFlow(input, output, preservesValue) } } @@ -4211,7 +4211,7 @@ private module StdlibPrivate { result = API::builtin("reversed").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[0].ListElement" or @@ -4241,7 +4241,7 @@ private module StdlibPrivate { result = API::builtin("sorted").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(string content | content = "ListElement" or @@ -4273,7 +4273,7 @@ private module StdlibPrivate { result = API::builtin("iter").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[0].ListElement" or @@ -4303,7 +4303,7 @@ private module StdlibPrivate { result = API::builtin("next").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[0].ListElement" or @@ -4336,7 +4336,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(string content | content = "ListElement" or @@ -4378,7 +4378,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].ListElement" and output = "ReturnValue" and preservesValue = true @@ -4415,7 +4415,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].DictionaryElement[" + key + "]" and output = "ReturnValue" and preservesValue = true @@ -4438,7 +4438,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].DictionaryElement[" + key + "]" and output = "ReturnValue" and preservesValue = true @@ -4460,7 +4460,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // default value input = "Argument[1]" and output = "ReturnValue" and @@ -4483,7 +4483,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | input = "Argument[self].DictionaryElement[" + key + "]" and output = "ReturnValue.TupleElement[1]" and @@ -4509,7 +4509,7 @@ private module StdlibPrivate { result.(DataFlow::AttrRead).getAttributeName() = "setdefault" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // store/read steps with dictionary content of this is modeled in DataFlowPrivate input = "Argument[1]" and output = "ReturnValue" and @@ -4538,7 +4538,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // If key is in the dictionary, return its value. input = "Argument[self].DictionaryElement[" + key + "]" and output = "ReturnValue" and @@ -4567,7 +4567,7 @@ private module StdlibPrivate { result.(DataFlow::AttrRead).getAttributeName() = "values" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | input = "Argument[self].DictionaryElement[" + key + "]" and output = "ReturnValue.ListElement" and @@ -4594,7 +4594,7 @@ private module StdlibPrivate { result.(DataFlow::AttrRead).getAttributeName() = "keys" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent input = "Argument[self]" and output = "ReturnValue" and @@ -4618,7 +4618,7 @@ private module StdlibPrivate { result.(DataFlow::AttrRead).getAttributeName() = "items" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | input = "Argument[self].DictionaryElement[" + key + "]" and output = "ReturnValue.ListElement.TupleElement[1]" and @@ -4648,7 +4648,7 @@ private module StdlibPrivate { result.(DataFlow::AttrRead).getAttributeName() = "append" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // newly added element added to this input = "Argument[0]" and output = "Argument[self].ListElement" and @@ -4675,7 +4675,7 @@ private module StdlibPrivate { result.(DataFlow::AttrRead).getAttributeName() = "add" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // newly added element added to this input = "Argument[0]" and output = "Argument[self].SetElement" and @@ -4705,7 +4705,7 @@ private module StdlibPrivate { API::moduleImport("os").getMember(["getenv", "getenvb"]).getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input in ["Argument[1]", "Argument[default:]"] and output = "ReturnValue" and preservesValue = true diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll index 1cb4e189339..e76a100263c 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll @@ -70,8 +70,8 @@ private module API = Specific::API; private module DataFlow = Specific::DataFlow; -private import Specific::AccessPathSyntax private import ApiGraphModelsExtensions as Extensions +import codeql.dataflow.internal.AccessPathSyntax /** Module containing hooks for providing input data to be interpreted as a model. */ module ModelInput { @@ -327,29 +327,29 @@ predicate isRelevantFullPath(string type, string path) { } /** A string from a CSV row that should be parsed as an access path. */ -private class AccessPathRange extends AccessPath::Range { - AccessPathRange() { - isRelevantFullPath(_, this) - or - exists(string type | isRelevantType(type) | - summaryModel(type, _, this, _, _) or - summaryModel(type, _, _, this, _) - ) - or - typeVariableModel(_, this) - } +private predicate accessPathRange(string s) { + isRelevantFullPath(_, s) + or + exists(string type | isRelevantType(type) | + summaryModel(type, _, s, _, _) or + summaryModel(type, _, _, s, _) + ) + or + typeVariableModel(_, s) } +import AccessPath + /** * Gets a successor of `node` in the API graph. */ bindingset[token] -API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { +API::Node getSuccessorFromNode(API::Node node, AccessPathTokenBase token) { // API graphs use the same label for arguments and parameters. An edge originating from a // use-node represents an argument, and an edge originating from a def-node represents a parameter. // We just map both to the same thing. token.getName() = ["Argument", "Parameter"] and - result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument())) + result = node.getParameter(parseIntUnbounded(token.getAnArgument())) or token.getName() = "ReturnValue" and result = node.getReturn() @@ -362,11 +362,9 @@ API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { * Gets an API-graph successor for the given invocation. */ bindingset[token] -API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken token) { +API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathTokenBase token) { token.getName() = "Argument" and - result = - invoke - .getParameter(AccessPath::parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) + result = invoke.getParameter(parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) or token.getName() = "ReturnValue" and result = invoke.getReturn() @@ -378,10 +376,12 @@ API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken to /** * Holds if `invoke` invokes a call-site filter given by `token`. */ -pragma[inline] -private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, AccessPathToken token) { +bindingset[token] +private predicate invocationMatchesCallSiteFilter( + Specific::InvokeNode invoke, AccessPathTokenBase token +) { token.getName() = "WithArity" and - invoke.getNumArgument() = AccessPath::parseIntUnbounded(token.getAnArgument()) + invoke.getNumArgument() = parseIntUnbounded(token.getAnArgument()) or Specific::invocationMatchesExtraCallSiteFilter(invoke, token) } diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index d0a5d1b9da5..c8cb27fdae2 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -4,14 +4,14 @@ * It must export the following members: * ```ql * class Unit // a unit type - * module AccessPathSyntax // a re-export of the AccessPathSyntax module + * * class InvokeNode // a type representing an invocation connected to the API graph * module API // the API graph module * predicate isPackageUsed(string package) * API::Node getExtraNodeFromPath(string package, string type, string path, int n) - * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) - * API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathToken token) - * predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathToken token) + * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) + * API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathTokenBase token) + * predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathTokenBase token) * InvokeNode getAnInvocationOf(API::Node node) * predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) * predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) @@ -23,9 +23,7 @@ private import python as PY private import ApiGraphModels import semmle.python.ApiGraphs::API as API // Re-export libraries needed by ApiGraphModels.qll -import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow -private import AccessPathSyntax /** * Holds if models describing `type` may be relevant for the analysis of this database. @@ -49,7 +47,7 @@ API::Node getExtraNodeFromType(string type) { result = API::moduleImport(type) } * Gets a Python-specific API graph successor of `node` reachable by resolving `token`. */ bindingset[token] -API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { +API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) { token.getName() = "Member" and result = node.getMember(token.getAnArgument()) or @@ -89,7 +87,7 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { * Gets a Python-specific API graph successor of `node` reachable by resolving `token`. */ bindingset[token] -API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathToken token) { +API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathTokenBase token) { token.getName() = "Instance" and result = node.getReturn() or @@ -129,7 +127,7 @@ API::Node getAFuzzySuccessor(API::Node node) { * Holds if `invoke` matches the PY-specific call site filter in `token`. */ bindingset[token] -predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathToken token) { +predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathTokenBase token) { token.getName() = "Call" and exists(invoke) // there is only one kind of call in Python. } diff --git a/python/ql/test/experimental/dataflow/summaries-checks/invalid-spec.ql b/python/ql/test/experimental/dataflow/summaries-checks/invalid-spec.ql index 74863d2fde3..19af6c8e744 100644 --- a/python/ql/test/experimental/dataflow/summaries-checks/invalid-spec.ql +++ b/python/ql/test/experimental/dataflow/summaries-checks/invalid-spec.ql @@ -3,6 +3,6 @@ import semmle.python.dataflow.new.FlowSummary import semmle.python.dataflow.new.internal.FlowSummaryImpl query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) { - (sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and + (sc.propagatesFlow(s, _, _) or sc.propagatesFlow(_, s, _)) and Private::External::invalidSpecComponent(s, c) } diff --git a/python/ql/test/experimental/dataflow/summaries-checks/missing-attribute-content.ql b/python/ql/test/experimental/dataflow/summaries-checks/missing-attribute-content.ql index 08287efa52c..eb5133318c6 100644 --- a/python/ql/test/experimental/dataflow/summaries-checks/missing-attribute-content.ql +++ b/python/ql/test/experimental/dataflow/summaries-checks/missing-attribute-content.ql @@ -4,7 +4,7 @@ import semmle.python.dataflow.new.internal.FlowSummaryImpl from SummarizedCallable sc, string s, string c, string attr where - (sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and + (sc.propagatesFlow(s, _, _) or sc.propagatesFlow(_, s, _)) and Private::External::invalidSpecComponent(s, c) and c = "Attribute[" + attr + "]" select "The attribute \"" + attr + diff --git a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll index cdd61420bbb..acd9c2136c7 100644 --- a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll +++ b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll @@ -18,6 +18,10 @@ module RecursionGuard { (TT::callStep(_, _) implies any()) } + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + none() + } + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } } } @@ -31,7 +35,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = true @@ -48,7 +52,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1]" and output = "Argument[0].Parameter[0]" and preservesValue = true @@ -68,7 +72,7 @@ private class SummarizedCallableReversed extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0].ListElement" and output = "ReturnValue.ListElement" and preservesValue = true @@ -84,7 +88,7 @@ private class SummarizedCallableMap extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1].ListElement" and output = "Argument[0].Parameter[0]" and preservesValue = true @@ -104,7 +108,7 @@ private class SummarizedCallableAppend extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false @@ -126,7 +130,7 @@ private class SummarizedCallableJsonLoads extends SummarizedCallable { result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue.ListElement" and preservesValue = true diff --git a/python/ql/test/experimental/dataflow/summaries/summaries.ql b/python/ql/test/experimental/dataflow/summaries/summaries.ql index d3c0206d41f..e2a61cd6f46 100644 --- a/python/ql/test/experimental/dataflow/summaries/summaries.ql +++ b/python/ql/test/experimental/dataflow/summaries/summaries.ql @@ -12,7 +12,7 @@ import experimental.dataflow.testTaintConfig private import TestSummaries query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) { - (sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and + (sc.propagatesFlow(s, _, _) or sc.propagatesFlow(_, s, _)) and Private::External::invalidSpecComponent(s, c) } diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll index 8d626b332a3..fa98b6f84a0 100644 --- a/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll @@ -18,6 +18,10 @@ module RecursionGuard { (TT::callStep(_, _) implies any()) } + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + none() + } + override DataFlow::CallCfgNode getACallSimple() { none() } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } @@ -39,7 +43,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = true @@ -58,7 +62,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1]" and output = "Argument[0].Parameter[0]" and preservesValue = true @@ -80,7 +84,7 @@ private class SummarizedCallableReversed extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0].ListElement" and output = "ReturnValue.ListElement" and preservesValue = true @@ -98,7 +102,7 @@ private class SummarizedCallableMap extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1].ListElement" and output = "Argument[0].Parameter[0]" and preservesValue = true @@ -120,7 +124,7 @@ private class SummarizedCallableAppend extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false @@ -144,7 +148,7 @@ private class SummarizedCallableJsonLoads extends SummarizedCallable { result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue.ListElement" and preservesValue = true @@ -163,7 +167,7 @@ private class SummarizedCallableReadSecret extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0].Attribute[secret]" and output = "ReturnValue" and preservesValue = true @@ -181,7 +185,7 @@ private class SummarizedCallableSetSecret extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1]" and output = "Argument[0].Attribute[secret]" and preservesValue = true diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 1564c0eb40a..701c74f1246 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -1,5 +1,5 @@ import python -import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax +private import semmle.python.frameworks.data.internal.ApiGraphModels as ApiGraphModels import semmle.python.frameworks.data.ModelsAsData import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.DataFlow @@ -27,6 +27,6 @@ query predicate isSource(DataFlow::Node node, string kind) { node = ModelOutput::getASourceNode(kind).asSource() } -query predicate syntaxErrors(AccessPathSyntax::AccessPath path) { path.hasSyntaxError() } +query predicate syntaxErrors(ApiGraphModels::AccessPath path) { path.hasSyntaxError() } query predicate warning = ModelOutput::getAWarning/0; diff --git a/python/ql/test/library-tests/frameworks/data/warnings.ql b/python/ql/test/library-tests/frameworks/data/warnings.ql index c6561797164..71487889ca1 100644 --- a/python/ql/test/library-tests/frameworks/data/warnings.ql +++ b/python/ql/test/library-tests/frameworks/data/warnings.ql @@ -1,5 +1,4 @@ import python -import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax import semmle.python.frameworks.data.internal.ApiGraphModels as ApiGraphModels import semmle.python.frameworks.data.ModelsAsData