Python: Use FlowSummaryImpl from dataflow pack

This commit is contained in:
Tom Hvitved
2023-11-24 11:48:08 +01:00
parent a2093c9aa2
commit faaa558ed9
19 changed files with 305 additions and 2144 deletions

View File

@@ -56,7 +56,6 @@
"DataFlow Java/C#/Go/Ruby/Python/Swift Flow Summaries": [ "DataFlow Java/C#/Go/Ruby/Python/Swift Flow Summaries": [
"java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll", "java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll",
"go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll", "go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll",
"python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll",
"swift/ql/lib/codeql/swift/dataflow/internal/FlowSummaryImpl.qll" "swift/ql/lib/codeql/swift/dataflow/internal/FlowSummaryImpl.qll"
], ],
"SsaReadPosition Java/C#": [ "SsaReadPosition Java/C#": [
@@ -467,7 +466,6 @@
"AccessPathSyntax": [ "AccessPathSyntax": [
"go/ql/lib/semmle/go/dataflow/internal/AccessPathSyntax.qll", "go/ql/lib/semmle/go/dataflow/internal/AccessPathSyntax.qll",
"java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll", "java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll",
"python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll",
"swift/ql/lib/codeql/swift/dataflow/internal/AccessPathSyntax.qll" "swift/ql/lib/codeql/swift/dataflow/internal/AccessPathSyntax.qll"
], ],
"IncompleteUrlSubstringSanitization": [ "IncompleteUrlSubstringSanitization": [

View File

@@ -13,61 +13,14 @@ private module Summaries {
private import semmle.python.Frameworks private import semmle.python.Frameworks
} }
class SummaryComponent = Impl::Public::SummaryComponent; deprecated class SummaryComponent = Impl::Private::SummaryComponent;
/** Provides predicates for constructing summary components. */ /** Provides predicates for constructing summary components. */
module SummaryComponent { deprecated module SummaryComponent = Impl::Private::SummaryComponent;
private import Impl::Public::SummaryComponent as SC
predicate parameter = SC::parameter/1; deprecated class SummaryComponentStack = Impl::Private::SummaryComponentStack;
predicate argument = SC::argument/1; deprecated module SummaryComponentStack = Impl::Private::SummaryComponentStack;
predicate content = SC::content/1;
/** Gets a summary component that represents a list element. */
SummaryComponent listElement() { result = content(any(ListElementContent c)) }
/** Gets a summary component that represents a set element. */
SummaryComponent setElement() { result = content(any(SetElementContent c)) }
/** Gets a summary component that represents a tuple element. */
SummaryComponent tupleElement(int index) {
exists(TupleElementContent c | c.getIndex() = index and result = content(c))
}
/** Gets a summary component that represents a dictionary element. */
SummaryComponent dictionaryElement(string key) {
exists(DictionaryElementContent c | c.getKey() = key and result = content(c))
}
/** Gets a summary component that represents a dictionary element at any key. */
SummaryComponent dictionaryElementAny() { result = content(any(DictionaryElementAnyContent c)) }
/** Gets a summary component that represents an attribute element. */
SummaryComponent attribute(string attr) {
exists(AttributeContent c | c.getAttribute() = attr and result = content(c))
}
/** Gets a summary component that represents the return value of a call. */
SummaryComponent return() { result = SC::return(any(ReturnKind rk)) }
}
class SummaryComponentStack = Impl::Public::SummaryComponentStack;
/** Provides predicates for constructing stacks of summary components. */
module SummaryComponentStack {
private import Impl::Public::SummaryComponentStack as SCS
predicate singleton = SCS::singleton/1;
predicate push = SCS::push/2;
predicate argument = SCS::argument/1;
/** Gets a singleton stack representing the return value of a call. */
SummaryComponentStack return() { result = singleton(SummaryComponent::return()) }
}
/** A callable with a flow summary, identified by a unique string. */ /** A callable with a flow summary, identified by a unique string. */
abstract class SummarizedCallable extends LibraryCallable, Impl::Public::SummarizedCallable { abstract class SummarizedCallable extends LibraryCallable, Impl::Public::SummarizedCallable {
@@ -75,21 +28,14 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari
SummarizedCallable() { any() } SummarizedCallable() { any() }
/** /**
* Same as * DEPRECATED: Use `propagatesFlow` instead.
*
* ```ql
* propagatesFlow(
* SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
* )
* ```
*
* but uses an external (string) representation of the input and output stacks.
*/ */
pragma[nomagic] deprecated predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
predicate propagatesFlowExt(string input, string output, boolean preservesValue) { none() } this.propagatesFlow(input, output, preservesValue)
}
} }
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack; deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack;
private class SummarizedCallableFromModel extends SummarizedCallable { private class SummarizedCallableFromModel extends SummarizedCallable {
string type; string type;
@@ -109,7 +55,7 @@ private class SummarizedCallableFromModel extends SummarizedCallable {
) )
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) | exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) |
kind = "value" and kind = "value" and
preservesValue = true preservesValue = true

View File

@@ -1,182 +0,0 @@
/**
* Module for parsing access paths from MaD models, both the identifying access path used
* by dynamic languages, and the input/output specifications for summary steps.
*
* This file is used by the shared data flow library and by the JavaScript libraries
* (which do not use the shared data flow libraries).
*/
/**
 * Holds if matching `input` against `regexp` yields `capture1` as the first
 * capture group and `capture2` as the second capture group.
 */
bindingset[input, regexp]
private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) {
  input.regexpCapture(regexp, 2) = capture2 and
  input.regexpCapture(regexp, 1) = capture1
}
/** Companion module to the `AccessPath` class. */
module AccessPath {
  /** A string that should be parsed as an access path. */
  abstract class Range extends string {
    bindingset[this]
    Range() { any() }
  }

  /**
   * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value
   * of the constant or any value contained in the interval.
   */
  bindingset[arg]
  int parseInt(string arg) {
    result = arg.toInt()
    or
    // Match "n1..n2"
    exists(string lo, string hi |
      regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and
      result = [lo.toInt() .. hi.toInt()]
    )
  }

  /**
   * Parses a lower-bounded interval `n..` and gets the lower bound.
   */
  bindingset[arg]
  int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() }

  /**
   * Parses an integer constant or interval (bounded or unbounded) that explicitly
   * references the arity, such as `N-1` or `N-3..N-1`.
   *
   * Note that expressions of form `N-x` will never resolve to a negative index,
   * even if `N` is zero (it will have no result in that case).
   */
  bindingset[arg, arity]
  private int parseIntWithExplicitArity(string arg, int arity) {
    result >= 0 and // do not allow N-1 to resolve to a negative index
    exists(string lo |
      // N-x
      lo = arg.regexpCapture("N-(\\d+)", 1) and
      result = arity - lo.toInt()
      or
      // N-x..
      // This is an interval, so it must be a range (`..`) rather than a two-element
      // set literal; otherwise `N-3..` would skip `N-2`.
      lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and
      result = [arity - lo.toInt() .. arity - 1]
    )
    or
    exists(string lo, string hi |
      // x..N-y
      regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and
      result = [lo.toInt() .. arity - hi.toInt()]
      or
      // N-x..N-y
      regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and
      result = [arity - lo.toInt() .. arity - hi.toInt()] and
      result >= 0
      or
      // N-x..y
      regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and
      result = [arity - lo.toInt() .. hi.toInt()] and
      result >= 0
    )
  }

  /**
   * Parses an integer constant or interval (bounded or unbounded) and gets any
   * of the integers contained within (of which there may be infinitely many).
   *
   * Has no result for arguments involving an explicit arity, such as `N-1`.
   */
  bindingset[arg, result]
  int parseIntUnbounded(string arg) {
    result = parseInt(arg)
    or
    result >= parseLowerBound(arg)
  }

  /**
   * Parses an integer constant or interval (bounded or unbounded) that
   * may reference the arity of a call, such as `N-1` or `N-3..N-1`.
   *
   * Note that expressions of form `N-x` will never resolve to a negative index,
   * even if `N` is zero (it will have no result in that case).
   */
  bindingset[arg, arity]
  int parseIntWithArity(string arg, int arity) {
    result = parseInt(arg)
    or
    // Lower-bounded interval `n..`: everything from `n` up to the last index.
    result in [parseLowerBound(arg) .. arity - 1]
    or
    result = parseIntWithExplicitArity(arg, arity)
  }
}
/**
* Gets the `n`th token on the access path as a string.
*
* Tokens are located with `regexpFind`; `n` is the occurrence index of the match
* and the match offset is ignored.
*/
private string getRawToken(AccessPath path, int n) {
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
// Instead use regexpFind to match valid tokens, and supplement with a final length
// check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token.
result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
}
/**
* A string that occurs as an access path (either identifying or input/output spec)
* which might be relevant for this database.
*/
class AccessPath extends string instanceof AccessPath::Range {
/** Holds if this string is not a syntactically valid access path. */
predicate hasSyntaxError() {
// If the lengths match, all characters must have been included in a token
// or seen by the `.` lookahead pattern.
// Each token is counted as its own length plus one; the trailing `- 1`
// removes the extra one added for the final token.
this != "" and
not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1
}
/** Gets the `n`th token on the access path (if there are no syntax errors). */
AccessPathToken getToken(int n) {
result = getRawToken(this, n) and
not this.hasSyntaxError()
}
/** Gets the number of tokens on the path (if there are no syntax errors). */
int getNumToken() {
result = count(int n | exists(getRawToken(this, n))) and
not this.hasSyntaxError()
}
}
/**
 * A single component of an access path, for example `Argument[1]` or `ReturnValue`.
 * The same token may appear in several access paths.
 */
class AccessPathToken extends string {
  AccessPathToken() { getRawToken(_, _) = this }

  /** Gets capture group `group` of this token: group 1 is the name, group 2 the bracketed text. */
  private string getPart(int group) {
    this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", group) = result
  }

  /** Gets the name of the token, such as `Member` from `Member[x]` */
  string getName() { this.getPart(1) = result }

  /**
   * Gets the text between the brackets, such as `1,2` from `Member[1,2]`.
   * Has no result if the token has no arguments.
   */
  string getArgumentList() { this.getPart(2) = result }

  /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
  string getArgument(int n) {
    exists(string allArgs | allArgs = this.getArgumentList() |
      result = allArgs.splitAt(",", n).trim()
    )
  }

  /** Gets the `n`th argument to this token, provided the token is named `name`. */
  pragma[nomagic]
  string getArgument(string name, int n) { result = this.getArgument(n) and this.getName() = name }

  /** Gets any argument to this token, such as `x` or `y` from `Member[x,y]`. */
  string getAnArgument() { exists(int idx | result = this.getArgument(idx)) }

  /** Gets any argument to this token, provided the token is named `name`. */
  string getAnArgument(string name) { exists(int idx | result = this.getArgument(name, idx)) }

  /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
  int getNumArgument() { result = count(int idx | exists(this.getArgument(idx))) }
}

View File

@@ -36,7 +36,6 @@ private import python
private import DataFlowPublic private import DataFlowPublic
private import DataFlowPrivate private import DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl private import FlowSummaryImpl as FlowSummaryImpl
private import FlowSummaryImplSpecific as FlowSummaryImplSpecific
private import semmle.python.internal.CachedStages private import semmle.python.internal.CachedStages
private import semmle.python.dataflow.new.internal.TypeTracker::CallGraphConstruction as CallGraphConstruction private import semmle.python.dataflow.new.internal.TypeTracker::CallGraphConstruction as CallGraphConstruction
@@ -49,13 +48,13 @@ newtype TParameterPosition =
// since synthetic parameters are made for a synthetic summary callable, based on // since synthetic parameters are made for a synthetic summary callable, based on
// what Argument positions they have flow for, we need to make sure we have such // what Argument positions they have flow for, we need to make sure we have such
// parameter positions available. // parameter positions available.
FlowSummaryImplSpecific::ParsePositions::isParsedPositionalArgumentPosition(_, index) FlowSummaryImpl::ParsePositions::isParsedPositionalArgumentPosition(_, index)
} or } or
TKeywordParameterPosition(string name) { TKeywordParameterPosition(string name) {
name = any(Parameter p).getName() name = any(Parameter p).getName()
or or
// see comment for TPositionalParameterPosition // see comment for TPositionalParameterPosition
FlowSummaryImplSpecific::ParsePositions::isParsedKeywordArgumentPosition(_, name) FlowSummaryImpl::ParsePositions::isParsedKeywordArgumentPosition(_, name)
} or } or
TStarArgsParameterPosition(int index) { TStarArgsParameterPosition(int index) {
// since `.getPosition` does not work for `*args`, we need *args parameter positions // since `.getPosition` does not work for `*args`, we need *args parameter positions
@@ -136,13 +135,13 @@ newtype TArgumentPosition =
// since synthetic calls within a summarized callable could use a unique argument // since synthetic calls within a summarized callable could use a unique argument
// position, we need to ensure we make these available (these are specified as // position, we need to ensure we make these available (these are specified as
// parameters in the flow-summary spec) // parameters in the flow-summary spec)
FlowSummaryImplSpecific::ParsePositions::isParsedPositionalParameterPosition(_, index) FlowSummaryImpl::ParsePositions::isParsedPositionalParameterPosition(_, index)
} or } or
TKeywordArgumentPosition(string name) { TKeywordArgumentPosition(string name) {
exists(any(CallNode c).getArgByName(name)) exists(any(CallNode c).getArgByName(name))
or or
// see comment for TPositionalArgumentPosition // see comment for TPositionalArgumentPosition
FlowSummaryImplSpecific::ParsePositions::isParsedKeywordParameterPosition(_, name) FlowSummaryImpl::ParsePositions::isParsedKeywordParameterPosition(_, name)
} or } or
TStarArgsArgumentPosition(int index) { TStarArgsArgumentPosition(int index) {
exists(Call c | c.getPositionalArg(index) instanceof Starred) exists(Call c | c.getPositionalArg(index) instanceof Starred)
@@ -1559,12 +1558,15 @@ private class SummaryReturnNode extends FlowSummaryNode, ReturnNode {
} }
private class SummaryArgumentNode extends FlowSummaryNode, ArgumentNode { private class SummaryArgumentNode extends FlowSummaryNode, ArgumentNode {
private SummaryCall call_;
private ArgumentPosition pos_;
SummaryArgumentNode() { SummaryArgumentNode() {
FlowSummaryImpl::Private::summaryArgumentNode(_, this.getSummaryNode(), _) FlowSummaryImpl::Private::summaryArgumentNode(call_.getReceiver(), this.getSummaryNode(), _)
} }
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
FlowSummaryImpl::Private::summaryArgumentNode(call, this.getSummaryNode(), pos) call = call_ and pos = pos_
} }
} }
@@ -1662,10 +1664,16 @@ private module OutNodes {
} }
private class SummaryOutNode extends FlowSummaryNode, OutNode { private class SummaryOutNode extends FlowSummaryNode, OutNode {
SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this.getSummaryNode(), _) } private SummaryCall call;
private ReturnKind kind_;
SummaryOutNode() {
FlowSummaryImpl::Private::summaryOutNode(call.getReceiver(), this.getSummaryNode(), kind_)
}
override DataFlowCall getCall(ReturnKind kind) { override DataFlowCall getCall(ReturnKind kind) {
FlowSummaryImpl::Private::summaryOutNode(result, this.getSummaryNode(), kind) result = call and
kind = kind_
} }
} }
} }

View File

@@ -1028,7 +1028,10 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves
* by default as a heuristic. * by default as a heuristic.
*/ */
predicate allowParameterReturnInSelf(ParameterNode p) { predicate allowParameterReturnInSelf(ParameterNode p) {
FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p) exists(DataFlowCallable c, ParameterPosition pos |
p.(ParameterNodeImpl).isParameterOf(c, pos) and
FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(c.asLibraryCallable(), pos)
)
} }
/** An approximated `Content`. */ /** An approximated `Content`. */

View File

@@ -1,324 +0,0 @@
/**
* Provides Python specific classes and predicates for defining flow summaries.
*
* Flow summaries are defined for callables that are not extracted.
* Such callables go by different names in different parts of our codebase:
*
* - in `FlowSummary.qll`, which is user facing, they are called `SummarizedCallable`s.
* These contain summaries, implemented by the user via the predicates `propagatesFlow` and `propagatesFlowExt`.
*
* - in the data flow layer, they are called `LibraryCallable`s (as in the Ruby codebase).
* These are identified by strings and have predicates for finding calls to them.
*
* Having both extracted and non-extracted callables means that we now have three types of calls:
* - Extracted calls to extracted callables, either `NormalCall` or `SpecialCall`. These are handled by standard data flow.
* - Extracted calls to non-extracted callables, `LibraryCall`. These are handled by looking up the relevant summary when the
* global data flow graph is connected up via `getViableCallable`.
* - Non-extracted calls, `SummaryCall`. These are synthesised by the flow summary framework.
*
* The first two can be referred to as `ExtractedDataFlowCall`. In fact, `LibraryCall` is a subclass of `NormalCall`, where
* `getCallable` is set to `none()`. The member predicate `ExtractedDataFlowCall::getCallable` is _not_ the mechanism for
* call resolution in global data flow. That mechanism is `getViableCallable`.
* Resolving a call to a non-extracted callable goes via `LibraryCallable::getACall`, which may involve type tracking.
* To prevent type tracking from becoming mutually recursive with data flow, type tracking must use a call graph that does not include summaries.
* Type tracking sees the callgraph given by `ExtractedDataFlowCall::getACallable`.
*
* We do not support summaries of special methods via the special methods framework;
* the summary would have to identify the call.
*
* We might, while we still extract the standard library, want to support flow summaries of
* extracted callables, so that we can model part of the standard library with flow summaries.
* For this to work, we have to be careful with the enclosing callable predicate.
*/
private import python
private import DataFlowPrivate
private import DataFlowPublic
private import DataFlowImplCommon
private import FlowSummaryImpl::Private
private import FlowSummaryImpl::Public
private import semmle.python.dataflow.new.FlowSummary as FlowSummary
/**
* A class of callables that are candidates for flow summary modeling.
*
* This is an alias for `string`.
*/
class SummarizedCallableBase = string;
/**
* A class of callables that are candidates for neutral modeling.
*
* This is an alias for `string`.
*/
class NeutralCallableBase = string;
/** Gets the `DataFlowCallable` whose library-callable view is the summarized callable `c`. */
DataFlowCallable inject(SummarizedCallable c) { c = result.asLibraryCallable() }
/**
* Gets the parameter position of the instance parameter.
*
* This predicate has no result (its body is `none()`).
*/
ArgumentPosition callbackSelfParameterPosition() { none() } // disables implicit summary flow to `this` for callbacks
/** Gets the synthesized `SummaryCall` whose receiver is `receiver`. */
SummaryCall summaryDataFlowCall(SummaryNode receiver) { result.getReceiver() = receiver }
/**
* Gets the type of content `c`.
*
* The body is `any()`, so no restriction is placed on the result.
*/
DataFlowType getContentType(Content c) { any() }
/**
* Gets the type of the parameter at the given position.
*
* The body is `any()`, so no restriction is placed on the result.
*/
DataFlowType getParameterType(SummarizedCallable c, ParameterPosition pos) { any() }
/**
* Gets the return type of kind `rk` for callable `c`.
*
* The body is `any()`, so no restriction is placed on the result.
*/
bindingset[c, rk]
DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
/**
* Gets the type of the parameter matching arguments at position `pos` in a
* synthesized call that targets a callback of type `t`.
*
* The body is `any()`, so no restriction is placed on the result.
*/
bindingset[t, pos]
DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() }
/**
* Gets the return type of kind `rk` in a synthesized call that targets a
* callback of type `t`.
*
* The body is `any()`, so no restriction is placed on the result.
*/
DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() }
/**
* Gets the type of synthetic global `sg`.
*
* The body is `any()`, so no restriction is placed on the result.
*/
DataFlowType getSyntheticGlobalType(SummaryComponent::SyntheticGlobal sg) { any() }
/**
 * Holds if callable `c` has a flow summary from `input` to `output`, as reported by
 * `propagatesFlowExt`.
 *
 * `kind` is `"value"` for value-preserving summaries and `"taint"` otherwise;
 * `provenance` is always `"manual"`.
 */
predicate summaryElement(
  FlowSummary::SummarizedCallable c, string input, string output, string kind, string provenance
) {
  provenance = "manual" and
  exists(boolean valuePreserving | c.propagatesFlowExt(input, output, valuePreserving) |
    valuePreserving = true and kind = "value"
    or
    valuePreserving = false and kind = "taint"
  )
}
/**
* Holds if a neutral model exists for `c` of kind `kind`
* and with provenance `provenance`.
* Note. Neutral models have not been implemented for Python, so this
* predicate never holds (its body is `none()`).
*/
predicate neutralElement(NeutralCallableBase c, string kind, string provenance) { none() }
/**
 * Gets the summary component denoted by the specification token `c`, if any.
 *
 * Handles the Python-specific tokens `ListElement`, `SetElement`, `TupleElement[i]`,
 * `DictionaryElement[key]`, `DictionaryElementAny`, and `Attribute[name]`.
 */
SummaryComponent interpretComponentSpecific(AccessPathToken c) {
  // Tokens without arguments
  c = "ListElement" and
  result = FlowSummary::SummaryComponent::listElement()
  or
  c = "SetElement" and
  result = FlowSummary::SummaryComponent::setElement()
  or
  c = "DictionaryElementAny" and
  result = FlowSummary::SummaryComponent::dictionaryElementAny()
  or
  // Tokens with arguments
  exists(int idx | idx.toString() = c.getAnArgument("TupleElement") |
    result = FlowSummary::SummaryComponent::tupleElement(idx)
  )
  or
  result = FlowSummary::SummaryComponent::dictionaryElement(c.getAnArgument("DictionaryElement"))
  or
  result = FlowSummary::SummaryComponent::attribute(c.getAnArgument("Attribute"))
}
/** Gets the access-path token text for content `cs`, for example `TupleElement[0]`. */
private string getContentSpecific(Content cs) {
  // Contents without arguments
  result = "ListElement" and cs = TListElementContent()
  or
  result = "SetElement" and cs = TSetElementContent()
  or
  result = "DictionaryElementAny" and cs = TDictionaryElementAnyContent()
  or
  // Contents carrying an index, key, or attribute name
  exists(int idx |
    result = "TupleElement[" + idx.toString() + "]" and cs = TTupleElementContent(idx)
  )
  or
  exists(string dictKey |
    result = "DictionaryElement[" + dictKey + "]" and cs = TDictionaryElementContent(dictKey)
  )
  or
  exists(string attrName |
    result = "Attribute[" + attrName + "]" and cs = TAttributeContent(attrName)
  )
}
/**
 * Gets the MaD-format text for summary component `sc`.
 * Only content components are handled here.
 */
string getMadRepresentationSpecific(SummaryComponent sc) {
  exists(Content content | sc = TContentSummaryComponent(content) |
    result = getContentSpecific(content)
  )
}
/**
 * Gets the flow-summary text for parameter position `pos`: `self`, the positional
 * index, or the keyword name followed by `:`.
 */
string getParameterPosition(ParameterPosition pos) {
  result = "self" and pos.isSelf()
  or
  result = any(int idx | pos.isPositional(idx)).toString()
  or
  result = any(string kw | pos.isKeyword(kw)) + ":"
}
/**
 * Gets the flow-summary text for argument position `pos`: `self`, the positional
 * index, or the keyword name followed by `:`.
 */
string getArgumentPosition(ArgumentPosition pos) {
  result = "self" and pos.isSelf()
  or
  result = any(int idx | pos.isPositional(idx)).toString()
  or
  result = any(string kw | pos.isKeyword(kw)) + ":"
}
/**
* Holds if input specification component `c` needs a reference.
*
* Never holds (the body is `none()`).
*/
predicate inputNeedsReferenceSpecific(string c) { none() }
/**
* Holds if output specification component `c` needs a reference.
*
* Never holds (the body is `none()`).
*/
predicate outputNeedsReferenceSpecific(string c) { none() }
/**
* Gets the return kind corresponding to specification `"ReturnValue"`.
*
* The body is `any()`, so every `ReturnKind` is a result.
*/
ReturnKind getReturnValueKind() { any() }
/**
* All definitions in this module are required by the shared implementation
* (for source/sink interpretation), but they are unused for Python, where
* we rely on API graphs instead.
*
* Every predicate in this module has the body `none()`.
*/
private module UnusedSourceSinkInterpretation {
/**
* Holds if an external source specification exists for `n` with output specification
* `output`, kind `kind`, and provenance `provenance`.
*/
predicate sourceElement(AstNode n, string output, string kind, string provenance) { none() }
/**
* Holds if an external sink specification exists for `n` with input specification
* `input`, kind `kind` and provenance `provenance`.
*/
predicate sinkElement(AstNode n, string input, string kind, string provenance) { none() }
/** The kind of element a source or sink specification refers to; an alias for `AstNode`. */
class SourceOrSinkElement = AstNode;
/** An entity used to interpret a source/sink specification. */
class InterpretNode extends AstNode_ {
// InterpretNode is going away, this is just a dummy implementation.
// However, we have some old location tests picking them up, so we
// explicitly define them to not exist.
InterpretNode() { none() }
/** Gets the element that this node corresponds to, if any. */
SourceOrSinkElement asElement() { none() }
/** Gets the data-flow node that this node corresponds to, if any. */
Node asNode() { none() }
/** Gets the call that this node corresponds to, if any. */
DataFlowCall asCall() { none() }
/** Gets the callable that this node corresponds to, if any. */
DataFlowCallable asCallable() { none() }
/** Gets the target of this call, if any. */
SourceOrSinkElement getCallTarget() { none() }
}
/** Provides additional sink specification logic. */
predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
/** Provides additional source specification logic. */
predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
}
import UnusedSourceSinkInterpretation
/**
 * Provides predicates that recognise the positional and keyword positions
 * mentioned in `Parameter[...]` and `Argument[...]` access path tokens.
 */
module ParsePositions {
  private import FlowSummaryImpl

  /** Holds if `body` is an argument of some `Parameter[...]` token. */
  private predicate isParamBody(string body) {
    body = any(AccessPathToken tok | tok.getName() = "Parameter").getAnArgument()
  }

  /** Holds if `body` is an argument of some `Argument[...]` token. */
  private predicate isArgBody(string body) {
    body = any(AccessPathToken tok | tok.getName() = "Argument").getAnArgument()
  }

  /** Holds if `c` is a `Parameter[...]` argument that parses to the integer `i`. */
  predicate isParsedPositionalParameterPosition(string c, int i) {
    AccessPath::parseInt(c) = i and
    isParamBody(c)
  }

  /** Holds if `c` is a `Parameter[...]` argument of the form `paramName:`. */
  predicate isParsedKeywordParameterPosition(string c, string paramName) {
    paramName + ":" = c and
    isParamBody(c)
  }

  /** Holds if `c` is an `Argument[...]` argument that parses to the integer `i`. */
  predicate isParsedPositionalArgumentPosition(string c, int i) {
    AccessPath::parseInt(c) = i and
    isArgBody(c)
  }

  /** Holds if `c` is an `Argument[...]` argument of the form `argName:`. */
  predicate isParsedKeywordArgumentPosition(string c, string argName) {
    argName + ":" = c and
    isArgBody(c)
  }
}
/**
 * Gets the argument position denoted by `s`, where `s` is the `X` in `Parameter[X]`:
 * `self`, a positional index, or a keyword name followed by `:`.
 */
ArgumentPosition parseParamBody(string s) {
  s = "self" and
  result.isSelf()
  or
  result.isPositional(any(int idx | ParsePositions::isParsedPositionalParameterPosition(s, idx)))
  or
  result.isKeyword(any(string kw | ParsePositions::isParsedKeywordParameterPosition(s, kw)))
}
/**
 * Gets the parameter position denoted by `s`, where `s` is the `X` in `Argument[X]`:
 * `self`, a positional index, or a keyword name followed by `:`.
 */
ParameterPosition parseArgBody(string s) {
  s = "self" and
  result.isSelf()
  or
  result.isPositional(any(int idx | ParsePositions::isParsedPositionalArgumentPosition(s, idx)))
  or
  result.isKeyword(any(string kw | ParsePositions::isParsedKeywordArgumentPosition(s, kw)))
}

View File

@@ -177,7 +177,7 @@ class Boolean extends boolean {
} }
private import SummaryTypeTracker as SummaryTypeTracker private import SummaryTypeTracker as SummaryTypeTracker
private import semmle.python.dataflow.new.FlowSummary as FlowSummary private import semmle.python.dataflow.new.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
pragma[noinline] pragma[noinline]
@@ -205,30 +205,30 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { none() } TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { none() }
// Callables // Callables
class SummarizedCallable = FlowSummary::SummarizedCallable; class SummarizedCallable = FlowSummaryImpl::Private::SummarizedCallableImpl;
// Summaries and their stacks // Summaries and their stacks
class SummaryComponent = FlowSummary::SummaryComponent; class SummaryComponent = FlowSummaryImpl::Private::SummaryComponent;
class SummaryComponentStack = FlowSummary::SummaryComponentStack; class SummaryComponentStack = FlowSummaryImpl::Private::SummaryComponentStack;
predicate singleton = FlowSummary::SummaryComponentStack::singleton/1; predicate singleton = FlowSummaryImpl::Private::SummaryComponentStack::singleton/1;
predicate push = FlowSummary::SummaryComponentStack::push/2; predicate push = FlowSummaryImpl::Private::SummaryComponentStack::push/2;
// Relating content to summaries // Relating content to summaries
predicate content = FlowSummary::SummaryComponent::content/1; predicate content = FlowSummaryImpl::Private::SummaryComponent::content/1;
SummaryComponent withoutContent(TypeTrackerContent contents) { none() } SummaryComponent withoutContent(TypeTrackerContent contents) { none() }
SummaryComponent withContent(TypeTrackerContent contents) { none() } SummaryComponent withContent(TypeTrackerContent contents) { none() }
predicate return = FlowSummary::SummaryComponent::return/0; predicate return = FlowSummaryImpl::Private::SummaryComponent::return/0;
// Relating nodes to summaries // Relating nodes to summaries
Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate) { Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate) {
exists(DataFlowDispatch::ParameterPosition pos | exists(DataFlowDispatch::ParameterPosition pos |
arg = FlowSummary::SummaryComponent::argument(pos) and arg = FlowSummaryImpl::Private::SummaryComponent::argument(pos) and
argumentPositionMatch(call, result, pos) and argumentPositionMatch(call, result, pos) and
isPostUpdate = [false, true] // todo: implement when/if Python uses post-update nodes in type tracking isPostUpdate = [false, true] // todo: implement when/if Python uses post-update nodes in type tracking
) )
@@ -238,7 +238,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
exists( exists(
DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos, Parameter p DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos, Parameter p
| |
param = FlowSummary::SummaryComponent::parameter(apos) and param = FlowSummaryImpl::Private::SummaryComponent::parameter(apos) and
DataFlowDispatch::parameterMatch(ppos, apos) and DataFlowDispatch::parameterMatch(ppos, apos) and
result.asCfgNode().getNode() = p and result.asCfgNode().getNode() = p and
( (
@@ -254,14 +254,16 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
} }
Node returnOf(Node callable, SummaryComponent return) { Node returnOf(Node callable, SummaryComponent return) {
return = FlowSummary::SummaryComponent::return() and return = FlowSummaryImpl::Private::SummaryComponent::return() and
// `result` should be the return value of a callable expression (lambda or function) referenced by `callable` // `result` should be the return value of a callable expression (lambda or function) referenced by `callable`
result.asCfgNode() = result.asCfgNode() =
callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode() callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode()
} }
// Relating callables to nodes // Relating callables to nodes
Node callTo(SummarizedCallable callable) { result = callable.getACallSimple() } Node callTo(SummarizedCallable callable) {
result = callable.(DataFlowDispatch::LibraryCallable).getACallSimple()
}
} }
private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow<SummaryTypeTrackerInput>; private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow<SummaryTypeTrackerInput>;

View File

@@ -624,7 +624,7 @@ module Flask {
.getAValueReachableFromSource() .getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0]" and input = "Argument[0]" and
output = "ReturnValue" and output = "ReturnValue" and
preservesValue = false preservesValue = false
@@ -650,7 +650,7 @@ module Flask {
.getAValueReachableFromSource() .getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0]" and input = "Argument[0]" and
// Technically it's `Iterator[str]`, but list will do :) // Technically it's `Iterator[str]`, but list will do :)
output = "ReturnValue.ListElement" and output = "ReturnValue.ListElement" and

View File

@@ -3085,7 +3085,7 @@ private module StdlibPrivate {
result = API::moduleImport("re").getMember("compile").getAValueReachableFromSource() result = API::moduleImport("re").getMember("compile").getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input in ["Argument[0]", "Argument[pattern:]"] and input in ["Argument[0]", "Argument[pattern:]"] and
output = "ReturnValue.Attribute[pattern]" and output = "ReturnValue.Attribute[pattern]" and
preservesValue = true preservesValue = true
@@ -3116,7 +3116,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() } override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(string arg | exists(string arg |
this = "re.Match" and arg = "Argument[1]" this = "re.Match" and arg = "Argument[1]"
or or
@@ -3173,7 +3173,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() } override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
methodName = "expand" and methodName = "expand" and
preservesValue = false and preservesValue = false and
( (
@@ -3229,7 +3229,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() } override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(int offset | exists(int offset |
// for non-compiled regex the first argument is the pattern, so we need to // for non-compiled regex the first argument is the pattern, so we need to
// account for this difference // account for this difference
@@ -4079,7 +4079,7 @@ private module StdlibPrivate {
result = API::builtin("dict").getAValueReachableFromSource() result = API::builtin("dict").getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[0].DictionaryElement[" + key + "]" and input = "Argument[0].DictionaryElement[" + key + "]" and
output = "ReturnValue.DictionaryElement[" + key + "]" and output = "ReturnValue.DictionaryElement[" + key + "]" and
@@ -4108,7 +4108,7 @@ private module StdlibPrivate {
result = API::builtin("list").getAValueReachableFromSource() result = API::builtin("list").getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
( (
input = "Argument[0].ListElement" input = "Argument[0].ListElement"
or or
@@ -4138,7 +4138,7 @@ private module StdlibPrivate {
result = API::builtin("tuple").getAValueReachableFromSource() result = API::builtin("tuple").getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]" and input = "Argument[0].TupleElement[" + i.toString() + "]" and
output = "ReturnValue.TupleElement[" + i.toString() + "]" and output = "ReturnValue.TupleElement[" + i.toString() + "]" and
@@ -4163,7 +4163,7 @@ private module StdlibPrivate {
result = API::builtin("set").getAValueReachableFromSource() result = API::builtin("set").getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
( (
input = "Argument[0].ListElement" input = "Argument[0].ListElement"
or or
@@ -4193,8 +4193,8 @@ private module StdlibPrivate {
result = API::builtin("frozenset").getAValueReachableFromSource() result = API::builtin("frozenset").getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
any(SetSummary s).propagatesFlowExt(input, output, preservesValue) any(SetSummary s).propagatesFlow(input, output, preservesValue)
} }
} }
@@ -4211,7 +4211,7 @@ private module StdlibPrivate {
result = API::builtin("reversed").getAValueReachableFromSource() result = API::builtin("reversed").getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
( (
input = "Argument[0].ListElement" input = "Argument[0].ListElement"
or or
@@ -4241,7 +4241,7 @@ private module StdlibPrivate {
result = API::builtin("sorted").getAValueReachableFromSource() result = API::builtin("sorted").getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(string content | exists(string content |
content = "ListElement" content = "ListElement"
or or
@@ -4273,7 +4273,7 @@ private module StdlibPrivate {
result = API::builtin("iter").getAValueReachableFromSource() result = API::builtin("iter").getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
( (
input = "Argument[0].ListElement" input = "Argument[0].ListElement"
or or
@@ -4303,7 +4303,7 @@ private module StdlibPrivate {
result = API::builtin("next").getAValueReachableFromSource() result = API::builtin("next").getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
( (
input = "Argument[0].ListElement" input = "Argument[0].ListElement"
or or
@@ -4336,7 +4336,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() } override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(string content | exists(string content |
content = "ListElement" content = "ListElement"
or or
@@ -4378,7 +4378,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() } override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[self].ListElement" and input = "Argument[self].ListElement" and
output = "ReturnValue" and output = "ReturnValue" and
preservesValue = true preservesValue = true
@@ -4415,7 +4415,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() } override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[self].DictionaryElement[" + key + "]" and input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue" and output = "ReturnValue" and
preservesValue = true preservesValue = true
@@ -4438,7 +4438,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() } override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[self].DictionaryElement[" + key + "]" and input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue" and output = "ReturnValue" and
preservesValue = true preservesValue = true
@@ -4460,7 +4460,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() } override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// default value // default value
input = "Argument[1]" and input = "Argument[1]" and
output = "ReturnValue" and output = "ReturnValue" and
@@ -4483,7 +4483,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() } override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.TupleElement[1]" and output = "ReturnValue.TupleElement[1]" and
@@ -4509,7 +4509,7 @@ private module StdlibPrivate {
result.(DataFlow::AttrRead).getAttributeName() = "setdefault" result.(DataFlow::AttrRead).getAttributeName() = "setdefault"
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// store/read steps with dictionary content of this is modeled in DataFlowPrivate // store/read steps with dictionary content of this is modeled in DataFlowPrivate
input = "Argument[1]" and input = "Argument[1]" and
output = "ReturnValue" and output = "ReturnValue" and
@@ -4538,7 +4538,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() } override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// If key is in the dictionary, return its value. // If key is in the dictionary, return its value.
input = "Argument[self].DictionaryElement[" + key + "]" and input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue" and output = "ReturnValue" and
@@ -4567,7 +4567,7 @@ private module StdlibPrivate {
result.(DataFlow::AttrRead).getAttributeName() = "values" result.(DataFlow::AttrRead).getAttributeName() = "values"
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.ListElement" and output = "ReturnValue.ListElement" and
@@ -4594,7 +4594,7 @@ private module StdlibPrivate {
result.(DataFlow::AttrRead).getAttributeName() = "keys" result.(DataFlow::AttrRead).getAttributeName() = "keys"
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
input = "Argument[self]" and input = "Argument[self]" and
output = "ReturnValue" and output = "ReturnValue" and
@@ -4618,7 +4618,7 @@ private module StdlibPrivate {
result.(DataFlow::AttrRead).getAttributeName() = "items" result.(DataFlow::AttrRead).getAttributeName() = "items"
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.ListElement.TupleElement[1]" and output = "ReturnValue.ListElement.TupleElement[1]" and
@@ -4648,7 +4648,7 @@ private module StdlibPrivate {
result.(DataFlow::AttrRead).getAttributeName() = "append" result.(DataFlow::AttrRead).getAttributeName() = "append"
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// newly added element added to this // newly added element added to this
input = "Argument[0]" and input = "Argument[0]" and
output = "Argument[self].ListElement" and output = "Argument[self].ListElement" and
@@ -4675,7 +4675,7 @@ private module StdlibPrivate {
result.(DataFlow::AttrRead).getAttributeName() = "add" result.(DataFlow::AttrRead).getAttributeName() = "add"
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// newly added element added to this // newly added element added to this
input = "Argument[0]" and input = "Argument[0]" and
output = "Argument[self].SetElement" and output = "Argument[self].SetElement" and
@@ -4705,7 +4705,7 @@ private module StdlibPrivate {
API::moduleImport("os").getMember(["getenv", "getenvb"]).getAValueReachableFromSource() API::moduleImport("os").getMember(["getenv", "getenvb"]).getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input in ["Argument[1]", "Argument[default:]"] and input in ["Argument[1]", "Argument[default:]"] and
output = "ReturnValue" and output = "ReturnValue" and
preservesValue = true preservesValue = true

View File

@@ -70,8 +70,8 @@ private module API = Specific::API;
private module DataFlow = Specific::DataFlow; private module DataFlow = Specific::DataFlow;
private import Specific::AccessPathSyntax
private import ApiGraphModelsExtensions as Extensions private import ApiGraphModelsExtensions as Extensions
import codeql.dataflow.internal.AccessPathSyntax
/** Module containing hooks for providing input data to be interpreted as a model. */ /** Module containing hooks for providing input data to be interpreted as a model. */
module ModelInput { module ModelInput {
@@ -327,29 +327,29 @@ predicate isRelevantFullPath(string type, string path) {
} }
/** A string from a CSV row that should be parsed as an access path. */ /** A string from a CSV row that should be parsed as an access path. */
private class AccessPathRange extends AccessPath::Range { private predicate accessPathRange(string s) {
AccessPathRange() { isRelevantFullPath(_, s)
isRelevantFullPath(_, this) or
or exists(string type | isRelevantType(type) |
exists(string type | isRelevantType(type) | summaryModel(type, _, s, _, _) or
summaryModel(type, _, this, _, _) or summaryModel(type, _, _, s, _)
summaryModel(type, _, _, this, _) )
) or
or typeVariableModel(_, s)
typeVariableModel(_, this)
}
} }
import AccessPath<accessPathRange/1>
/** /**
* Gets a successor of `node` in the API graph. * Gets a successor of `node` in the API graph.
*/ */
bindingset[token] bindingset[token]
API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { API::Node getSuccessorFromNode(API::Node node, AccessPathTokenBase token) {
// API graphs use the same label for arguments and parameters. An edge originating from a // API graphs use the same label for arguments and parameters. An edge originating from a
// use-node represents an argument, and an edge originating from a def-node represents a parameter. // use-node represents an argument, and an edge originating from a def-node represents a parameter.
// We just map both to the same thing. // We just map both to the same thing.
token.getName() = ["Argument", "Parameter"] and token.getName() = ["Argument", "Parameter"] and
result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument())) result = node.getParameter(parseIntUnbounded(token.getAnArgument()))
or or
token.getName() = "ReturnValue" and token.getName() = "ReturnValue" and
result = node.getReturn() result = node.getReturn()
@@ -362,11 +362,9 @@ API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) {
* Gets an API-graph successor for the given invocation. * Gets an API-graph successor for the given invocation.
*/ */
bindingset[token] bindingset[token]
API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken token) { API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathTokenBase token) {
token.getName() = "Argument" and token.getName() = "Argument" and
result = result = invoke.getParameter(parseIntWithArity(token.getAnArgument(), invoke.getNumArgument()))
invoke
.getParameter(AccessPath::parseIntWithArity(token.getAnArgument(), invoke.getNumArgument()))
or or
token.getName() = "ReturnValue" and token.getName() = "ReturnValue" and
result = invoke.getReturn() result = invoke.getReturn()
@@ -378,10 +376,12 @@ API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken to
/** /**
* Holds if `invoke` invokes a call-site filter given by `token`. * Holds if `invoke` invokes a call-site filter given by `token`.
*/ */
pragma[inline] bindingset[token]
private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, AccessPathToken token) { private predicate invocationMatchesCallSiteFilter(
Specific::InvokeNode invoke, AccessPathTokenBase token
) {
token.getName() = "WithArity" and token.getName() = "WithArity" and
invoke.getNumArgument() = AccessPath::parseIntUnbounded(token.getAnArgument()) invoke.getNumArgument() = parseIntUnbounded(token.getAnArgument())
or or
Specific::invocationMatchesExtraCallSiteFilter(invoke, token) Specific::invocationMatchesExtraCallSiteFilter(invoke, token)
} }

View File

@@ -4,14 +4,14 @@
* It must export the following members: * It must export the following members:
* ```ql * ```ql
* class Unit // a unit type * class Unit // a unit type
* module AccessPathSyntax // a re-export of the AccessPathSyntax module *
* class InvokeNode // a type representing an invocation connected to the API graph * class InvokeNode // a type representing an invocation connected to the API graph
* module API // the API graph module * module API // the API graph module
* predicate isPackageUsed(string package) * predicate isPackageUsed(string package)
* API::Node getExtraNodeFromPath(string package, string type, string path, int n) * API::Node getExtraNodeFromPath(string package, string type, string path, int n)
* API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token)
* API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathToken token) * API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathTokenBase token)
* predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathToken token) * predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathTokenBase token)
* InvokeNode getAnInvocationOf(API::Node node) * InvokeNode getAnInvocationOf(API::Node node)
* predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) * predicate isExtraValidTokenNameInIdentifyingAccessPath(string name)
* predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) * predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name)
@@ -23,9 +23,7 @@ private import python as PY
private import ApiGraphModels private import ApiGraphModels
import semmle.python.ApiGraphs::API as API import semmle.python.ApiGraphs::API as API
// Re-export libraries needed by ApiGraphModels.qll // Re-export libraries needed by ApiGraphModels.qll
import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax
import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow
private import AccessPathSyntax
/** /**
* Holds if models describing `type` may be relevant for the analysis of this database. * Holds if models describing `type` may be relevant for the analysis of this database.
@@ -49,7 +47,7 @@ API::Node getExtraNodeFromType(string type) { result = API::moduleImport(type) }
* Gets a Python-specific API graph successor of `node` reachable by resolving `token`. * Gets a Python-specific API graph successor of `node` reachable by resolving `token`.
*/ */
bindingset[token] bindingset[token]
API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) {
token.getName() = "Member" and token.getName() = "Member" and
result = node.getMember(token.getAnArgument()) result = node.getMember(token.getAnArgument())
or or
@@ -89,7 +87,7 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) {
* Gets a Python-specific API graph successor of `node` reachable by resolving `token`. * Gets a Python-specific API graph successor of `node` reachable by resolving `token`.
*/ */
bindingset[token] bindingset[token]
API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathToken token) { API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathTokenBase token) {
token.getName() = "Instance" and token.getName() = "Instance" and
result = node.getReturn() result = node.getReturn()
or or
@@ -129,7 +127,7 @@ API::Node getAFuzzySuccessor(API::Node node) {
* Holds if `invoke` matches the PY-specific call site filter in `token`. * Holds if `invoke` matches the PY-specific call site filter in `token`.
*/ */
bindingset[token] bindingset[token]
predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathToken token) { predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathTokenBase token) {
token.getName() = "Call" and exists(invoke) // there is only one kind of call in Python. token.getName() = "Call" and exists(invoke) // there is only one kind of call in Python.
} }

View File

@@ -3,6 +3,6 @@ import semmle.python.dataflow.new.FlowSummary
import semmle.python.dataflow.new.internal.FlowSummaryImpl import semmle.python.dataflow.new.internal.FlowSummaryImpl
query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) { query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) {
(sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and (sc.propagatesFlow(s, _, _) or sc.propagatesFlow(_, s, _)) and
Private::External::invalidSpecComponent(s, c) Private::External::invalidSpecComponent(s, c)
} }

View File

@@ -4,7 +4,7 @@ import semmle.python.dataflow.new.internal.FlowSummaryImpl
from SummarizedCallable sc, string s, string c, string attr from SummarizedCallable sc, string s, string c, string attr
where where
(sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and (sc.propagatesFlow(s, _, _) or sc.propagatesFlow(_, s, _)) and
Private::External::invalidSpecComponent(s, c) and Private::External::invalidSpecComponent(s, c) and
c = "Attribute[" + attr + "]" c = "Attribute[" + attr + "]"
select "The attribute \"" + attr + select "The attribute \"" + attr +

View File

@@ -18,6 +18,10 @@ module RecursionGuard {
(TT::callStep(_, _) implies any()) (TT::callStep(_, _) implies any())
} }
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
none()
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
} }
} }
@@ -31,7 +35,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable {
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0]" and input = "Argument[0]" and
output = "ReturnValue" and output = "ReturnValue" and
preservesValue = true preservesValue = true
@@ -48,7 +52,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable {
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[1]" and input = "Argument[1]" and
output = "Argument[0].Parameter[0]" and output = "Argument[0].Parameter[0]" and
preservesValue = true preservesValue = true
@@ -68,7 +72,7 @@ private class SummarizedCallableReversed extends SummarizedCallable {
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0].ListElement" and input = "Argument[0].ListElement" and
output = "ReturnValue.ListElement" and output = "ReturnValue.ListElement" and
preservesValue = true preservesValue = true
@@ -84,7 +88,7 @@ private class SummarizedCallableMap extends SummarizedCallable {
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[1].ListElement" and input = "Argument[1].ListElement" and
output = "Argument[0].Parameter[0]" and output = "Argument[0].Parameter[0]" and
preservesValue = true preservesValue = true
@@ -104,7 +108,7 @@ private class SummarizedCallableAppend extends SummarizedCallable {
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0]" and input = "Argument[0]" and
output = "ReturnValue" and output = "ReturnValue" and
preservesValue = false preservesValue = false
@@ -126,7 +130,7 @@ private class SummarizedCallableJsonLoads extends SummarizedCallable {
result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource() result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0]" and input = "Argument[0]" and
output = "ReturnValue.ListElement" and output = "ReturnValue.ListElement" and
preservesValue = true preservesValue = true

View File

@@ -12,7 +12,7 @@ import experimental.dataflow.testTaintConfig
private import TestSummaries private import TestSummaries
query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) { query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) {
(sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and (sc.propagatesFlow(s, _, _) or sc.propagatesFlow(_, s, _)) and
Private::External::invalidSpecComponent(s, c) Private::External::invalidSpecComponent(s, c)
} }

View File

@@ -18,6 +18,10 @@ module RecursionGuard {
(TT::callStep(_, _) implies any()) (TT::callStep(_, _) implies any())
} }
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
none()
}
override DataFlow::CallCfgNode getACallSimple() { none() } override DataFlow::CallCfgNode getACallSimple() { none() }
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -39,7 +43,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable {
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0]" and input = "Argument[0]" and
output = "ReturnValue" and output = "ReturnValue" and
preservesValue = true preservesValue = true
@@ -58,7 +62,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable {
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[1]" and input = "Argument[1]" and
output = "Argument[0].Parameter[0]" and output = "Argument[0].Parameter[0]" and
preservesValue = true preservesValue = true
@@ -80,7 +84,7 @@ private class SummarizedCallableReversed extends SummarizedCallable {
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0].ListElement" and input = "Argument[0].ListElement" and
output = "ReturnValue.ListElement" and output = "ReturnValue.ListElement" and
preservesValue = true preservesValue = true
@@ -98,7 +102,7 @@ private class SummarizedCallableMap extends SummarizedCallable {
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[1].ListElement" and input = "Argument[1].ListElement" and
output = "Argument[0].Parameter[0]" and output = "Argument[0].Parameter[0]" and
preservesValue = true preservesValue = true
@@ -120,7 +124,7 @@ private class SummarizedCallableAppend extends SummarizedCallable {
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0]" and input = "Argument[0]" and
output = "ReturnValue" and output = "ReturnValue" and
preservesValue = false preservesValue = false
@@ -144,7 +148,7 @@ private class SummarizedCallableJsonLoads extends SummarizedCallable {
result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource() result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource()
} }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0]" and input = "Argument[0]" and
output = "ReturnValue.ListElement" and output = "ReturnValue.ListElement" and
preservesValue = true preservesValue = true
@@ -163,7 +167,7 @@ private class SummarizedCallableReadSecret extends SummarizedCallable {
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0].Attribute[secret]" and input = "Argument[0].Attribute[secret]" and
output = "ReturnValue" and output = "ReturnValue" and
preservesValue = true preservesValue = true
@@ -181,7 +185,7 @@ private class SummarizedCallableSetSecret extends SummarizedCallable {
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[1]" and input = "Argument[1]" and
output = "Argument[0].Attribute[secret]" and output = "Argument[0].Attribute[secret]" and
preservesValue = true preservesValue = true

View File

@@ -1,5 +1,5 @@
import python import python
import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax private import semmle.python.frameworks.data.internal.ApiGraphModels as ApiGraphModels
import semmle.python.frameworks.data.ModelsAsData import semmle.python.frameworks.data.ModelsAsData
import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.DataFlow import semmle.python.dataflow.new.DataFlow
@@ -27,6 +27,6 @@ query predicate isSource(DataFlow::Node node, string kind) {
node = ModelOutput::getASourceNode(kind).asSource() node = ModelOutput::getASourceNode(kind).asSource()
} }
query predicate syntaxErrors(AccessPathSyntax::AccessPath path) { path.hasSyntaxError() } query predicate syntaxErrors(ApiGraphModels::AccessPath path) { path.hasSyntaxError() }
query predicate warning = ModelOutput::getAWarning/0; query predicate warning = ModelOutput::getAWarning/0;

View File

@@ -1,5 +1,4 @@
import python import python
import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax
import semmle.python.frameworks.data.internal.ApiGraphModels as ApiGraphModels import semmle.python.frameworks.data.internal.ApiGraphModels as ApiGraphModels
import semmle.python.frameworks.data.ModelsAsData import semmle.python.frameworks.data.ModelsAsData