Merge commit '737dd9d4c1' into jb1/lib/dataflowstack

This commit is contained in:
Josh Brown
2024-02-08 08:18:04 -08:00
2596 changed files with 351218 additions and 96833 deletions

View File

@@ -3,4 +3,4 @@ compatibility: backwards
py_exprs.rel: run py_exprs.qlo
py_stmts.rel: run py_stmts.qlo
py_patterns.rel: delete
py_patterns_lists.rel: delete
py_pattern_lists.rel: delete

View File

@@ -14,6 +14,8 @@ private module Input implements InputSig<PythonDataFlow> {
private import Private
private import Public
predicate postWithInFlowExclude(Node n) { n instanceof FlowSummaryNode }
predicate argHasPostUpdateExclude(ArgumentNode n) {
// TODO: Implement post-updates for *args, see tests added in https://github.com/github/codeql/pull/14936
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_))
@@ -44,6 +46,13 @@ private module Input implements InputSig<PythonDataFlow> {
)
}
predicate uniqueEnclosingCallableExclude(Node n) {
// We only have a selection of valid callables.
// For instance, we do not have classes as `DataFlowCallable`s.
not n.(SynthCaptureNode).getSynthesizedCaptureNode().getEnclosingCallable() instanceof Function and
not n.(SynthCaptureNode).getSynthesizedCaptureNode().getEnclosingCallable() instanceof Module
}
predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) {
not exists(call.getLocation().getFile().getRelativePath())
}
@@ -53,7 +62,7 @@ private module Input implements InputSig<PythonDataFlow> {
}
predicate multipleArgumentCallExclude(ArgumentNode arg, DataFlowCall call) {
// since we can have multiple DataFlowCall for a CallNode (for example if can
// since we can have multiple DataFlowCall for a CallNode (for example if it can
// resolve to multiple functions), but we only make _one_ ArgumentNode for each
// argument in the CallNode, we end up violating this consistency check in those
// cases. (see `getCallArg` in DataFlowDispatch.qll)

View File

@@ -1,3 +1,30 @@
## 0.11.7
### Minor Analysis Improvements
* Deleted many deprecated predicates and classes with uppercase `LDAP`, `HTTP`, `URL`, `CGI` etc. in their names. Use the PascalCased versions instead.
* Deleted the deprecated `localSourceStoreStep` predicate, use `flowsToStoreStep` instead.
* Deleted the deprecated `iteration_defined_variable` predicate from the `SSA` library.
* Deleted various deprecated predicates from the points-to libraries.
* Deleted the deprecated `semmle/python/security/OverlyLargeRangeQuery.qll`, `semmle/python/security/regexp/ExponentialBackTracking.qll`, and `semmle/python/security/regexp/NfaUtils.qll` files.
* The diagnostic query `py/diagnostics/successfully-extracted-files`, and therefore the Code Scanning UI measure of scanned Python files, now considers any Python file seen during extraction, even one with some errors, to be extracted / scanned.
## 0.11.6
### Major Analysis Improvements
* Added support for global data-flow through captured variables.
### Minor Analysis Improvements
* Captured subclass relationships ahead-of-time for most popular PyPI packages so we are able to resolve subclass relationships even without having the packages installed. For example we have captured that `flask_restful.Resource` is a subclass of `flask.views.MethodView`, so our Flask modeling will still consider a function named `post` on a `class Foo(flask_restful.Resource):` as an HTTP request handler.
* Python now makes use of the shared type tracking library, exposed as `semmle.python.dataflow.new.TypeTracking`. The existing type tracking library, `semmle.python.dataflow.new.TypeTracker`, has consequently been deprecated.
### Bug Fixes
- We would previously confuse all captured variables into a single scope entry node. Now they each get their own node so they can be tracked properly.
- The dataflow graph no longer contains SSA variables. Instead, flow is directed via the corresponding controlflow nodes. This should make the graph and the flow simpler to understand. Minor improvements in flow computation have been observed, but in general negligible changes to alerts are expected.
## 0.11.5
No user-facing changes.

View File

@@ -0,0 +1,15 @@
## 0.11.6
### Major Analysis Improvements
* Added support for global data-flow through captured variables.
### Minor Analysis Improvements
* Captured subclass relationships ahead-of-time for most popular PyPI packages so we are able to resolve subclass relationships even without having the packages installed. For example we have captured that `flask_restful.Resource` is a subclass of `flask.views.MethodView`, so our Flask modeling will still consider a function named `post` on a `class Foo(flask_restful.Resource):` as an HTTP request handler.
* Python now makes use of the shared type tracking library, exposed as `semmle.python.dataflow.new.TypeTracking`. The existing type tracking library, `semmle.python.dataflow.new.TypeTracker`, has consequently been deprecated.
### Bug Fixes
- We would previously confuse all captured variables into a single scope entry node. Now they each get their own node so they can be tracked properly.
- The dataflow graph no longer contains SSA variables. Instead, flow is directed via the corresponding controlflow nodes. This should make the graph and the flow simpler to understand. Minor improvements in flow computation have been observed, but in general negligible changes to alerts are expected.

View File

@@ -0,0 +1,10 @@
## 0.11.7
### Minor Analysis Improvements
* Deleted many deprecated predicates and classes with uppercase `LDAP`, `HTTP`, `URL`, `CGI` etc. in their names. Use the PascalCased versions instead.
* Deleted the deprecated `localSourceStoreStep` predicate, use `flowsToStoreStep` instead.
* Deleted the deprecated `iteration_defined_variable` predicate from the `SSA` library.
* Deleted various deprecated predicates from the points-to libraries.
* Deleted the deprecated `semmle/python/security/OverlyLargeRangeQuery.qll`, `semmle/python/security/regexp/ExponentialBackTracking.qll`, and `semmle/python/security/regexp/NfaUtils.qll` files.
* The diagnostic query `py/diagnostics/successfully-extracted-files`, and therefore the Code Scanning UI measure of scanned Python files, now considers any Python file seen during extraction, even one with some errors, to be extracted / scanned.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.11.5
lastReleaseVersion: 0.11.7

View File

@@ -4,9 +4,9 @@
*/
import python
private import semmle.python.dataflow.new.internal.TypeTrackerSpecific
private import semmle.python.dataflow.new.internal.TypeTrackingImpl
private import semmle.python.ApiGraphs
class CallCfgNodeWithTarget extends DataFlow::Node instanceof DataFlow::CallCfgNode {
DataFlow::Node getTarget() { returnStep(result, this) }
DataFlow::Node getTarget() { TypeTrackingInput::returnStep(result, this) }
}

View File

@@ -26,7 +26,6 @@ import semmle.python.types.FunctionObject
import semmle.python.types.ModuleObject
import semmle.python.types.Version
import semmle.python.types.Descriptors
import semmle.python.protocols
import semmle.python.SSA
import semmle.python.SelfAttribute
import semmle.python.types.Properties

View File

@@ -1,5 +1,5 @@
name: codeql/python-all
version: 0.11.5
version: 0.11.7
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python

View File

@@ -685,9 +685,6 @@ module Ldap {
}
}
/** DEPRECATED: Alias for Ldap */
deprecated module LDAP = Ldap;
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
@@ -1157,9 +1154,6 @@ module Http {
// remote-flow-sources in general.
}
/** DEPRECATED: Alias for Http */
deprecated module HTTP = Http;
/**
* Provides models for cryptographic things.
*

View File

@@ -126,7 +126,10 @@ class ControlFlowNode extends @py_flow_node {
cached
string toString() {
Stages::AST::ref() and
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
// Since modules can have ambiguous names, entry nodes can too, if we do not collate them.
exists(Scope s | s.getEntryNode() = this |
result = "Entry node for " + concat( | | s.toString(), ",")
)
or
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
or

View File

@@ -177,7 +177,7 @@ private predicate legalDottedName(string name) {
}
bindingset[name]
private predicate legalShortName(string name) { name.regexpMatch("(\\p{L}|_)(\\p{L}|\\d|_)*") }
predicate legalShortName(string name) { name.regexpMatch("(\\p{L}|_)(\\p{L}|\\d|_)*") }
private string moduleNameFromBase(Container file) {
// We used to also require `isPotentialPackage(f)` to hold in this case,

View File

@@ -13,61 +13,14 @@ private module Summaries {
private import semmle.python.Frameworks
}
class SummaryComponent = Impl::Public::SummaryComponent;
deprecated class SummaryComponent = Impl::Private::SummaryComponent;
/** Provides predicates for constructing summary components. */
module SummaryComponent {
private import Impl::Public::SummaryComponent as SC
deprecated module SummaryComponent = Impl::Private::SummaryComponent;
predicate parameter = SC::parameter/1;
deprecated class SummaryComponentStack = Impl::Private::SummaryComponentStack;
predicate argument = SC::argument/1;
predicate content = SC::content/1;
/** Gets a summary component that represents a list element. */
SummaryComponent listElement() { result = content(any(ListElementContent c)) }
/** Gets a summary component that represents a set element. */
SummaryComponent setElement() { result = content(any(SetElementContent c)) }
/** Gets a summary component that represents a tuple element. */
SummaryComponent tupleElement(int index) {
exists(TupleElementContent c | c.getIndex() = index and result = content(c))
}
/** Gets a summary component that represents a dictionary element. */
SummaryComponent dictionaryElement(string key) {
exists(DictionaryElementContent c | c.getKey() = key and result = content(c))
}
/** Gets a summary component that represents a dictionary element at any key. */
SummaryComponent dictionaryElementAny() { result = content(any(DictionaryElementAnyContent c)) }
/** Gets a summary component that represents an attribute element. */
SummaryComponent attribute(string attr) {
exists(AttributeContent c | c.getAttribute() = attr and result = content(c))
}
/** Gets a summary component that represents the return value of a call. */
SummaryComponent return() { result = SC::return(any(ReturnKind rk)) }
}
class SummaryComponentStack = Impl::Public::SummaryComponentStack;
/** Provides predicates for constructing stacks of summary components. */
module SummaryComponentStack {
private import Impl::Public::SummaryComponentStack as SCS
predicate singleton = SCS::singleton/1;
predicate push = SCS::push/2;
predicate argument = SCS::argument/1;
/** Gets a singleton stack representing the return value of a call. */
SummaryComponentStack return() { result = singleton(SummaryComponent::return()) }
}
deprecated module SummaryComponentStack = Impl::Private::SummaryComponentStack;
/** A callable with a flow summary, identified by a unique string. */
abstract class SummarizedCallable extends LibraryCallable, Impl::Public::SummarizedCallable {
@@ -75,21 +28,14 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari
SummarizedCallable() { any() }
/**
* Same as
*
* ```ql
* propagatesFlow(
* SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
* )
* ```
*
* but uses an external (string) representation of the input and output stacks.
* DEPRECATED: Use `propagatesFlow` instead.
*/
pragma[nomagic]
predicate propagatesFlowExt(string input, string output, boolean preservesValue) { none() }
deprecated predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
this.propagatesFlow(input, output, preservesValue)
}
}
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;
deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack;
private class SummarizedCallableFromModel extends SummarizedCallable {
string type;
@@ -109,7 +55,7 @@ private class SummarizedCallableFromModel extends SummarizedCallable {
)
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) |
kind = "value" and
preservesValue = true

View File

@@ -1,4 +1,6 @@
/**
* DEPRECATED: Use `semmle.python.dataflow.new.TypeTracking` instead.
*
* This file acts as a wrapper for `internal.TypeTracker`, exposing some of the functionality with
* names that are more appropriate for Python.
*/
@@ -8,12 +10,14 @@ private import internal.TypeTracker as Internal
private import internal.TypeTrackerSpecific as InternalSpecific
/** A string that may appear as the name of an attribute or access path. */
class AttributeName = InternalSpecific::TypeTrackerContent;
deprecated class AttributeName = InternalSpecific::TypeTrackerContent;
/** An attribute name, or the empty string (representing no attribute). */
class OptionalAttributeName = InternalSpecific::OptionalTypeTrackerContent;
deprecated class OptionalAttributeName = InternalSpecific::OptionalTypeTrackerContent;
/**
* DEPRECATED: Use `semmle.python.dataflow.new.TypeTracking` instead.
*
* The summary of the steps needed to track a value to a given dataflow node.
*
* This can be used to track objects that implement a certain API in order to
@@ -40,7 +44,7 @@ class OptionalAttributeName = InternalSpecific::OptionalTypeTrackerContent;
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
*/
class TypeTracker extends Internal::TypeTracker {
deprecated class TypeTracker extends Internal::TypeTracker {
/**
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
* The type tracking only ends after the attribute has been loaded.
@@ -55,12 +59,12 @@ class TypeTracker extends Internal::TypeTracker {
string getAttr() { result = this.getContent() }
}
module TypeTracker = Internal::TypeTracker;
deprecated module TypeTracker = Internal::TypeTracker;
class StepSummary = Internal::StepSummary;
deprecated class StepSummary = Internal::StepSummary;
module StepSummary = Internal::StepSummary;
deprecated module StepSummary = Internal::StepSummary;
class TypeBackTracker = Internal::TypeBackTracker;
deprecated class TypeBackTracker = Internal::TypeBackTracker;
module TypeBackTracker = Internal::TypeBackTracker;
deprecated module TypeBackTracker = Internal::TypeBackTracker;

View File

@@ -0,0 +1,56 @@
/**
* Provides classes and predicates for simple data-flow reachability suitable
* for tracking types.
*/
private import internal.TypeTrackingImpl as Impl
import Impl::Shared::TypeTracking<Impl::TypeTrackingInput>
/** A string that may appear as the name of an attribute or access path. */
class AttributeName = Impl::TypeTrackingInput::Content;
/**
* A summary of the steps needed to track a value to a given dataflow node.
*
* This can be used to track objects that implement a certain API in order to
* recognize calls to that API. Note that type-tracking does not by itself provide a
* source/sink relation, that is, it may determine that a node has a given type,
* but it won't determine where that type came from.
*
* It is recommended that all uses of this type are written in the following form,
* for tracking some type `myType`:
* ```ql
* Node myType(TypeTracker tt) {
* tt.start() and
* result = < source of myType >
* or
* exists(TypeTracker tt2 |
* tt = tt2.step(myType(tt2), result)
* )
* }
*
* Node myType() { myType(TypeTracker::end()).flowsTo(result) }
* ```
*
* If you want to track individual intra-procedural steps, use `tt2.smallstep`
* instead of `tt2.step`.
*/
class TypeTracker extends Impl::TypeTracker {
/**
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
* The type tracking only ends after the attribute has been loaded.
*/
predicate startInAttr(string attrName) { this.startInContent(attrName) }
/**
* INTERNAL. DO NOT USE.
*
* Gets the attribute associated with this type tracker.
*/
string getAttr() {
// Some content is being tracked: report it as the attribute name.
result = this.getContent().asSome()
or
// No content is being tracked: report the empty string.
this.getContent().isNone() and
result = ""
}
}

View File

@@ -1,182 +0,0 @@
/**
* Module for parsing access paths from MaD models, both the identifying access path used
* by dynamic languages, and the input/output specifications for summary steps.
*
* This file is used by the shared data flow library and by the JavaScript libraries
* (which do not use the shared data flow libraries).
*/
/**
* Convenience-predicate for extracting two capture groups at once.
*/
bindingset[input, regexp]
private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) {
capture1 = input.regexpCapture(regexp, 1) and
capture2 = input.regexpCapture(regexp, 2)
}
/** Companion module to the `AccessPath` class. */
module AccessPath {
/** A string that should be parsed as an access path. */
abstract class Range extends string {
bindingset[this]
Range() { any() }
}
/**
* Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value
* of the constant or any value contained in the interval.
*/
bindingset[arg]
int parseInt(string arg) {
result = arg.toInt()
or
// Match "n1..n2"
exists(string lo, string hi |
regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and
result = [lo.toInt() .. hi.toInt()]
)
}
/**
* Parses a lower-bounded interval `n..` and gets the lower bound.
*/
bindingset[arg]
int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() }
/**
* Parses an integer constant or interval (bounded or unbounded) that explicitly
* references the arity, such as `N-1` or `N-3..N-1`.
*
* Note that expressions of form `N-x` will never resolve to a negative index,
* even if `N` is zero (it will have no result in that case).
*/
bindingset[arg, arity]
private int parseIntWithExplicitArity(string arg, int arity) {
result >= 0 and // do not allow N-1 to resolve to a negative index
exists(string lo |
// N-x
lo = arg.regexpCapture("N-(\\d+)", 1) and
result = arity - lo.toInt()
or
// N-x..
lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and
result = [arity - lo.toInt(), arity - 1]
)
or
exists(string lo, string hi |
// x..N-y
regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and
result = [lo.toInt() .. arity - hi.toInt()]
or
// N-x..N-y
regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and
result = [arity - lo.toInt() .. arity - hi.toInt()] and
result >= 0
or
// N-x..y
regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and
result = [arity - lo.toInt() .. hi.toInt()] and
result >= 0
)
}
/**
* Parses an integer constant or interval (bounded or unbounded) and gets any
* of the integers contained within (of which there may be infinitely many).
*
* Has no result for arguments involving an explicit arity, such as `N-1`.
*/
bindingset[arg, result]
int parseIntUnbounded(string arg) {
result = parseInt(arg)
or
result >= parseLowerBound(arg)
}
/**
* Parses an integer constant or interval (bounded or unbounded) that
* may reference the arity of a call, such as `N-1` or `N-3..N-1`.
*
* Note that expressions of form `N-x` will never resolve to a negative index,
* even if `N` is zero (it will have no result in that case).
*/
bindingset[arg, arity]
int parseIntWithArity(string arg, int arity) {
result = parseInt(arg)
or
result in [parseLowerBound(arg) .. arity - 1]
or
result = parseIntWithExplicitArity(arg, arity)
}
}
/** Gets the `n`th token on the access path as a string. */
private string getRawToken(AccessPath path, int n) {
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
// Instead use regexpFind to match valid tokens, and supplement with a final length
// check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token.
result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
}
/**
* A string that occurs as an access path (either identifying or input/output spec)
* which might be relevant for this database.
*/
class AccessPath extends string instanceof AccessPath::Range {
/** Holds if this string is not a syntactically valid access path. */
predicate hasSyntaxError() {
// If the lengths match, all characters must have been included in a token
// or seen by the `.` lookahead pattern.
this != "" and
not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1
}
/** Gets the `n`th token on the access path (if there are no syntax errors). */
AccessPathToken getToken(int n) {
result = getRawToken(this, n) and
not this.hasSyntaxError()
}
/** Gets the number of tokens on the path (if there are no syntax errors). */
int getNumToken() {
result = count(int n | exists(getRawToken(this, n))) and
not this.hasSyntaxError()
}
}
/**
* An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths.
*/
class AccessPathToken extends string {
AccessPathToken() { this = getRawToken(_, _) }
private string getPart(int part) {
result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
}
/** Gets the name of the token, such as `Member` from `Member[x]` */
string getName() { result = this.getPart(1) }
/**
* Gets the argument list, such as `1,2` from `Member[1,2]`,
* or has no result if there are no arguments.
*/
string getArgumentList() { result = this.getPart(2) }
/** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() }
/** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
pragma[nomagic]
string getArgument(string name, int n) { name = this.getName() and result = this.getArgument(n) }
/** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */
string getAnArgument() { result = this.getArgument(_) }
/** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
string getAnArgument(string name) { result = this.getArgument(name, _) }
/** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
int getNumArgument() { result = count(int n | exists(this.getArgument(n))) }
}

View File

@@ -36,26 +36,32 @@ private import python
private import DataFlowPublic
private import DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl
private import FlowSummaryImplSpecific as FlowSummaryImplSpecific
private import semmle.python.internal.CachedStages
private import semmle.python.dataflow.new.internal.TypeTracker::CallGraphConstruction as CallGraphConstruction
private import semmle.python.dataflow.new.internal.TypeTrackingImpl::CallGraphConstruction as CallGraphConstruction
newtype TParameterPosition =
/** Used for `self` in methods, and `cls` in classmethods. */
TSelfParameterPosition() or
/**
* This is used for tracking flow through captured variables, and
* we use separate parameter/argument positions in order to distinguish
* "lambda self" from "normal self", as lambdas may also access outer `self`
* variables (through variable capture).
*/
TLambdaSelfParameterPosition() or
TPositionalParameterPosition(int index) {
index = any(Parameter p).getPosition()
or
// since synthetic parameters are made for a synthetic summary callable, based on
// what Argument positions they have flow for, we need to make sure we have such
// parameter positions available.
FlowSummaryImplSpecific::ParsePositions::isParsedPositionalArgumentPosition(_, index)
FlowSummaryImpl::ParsePositions::isParsedPositionalArgumentPosition(_, index)
} or
TKeywordParameterPosition(string name) {
name = any(Parameter p).getName()
or
// see comment for TPositionalParameterPosition
FlowSummaryImplSpecific::ParsePositions::isParsedKeywordArgumentPosition(_, name)
FlowSummaryImpl::ParsePositions::isParsedKeywordArgumentPosition(_, name)
} or
TStarArgsParameterPosition(int index) {
// since `.getPosition` does not work for `*args`, we need *args parameter positions
@@ -79,6 +85,9 @@ class ParameterPosition extends TParameterPosition {
/** Holds if this position represents a `self`/`cls` parameter. */
predicate isSelf() { this = TSelfParameterPosition() }
/** Holds if this position represents a reference to a lambda itself. Only used for tracking flow through captured variables. */
predicate isLambdaSelf() { this = TLambdaSelfParameterPosition() }
/** Holds if this position represents a positional parameter at (0-based) `index`. */
predicate isPositional(int index) { this = TPositionalParameterPosition(index) }
@@ -110,6 +119,8 @@ class ParameterPosition extends TParameterPosition {
string toString() {
this.isSelf() and result = "self"
or
this.isLambdaSelf() and result = "lambda self"
or
exists(int index | this.isPositional(index) and result = "position " + index)
or
exists(string name | this.isKeyword(name) and result = "keyword " + name)
@@ -130,19 +141,26 @@ class ParameterPosition extends TParameterPosition {
newtype TArgumentPosition =
/** Used for `self` in methods, and `cls` in classmethods. */
TSelfArgumentPosition() or
/**
* This is used for tracking flow through captured variables, and
* we use separate parameter/argument positions in order to distinguish
* "lambda self" from "normal self", as lambdas may also access outer `self`
* variables (through variable capture).
*/
TLambdaSelfArgumentPosition() or
TPositionalArgumentPosition(int index) {
exists(any(CallNode c).getArg(index))
or
// since synthetic calls within a summarized callable could use a unique argument
// position, we need to ensure we make these available (these are specified as
// parameters in the flow-summary spec)
FlowSummaryImplSpecific::ParsePositions::isParsedPositionalParameterPosition(_, index)
FlowSummaryImpl::ParsePositions::isParsedPositionalParameterPosition(_, index)
} or
TKeywordArgumentPosition(string name) {
exists(any(CallNode c).getArgByName(name))
or
// see comment for TPositionalArgumentPosition
FlowSummaryImplSpecific::ParsePositions::isParsedKeywordParameterPosition(_, name)
FlowSummaryImpl::ParsePositions::isParsedKeywordParameterPosition(_, name)
} or
TStarArgsArgumentPosition(int index) {
exists(Call c | c.getPositionalArg(index) instanceof Starred)
@@ -154,6 +172,9 @@ class ArgumentPosition extends TArgumentPosition {
/** Holds if this position represents a `self`/`cls` argument. */
predicate isSelf() { this = TSelfArgumentPosition() }
/** Holds if this position represents a lambda `self` argument. Only used for tracking flow through captured variables. */
predicate isLambdaSelf() { this = TLambdaSelfArgumentPosition() }
/** Holds if this position represents a positional argument at (0-based) `index`. */
predicate isPositional(int index) { this = TPositionalArgumentPosition(index) }
@@ -170,6 +191,8 @@ class ArgumentPosition extends TArgumentPosition {
string toString() {
this.isSelf() and result = "self"
or
this.isLambdaSelf() and result = "lambda self"
or
exists(int pos | this.isPositional(pos) and result = "position " + pos)
or
exists(string name | this.isKeyword(name) and result = "keyword " + name)
@@ -184,6 +207,8 @@ class ArgumentPosition extends TArgumentPosition {
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
ppos.isSelf() and apos.isSelf()
or
ppos.isLambdaSelf() and apos.isLambdaSelf()
or
exists(int index | ppos.isPositional(index) and apos.isPositional(index))
or
exists(string name | ppos.isKeyword(name) and apos.isKeyword(name))
@@ -1514,6 +1539,37 @@ abstract class ParameterNodeImpl extends Node {
}
}
/**
* A synthetic parameter representing the values of the variables captured
* by the callable being called. This parameter represents a single object
* where all the values are stored as attributes.
* This is also known as the environment part of a closure.
*
* This is used for tracking flow through captured variables.
*/
class SynthCapturedVariablesParameterNode extends ParameterNodeImpl,
TSynthCapturedVariablesParameterNode
{
// The function that this synthetic parameter node was created for.
private Function callable;
SynthCapturedVariablesParameterNode() { this = TSynthCapturedVariablesParameterNode(callable) }
/** Gets the function that this synthetic parameter belongs to. */
final Function getCallable() { result = callable }
// This node is synthetic, so there is no syntactic `Parameter` to report.
override Parameter getParameter() { none() }
// The node sits at the dedicated "lambda self" position of `callable`.
override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
c = TFunction(callable) and
pos.isLambdaSelf()
}
override Scope getScope() { result = callable }
// The location of the function itself is used as the location of this node.
override Location getLocation() { result = callable.getLocation() }
override string toString() { result = "lambda self in " + callable }
}
/** A parameter for a library callable with a flow summary. */
class SummaryParameterNode extends ParameterNodeImpl, FlowSummaryNode {
SummaryParameterNode() {
@@ -1566,12 +1622,15 @@ private class SummaryReturnNode extends FlowSummaryNode, ReturnNode {
}
private class SummaryArgumentNode extends FlowSummaryNode, ArgumentNode {
private SummaryCall call_;
private ArgumentPosition pos_;
SummaryArgumentNode() {
FlowSummaryImpl::Private::summaryArgumentNode(_, this.getSummaryNode(), _)
FlowSummaryImpl::Private::summaryArgumentNode(call_.getReceiver(), this.getSummaryNode(), pos_)
}
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
FlowSummaryImpl::Private::summaryArgumentNode(call, this.getSummaryNode(), pos)
call = call_ and pos = pos_
}
}
@@ -1585,6 +1644,39 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl
override Node getPreUpdateNode() { result = pre }
}
/**
* A synthetic argument representing the values of the variables captured
* by the callable being called. This argument represents a single object
* where all the values are stored as attributes.
* This is also known as the environment part of a closure.
*
* This is used for tracking flow through captured variables.
*
* TODO:
* We might want a synthetic node here, but currently that incurs problems
* with non-monotonic recursion, because of the use of `resolveCall` in the
* char pred. This may be solvable by using
* `CallGraphConstruction::Make` instead of
* `CallGraphConstruction::Simple::Make` appropriately.
*/
class CapturedVariablesArgumentNode extends CfgNode, ArgumentNode {
// The call whose function expression this node reuses as the argument.
CallNode callNode;
CapturedVariablesArgumentNode() {
// This node is the function-expression part of the call...
node = callNode.getFunction() and
// ...and the call resolves to a function that is a capturing scope of
// at least one captured variable.
exists(Function target | resolveCall(callNode, target, _) |
target = any(VariableCapture::CapturedVariable v).getACapturingScope()
)
}
override string toString() { result = "Capturing closure argument" }
// Holds for any `DataFlowCall` whose node is `callNode`, at the
// "lambda self" argument position.
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
callNode = call.getNode() and
pos.isLambdaSelf()
}
}
/** Gets a viable run-time target for the call `call`. */
DataFlowCallable viableCallable(DataFlowCall call) {
call instanceof ExtractedDataFlowCall and
@@ -1669,10 +1761,16 @@ private module OutNodes {
}
private class SummaryOutNode extends FlowSummaryNode, OutNode {
SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this.getSummaryNode(), _) }
private SummaryCall call;
private ReturnKind kind_;
SummaryOutNode() {
FlowSummaryImpl::Private::summaryOutNode(call.getReceiver(), this.getSummaryNode(), kind_)
}
override DataFlowCall getCall(ReturnKind kind) {
FlowSummaryImpl::Private::summaryOutNode(result, this.getSummaryNode(), kind)
result = call and
kind = kind_
}
}
}

View File

@@ -10,10 +10,12 @@ private import DataFlowImplSpecific::Private
import DataFlowImplSpecific::Public
private import DataFlowImpl
import DataFlowImplCommonPublic
import FlowStateString
deprecated import FlowStateString
private import codeql.util.Unit
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* A configuration of interprocedural data flow analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the global data flow library must define its own unique extension
@@ -48,7 +50,7 @@ private import codeql.util.Unit
* should instead depend on a `DataFlow2::Configuration`, a
* `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
*/
abstract class Configuration extends string {
abstract deprecated class Configuration extends string {
bindingset[this]
Configuration() { any() }
@@ -189,7 +191,7 @@ abstract class Configuration extends string {
* Good performance cannot be guaranteed in the presence of such recursion, so
* it should be replaced by using more than one copy of the data flow library.
*/
abstract private class ConfigurationRecursionPrevention extends Configuration {
abstract deprecated private class ConfigurationRecursionPrevention extends Configuration {
bindingset[this]
ConfigurationRecursionPrevention() { any() }
@@ -210,7 +212,7 @@ abstract private class ConfigurationRecursionPrevention extends Configuration {
}
}
private FlowState relevantState(Configuration config) {
deprecated private FlowState relevantState(Configuration config) {
config.isSource(_, result) or
config.isSink(_, result) or
config.isBarrier(_, result) or
@@ -219,17 +221,17 @@ private FlowState relevantState(Configuration config) {
}
private newtype TConfigState =
TMkConfigState(Configuration config, FlowState state) {
deprecated TMkConfigState(Configuration config, FlowState state) {
state = relevantState(config) or state instanceof FlowStateEmpty
}
private Configuration getConfig(TConfigState state) { state = TMkConfigState(result, _) }
deprecated private Configuration getConfig(TConfigState state) { state = TMkConfigState(result, _) }
private FlowState getState(TConfigState state) { state = TMkConfigState(_, result) }
deprecated private FlowState getState(TConfigState state) { state = TMkConfigState(_, result) }
private predicate singleConfiguration() { 1 = strictcount(Configuration c) }
deprecated private predicate singleConfiguration() { 1 = strictcount(Configuration c) }
private module Config implements FullStateConfigSig {
deprecated private module Config implements FullStateConfigSig {
class FlowState = TConfigState;
predicate isSource(Node source, FlowState state) {
@@ -296,13 +298,13 @@ private module Config implements FullStateConfigSig {
predicate includeHiddenNodes() { any(Configuration config).includeHiddenNodes() }
}
private import Impl<Config> as I
deprecated private import Impl<Config> as I
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof I::PathNode {
deprecated class PathNode instanceof I::PathNode {
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
@@ -329,10 +331,10 @@ class PathNode instanceof I::PathNode {
final Node getNode() { result = super.getNode() }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = getState(super.getState()) }
deprecated final FlowState getState() { result = getState(super.getState()) }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = getConfig(super.getState()) }
deprecated final Configuration getConfiguration() { result = getConfig(super.getState()) }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getASuccessor() }
@@ -347,9 +349,9 @@ class PathNode instanceof I::PathNode {
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
}
module PathGraph = I::PathGraph;
deprecated module PathGraph = I::PathGraph;
private predicate hasFlow(Node source, Node sink, Configuration config) {
deprecated private predicate hasFlow(Node source, Node sink, Configuration config) {
exists(PathNode source0, PathNode sink0 |
hasFlowPath(source0, sink0, config) and
source0.getNode() = source and
@@ -357,10 +359,10 @@ private predicate hasFlow(Node source, Node sink, Configuration config) {
)
}
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
deprecated private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
I::flowPath(source, sink) and source.getConfiguration() = config
}
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }
deprecated private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }
predicate flowsTo = hasFlow/3;
deprecated predicate flowsTo = hasFlow/3;

View File

@@ -10,10 +10,12 @@ private import DataFlowImplSpecific::Private
import DataFlowImplSpecific::Public
private import DataFlowImpl
import DataFlowImplCommonPublic
import FlowStateString
deprecated import FlowStateString
private import codeql.util.Unit
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* A configuration of interprocedural data flow analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the global data flow library must define its own unique extension
@@ -48,7 +50,7 @@ private import codeql.util.Unit
* should instead depend on a `DataFlow2::Configuration`, a
* `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
*/
abstract class Configuration extends string {
abstract deprecated class Configuration extends string {
bindingset[this]
Configuration() { any() }
@@ -189,7 +191,7 @@ abstract class Configuration extends string {
* Good performance cannot be guaranteed in the presence of such recursion, so
* it should be replaced by using more than one copy of the data flow library.
*/
abstract private class ConfigurationRecursionPrevention extends Configuration {
abstract deprecated private class ConfigurationRecursionPrevention extends Configuration {
bindingset[this]
ConfigurationRecursionPrevention() { any() }
@@ -210,7 +212,7 @@ abstract private class ConfigurationRecursionPrevention extends Configuration {
}
}
private FlowState relevantState(Configuration config) {
deprecated private FlowState relevantState(Configuration config) {
config.isSource(_, result) or
config.isSink(_, result) or
config.isBarrier(_, result) or
@@ -219,17 +221,17 @@ private FlowState relevantState(Configuration config) {
}
private newtype TConfigState =
TMkConfigState(Configuration config, FlowState state) {
deprecated TMkConfigState(Configuration config, FlowState state) {
state = relevantState(config) or state instanceof FlowStateEmpty
}
private Configuration getConfig(TConfigState state) { state = TMkConfigState(result, _) }
deprecated private Configuration getConfig(TConfigState state) { state = TMkConfigState(result, _) }
private FlowState getState(TConfigState state) { state = TMkConfigState(_, result) }
deprecated private FlowState getState(TConfigState state) { state = TMkConfigState(_, result) }
private predicate singleConfiguration() { 1 = strictcount(Configuration c) }
deprecated private predicate singleConfiguration() { 1 = strictcount(Configuration c) }
private module Config implements FullStateConfigSig {
deprecated private module Config implements FullStateConfigSig {
class FlowState = TConfigState;
predicate isSource(Node source, FlowState state) {
@@ -296,13 +298,13 @@ private module Config implements FullStateConfigSig {
predicate includeHiddenNodes() { any(Configuration config).includeHiddenNodes() }
}
private import Impl<Config> as I
deprecated private import Impl<Config> as I
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof I::PathNode {
deprecated class PathNode instanceof I::PathNode {
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
@@ -329,10 +331,10 @@ class PathNode instanceof I::PathNode {
final Node getNode() { result = super.getNode() }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = getState(super.getState()) }
deprecated final FlowState getState() { result = getState(super.getState()) }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = getConfig(super.getState()) }
deprecated final Configuration getConfiguration() { result = getConfig(super.getState()) }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getASuccessor() }
@@ -347,9 +349,9 @@ class PathNode instanceof I::PathNode {
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
}
module PathGraph = I::PathGraph;
deprecated module PathGraph = I::PathGraph;
private predicate hasFlow(Node source, Node sink, Configuration config) {
deprecated private predicate hasFlow(Node source, Node sink, Configuration config) {
exists(PathNode source0, PathNode sink0 |
hasFlowPath(source0, sink0, config) and
source0.getNode() = source and
@@ -357,10 +359,10 @@ private predicate hasFlow(Node source, Node sink, Configuration config) {
)
}
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
deprecated private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
I::flowPath(source, sink) and source.getConfiguration() = config
}
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }
deprecated private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }
predicate flowsTo = hasFlow/3;
deprecated predicate flowsTo = hasFlow/3;

View File

@@ -10,10 +10,12 @@ private import DataFlowImplSpecific::Private
import DataFlowImplSpecific::Public
private import DataFlowImpl
import DataFlowImplCommonPublic
import FlowStateString
deprecated import FlowStateString
private import codeql.util.Unit
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* A configuration of interprocedural data flow analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the global data flow library must define its own unique extension
@@ -48,7 +50,7 @@ private import codeql.util.Unit
* should instead depend on a `DataFlow2::Configuration`, a
* `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
*/
abstract class Configuration extends string {
abstract deprecated class Configuration extends string {
bindingset[this]
Configuration() { any() }
@@ -189,7 +191,7 @@ abstract class Configuration extends string {
* Good performance cannot be guaranteed in the presence of such recursion, so
* it should be replaced by using more than one copy of the data flow library.
*/
abstract private class ConfigurationRecursionPrevention extends Configuration {
abstract deprecated private class ConfigurationRecursionPrevention extends Configuration {
bindingset[this]
ConfigurationRecursionPrevention() { any() }
@@ -210,7 +212,7 @@ abstract private class ConfigurationRecursionPrevention extends Configuration {
}
}
private FlowState relevantState(Configuration config) {
deprecated private FlowState relevantState(Configuration config) {
config.isSource(_, result) or
config.isSink(_, result) or
config.isBarrier(_, result) or
@@ -219,17 +221,17 @@ private FlowState relevantState(Configuration config) {
}
private newtype TConfigState =
TMkConfigState(Configuration config, FlowState state) {
deprecated TMkConfigState(Configuration config, FlowState state) {
state = relevantState(config) or state instanceof FlowStateEmpty
}
private Configuration getConfig(TConfigState state) { state = TMkConfigState(result, _) }
deprecated private Configuration getConfig(TConfigState state) { state = TMkConfigState(result, _) }
private FlowState getState(TConfigState state) { state = TMkConfigState(_, result) }
deprecated private FlowState getState(TConfigState state) { state = TMkConfigState(_, result) }
private predicate singleConfiguration() { 1 = strictcount(Configuration c) }
deprecated private predicate singleConfiguration() { 1 = strictcount(Configuration c) }
private module Config implements FullStateConfigSig {
deprecated private module Config implements FullStateConfigSig {
class FlowState = TConfigState;
predicate isSource(Node source, FlowState state) {
@@ -296,13 +298,13 @@ private module Config implements FullStateConfigSig {
predicate includeHiddenNodes() { any(Configuration config).includeHiddenNodes() }
}
private import Impl<Config> as I
deprecated private import Impl<Config> as I
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof I::PathNode {
deprecated class PathNode instanceof I::PathNode {
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
@@ -329,10 +331,10 @@ class PathNode instanceof I::PathNode {
final Node getNode() { result = super.getNode() }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = getState(super.getState()) }
deprecated final FlowState getState() { result = getState(super.getState()) }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = getConfig(super.getState()) }
deprecated final Configuration getConfiguration() { result = getConfig(super.getState()) }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getASuccessor() }
@@ -347,9 +349,9 @@ class PathNode instanceof I::PathNode {
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
}
module PathGraph = I::PathGraph;
deprecated module PathGraph = I::PathGraph;
private predicate hasFlow(Node source, Node sink, Configuration config) {
deprecated private predicate hasFlow(Node source, Node sink, Configuration config) {
exists(PathNode source0, PathNode sink0 |
hasFlowPath(source0, sink0, config) and
source0.getNode() = source and
@@ -357,10 +359,10 @@ private predicate hasFlow(Node source, Node sink, Configuration config) {
)
}
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
deprecated private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
I::flowPath(source, sink) and source.getConfiguration() = config
}
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }
deprecated private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }
predicate flowsTo = hasFlow/3;
deprecated predicate flowsTo = hasFlow/3;

View File

@@ -10,10 +10,12 @@ private import DataFlowImplSpecific::Private
import DataFlowImplSpecific::Public
private import DataFlowImpl
import DataFlowImplCommonPublic
import FlowStateString
deprecated import FlowStateString
private import codeql.util.Unit
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* A configuration of interprocedural data flow analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the global data flow library must define its own unique extension
@@ -48,7 +50,7 @@ private import codeql.util.Unit
* should instead depend on a `DataFlow2::Configuration`, a
* `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
*/
abstract class Configuration extends string {
abstract deprecated class Configuration extends string {
bindingset[this]
Configuration() { any() }
@@ -189,7 +191,7 @@ abstract class Configuration extends string {
* Good performance cannot be guaranteed in the presence of such recursion, so
* it should be replaced by using more than one copy of the data flow library.
*/
abstract private class ConfigurationRecursionPrevention extends Configuration {
abstract deprecated private class ConfigurationRecursionPrevention extends Configuration {
bindingset[this]
ConfigurationRecursionPrevention() { any() }
@@ -210,7 +212,7 @@ abstract private class ConfigurationRecursionPrevention extends Configuration {
}
}
private FlowState relevantState(Configuration config) {
deprecated private FlowState relevantState(Configuration config) {
config.isSource(_, result) or
config.isSink(_, result) or
config.isBarrier(_, result) or
@@ -219,17 +221,17 @@ private FlowState relevantState(Configuration config) {
}
private newtype TConfigState =
TMkConfigState(Configuration config, FlowState state) {
deprecated TMkConfigState(Configuration config, FlowState state) {
state = relevantState(config) or state instanceof FlowStateEmpty
}
private Configuration getConfig(TConfigState state) { state = TMkConfigState(result, _) }
deprecated private Configuration getConfig(TConfigState state) { state = TMkConfigState(result, _) }
private FlowState getState(TConfigState state) { state = TMkConfigState(_, result) }
deprecated private FlowState getState(TConfigState state) { state = TMkConfigState(_, result) }
private predicate singleConfiguration() { 1 = strictcount(Configuration c) }
deprecated private predicate singleConfiguration() { 1 = strictcount(Configuration c) }
private module Config implements FullStateConfigSig {
deprecated private module Config implements FullStateConfigSig {
class FlowState = TConfigState;
predicate isSource(Node source, FlowState state) {
@@ -296,13 +298,13 @@ private module Config implements FullStateConfigSig {
predicate includeHiddenNodes() { any(Configuration config).includeHiddenNodes() }
}
private import Impl<Config> as I
deprecated private import Impl<Config> as I
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof I::PathNode {
deprecated class PathNode instanceof I::PathNode {
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
@@ -329,10 +331,10 @@ class PathNode instanceof I::PathNode {
final Node getNode() { result = super.getNode() }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = getState(super.getState()) }
deprecated final FlowState getState() { result = getState(super.getState()) }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = getConfig(super.getState()) }
deprecated final Configuration getConfiguration() { result = getConfig(super.getState()) }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getASuccessor() }
@@ -347,9 +349,9 @@ class PathNode instanceof I::PathNode {
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
}
module PathGraph = I::PathGraph;
deprecated module PathGraph = I::PathGraph;
private predicate hasFlow(Node source, Node sink, Configuration config) {
deprecated private predicate hasFlow(Node source, Node sink, Configuration config) {
exists(PathNode source0, PathNode sink0 |
hasFlowPath(source0, sink0, config) and
source0.getNode() = source and
@@ -357,10 +359,10 @@ private predicate hasFlow(Node source, Node sink, Configuration config) {
)
}
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
deprecated private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
I::flowPath(source, sink) and source.getConfiguration() = config
}
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }
deprecated private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }
predicate flowsTo = hasFlow/3;
deprecated predicate flowsTo = hasFlow/3;

View File

@@ -1,56 +0,0 @@
/**
* Provides consistency queries for checking invariants in the language-specific
* data-flow classes and predicates.
*/
private import python
private import DataFlowImplSpecific
private import TaintTrackingImplSpecific
private import codeql.dataflow.internal.DataFlowImplConsistency
/**
 * Python-specific input for the shared data-flow consistency checks; it is the
 * `Input` argument passed to `MakeConsistency` below.
 *
 * NOTE(review): each `...Exclude` predicate presumably exempts the matching
 * nodes/calls from the correspondingly named consistency check in
 * `DataFlowImplConsistency` -- confirm against that library.
 */
private module Input implements InputSig<PythonDataFlow> {
private import Private
private import Public
/** Holds if `n` is an argument at a `*args` (`isStarArgs`) or `**kwargs` (`isDictSplat`) position. */
predicate argHasPostUpdateExclude(ArgumentNode n) {
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_))
or
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
}
/** Holds if `n` is a synthetic `**kwargs` parameter node. */
predicate reverseReadExclude(Node n) {
// since `self`/`cls` parameters can be marked as implicit argument to `super()`,
// they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs`
// parameter, but dataflow-consistency queries should _not_ complain about there not
// being a post-update node for the synthetic `**kwargs` parameter.
n instanceof SynthDictSplatParameterNode
}
/**
 * Holds if `p` is the parameter node at position `pos` of `c`, for a parameter of
 * the function scope of `c` that is reachable both by index (`getArg`) and by
 * name (`getArgByName`).
 */
predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
// For normal parameters that can both be passed as positional arguments or keyword
// arguments, we currently have parameter positions for both cases.
//
// TODO: Figure out how bad breaking this consistency check is
exists(Function func, Parameter param |
c.getScope() = func and
p = parameterNode(param) and
c.getParameter(pos) = p and
param = func.getArg(_) and
param = func.getArgByName(_)
)
}
/** Holds if the file containing the location of `call` has no relative path. */
predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) {
not exists(call.getLocation().getFile().getRelativePath())
}
/** Holds if the file containing the location of `n` has no relative path. */
predicate identityLocalStepExclude(Node n) {
not exists(n.getLocation().getFile().getRelativePath())
}
/** Holds if `isArgumentNode` relates `arg` to `call` at some position. */
predicate multipleArgumentCallExclude(ArgumentNode arg, DataFlowCall call) {
isArgumentNode(arg, call, _)
}
}
module Consistency = MakeConsistency<PythonDataFlow, PythonTaintTracking, Input>;

View File

@@ -17,6 +17,7 @@ private import semmle.python.Frameworks
import MatchUnpacking
import IterableUnpacking
import DataFlowDispatch
import VariableCapture as VariableCapture
/** Gets the callable in which this node occurs. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
@@ -281,28 +282,33 @@ class DataFlowExpr = Expr;
/**
* A module to compute local flow.
*
* Flow will generally go from control flow nodes into essa variables at definitions,
* Flow will generally go from control flow nodes for expressions into
* control flow nodes for variables at definitions,
* and from there via use-use flow to other control flow nodes.
*
* Some syntactic constructs are handled separately.
*/
module LocalFlow {
/** Holds if `nodeFrom` is the control flow node defining the essa variable `nodeTo`. */
/** Holds if `nodeFrom` is the expression defining the value for the variable `nodeTo`. */
predicate definitionFlowStep(Node nodeFrom, Node nodeTo) {
// Definition
// `x = f(42)`
// nodeFrom is `f(42)`, cfg node
// nodeTo is `x`, essa var
nodeFrom.(CfgNode).getNode() =
nodeTo.(EssaNode).getVar().getDefinition().(AssignmentDefinition).getValue()
// nodeFrom is `f(42)`
// nodeTo is `x`
exists(AssignmentDefinition def |
nodeFrom.(CfgNode).getNode() = def.getValue() and
nodeTo.(CfgNode).getNode() = def.getDefiningNode()
)
or
// With definition
// `with f(42) as x:`
// nodeFrom is `f(42)`, cfg node
// nodeTo is `x`, essa var
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
// nodeFrom is `f(42)`
// nodeTo is `x`
exists(With with, ControlFlowNode contextManager, WithDefinition withDef, ControlFlowNode var |
var = withDef.getDefiningNode()
|
nodeFrom.(CfgNode).getNode() = contextManager and
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
nodeTo.(CfgNode).getNode() = var and
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
with.getContextExpr() = contextManager.getNode() and
with.getOptionalVars() = var.getNode() and
@@ -313,34 +319,6 @@ module LocalFlow {
// * `foo = x.foo(); await foo.async_method(); foo.close()` and
// * `async with x.foo() as foo: await foo.async_method()`.
)
or
// Async with var definition
// `async with f(42) as x:`
// nodeFrom is `x`, cfg node
// nodeTo is `x`, essa var
//
// This makes the cfg node the local source of the awaited value.
//
// We have this step in addition to the step above, to handle cases where the QL
// modeling of `f(42)` requires a `.getAwaited()` step (in API graphs) when not
// using `async with`, so you can do both:
// * `foo = await x.foo(); await foo.async_method(); foo.close()` and
// * `async with x.foo() as foo: await foo.async_method()`.
exists(With with, ControlFlowNode var |
nodeFrom.(CfgNode).getNode() = var and
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
with.getOptionalVars() = var.getNode() and
with.isAsync()
)
or
// Parameter definition
// `def foo(x):`
// nodeFrom is `x`, cfgNode
// nodeTo is `x`, essa var
exists(ParameterDefinition pd |
nodeFrom.(CfgNode).getNode() = pd.getDefiningNode() and
nodeTo.(EssaNode).getVar() = pd.getVariable()
)
}
predicate expressionFlowStep(Node nodeFrom, Node nodeTo) {
@@ -372,9 +350,15 @@ module LocalFlow {
// First use after definition
// `y = 42`
// `x = f(y)`
// nodeFrom is `y` on first line, essa var
// nodeTo is `y` on second line, cfg node
defToFirstUse(nodeFrom.asVar(), nodeTo.asCfgNode())
// nodeFrom is `y` on first line
// nodeTo is `y` on second line
exists(EssaDefinition def |
nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode()
or
nodeFrom.(ScopeEntryDefinitionNode).getDefinition() = def
|
AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode())
)
or
// Next use after use
// `x = f(y)`
@@ -491,6 +475,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo)
or
summaryFlowSteps(nodeFrom, nodeTo)
or
variableCaptureLocalFlowStep(nodeFrom, nodeTo)
}
/**
@@ -501,8 +487,7 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
* or at runtime when callables in the module are called.
*/
predicate simpleLocalFlowStepForTypetracking(Node nodeFrom, Node nodeTo) {
IncludePostUpdateFlow<PhaseDependentFlow<LocalFlow::localFlowStep/2>::step/2>::step(nodeFrom,
nodeTo)
LocalFlow::localFlowStep(nodeFrom, nodeTo)
}
private predicate summaryLocalStep(Node nodeFrom, Node nodeTo) {
@@ -514,6 +499,16 @@ predicate summaryFlowSteps(Node nodeFrom, Node nodeTo) {
IncludePostUpdateFlow<PhaseDependentFlow<summaryLocalStep/2>::step/2>::step(nodeFrom, nodeTo)
}
/**
 * Holds if there is a local flow step from `nodeFrom` to `nodeTo` that stems from
 * `VariableCapture::valueStep`, after that relation has been wrapped in
 * `PhaseDependentFlow` and `IncludePostUpdateFlow`.
 *
 * Steps from a node to itself are excluded.
 */
predicate variableCaptureLocalFlowStep(Node nodeFrom, Node nodeTo) {
// Blindly applying use-use flow can result in a node that steps to itself, for
// example in while-loops. To uphold dataflow consistency checks, we don't want
// that. However, we do want to allow `[post] n` to `n` (to handle while loops), so
// we should only do the filtering after `IncludePostUpdateFlow` has been applied.
IncludePostUpdateFlow<PhaseDependentFlow<VariableCapture::valueStep/2>::step/2>::step(nodeFrom,
nodeTo) and
nodeFrom != nodeTo
}
/** `ModuleVariable`s are accessed via jump steps at runtime. */
predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
// Module variable read
@@ -565,11 +560,7 @@ predicate neverSkipInPathGraph(Node n) {
// ```
// we would end up saying that the path MUST not skip the x in `y = x`, which is just
// annoying and doesn't help the path explanation become clearer.
n.asVar() instanceof EssaDefinition and
// For a parameter we have flow from ControlFlowNode to SSA node, and then onwards
// with use-use flow, and since the CFN is already part of the path graph, we don't
// want to force showing the SSA node as well.
not n.asVar() instanceof ParameterDefinition
n.asCfgNode() = any(EssaNodeDefinition def).getDefiningNode()
}
/**
@@ -581,7 +572,7 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }
predicate localMustFlowStep(Node node1, Node node2) { none() }
predicate localMustFlowStep(Node nodeFrom, Node nodeTo) { none() }
/**
* Gets the type of `node`.
@@ -685,6 +676,38 @@ predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
or
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
or
VariableCapture::storeStep(nodeFrom, c, nodeTo)
}
/**
* A synthesized data flow node representing a closure object that tracks
* captured variables.
*/
class SynthCaptureNode extends Node, TSynthCaptureNode {
// The capture-library node wrapped by this data-flow node; bound by the
// characteristic predicate below.
private VariableCapture::Flow::SynthesizedCaptureNode cn;
SynthCaptureNode() { this = TSynthCaptureNode(cn) }
/** Gets the `SynthesizedCaptureNode` that this node represents. */
VariableCapture::Flow::SynthesizedCaptureNode getSynthesizedCaptureNode() { result = cn }
// Scope, location and textual representation are all delegated to `cn`;
// the scope is `cn`'s enclosing callable.
override Scope getScope() { result = cn.getEnclosingCallable() }
override Location getLocation() { result = cn.getLocation() }
override string toString() { result = cn.toString() }
}
/**
 * A synthesized capture node that is the post-update node of another
 * synthesized capture node, as determined by
 * `VariableCapture::Flow::capturePostUpdateNode`.
 */
private class SynthCapturePostUpdateNode extends PostUpdateNodeImpl, SynthCaptureNode {
// The capture node that this node is the post-update node of.
private SynthCaptureNode pre;
// `capturePostUpdateNode` relates the underlying capture node of `this`
// (first argument) to the underlying capture node of `pre` (second argument).
SynthCapturePostUpdateNode() {
VariableCapture::Flow::capturePostUpdateNode(this.getSynthesizedCaptureNode(),
pre.getSynthesizedCaptureNode())
}
override Node getPreUpdateNode() { result = pre }
}
/**
@@ -888,6 +911,8 @@ predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
nodeTo.(FlowSummaryNode).getSummaryNode())
or
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
or
VariableCapture::readStep(nodeFrom, c, nodeTo)
}
/** Data flows from a sequence to a subscript of the sequence. */
@@ -916,7 +941,7 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
predicate forReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
exists(ForTarget target |
nodeFrom.asExpr() = target.getSource() and
nodeTo.asVar().(EssaNodeDefinition).getDefiningNode() = target
nodeTo.asCfgNode() = target
) and
(
c instanceof ListElementContent
@@ -984,22 +1009,6 @@ predicate attributeClearStep(Node n, AttributeContent c) {
*/
predicate isUnreachableInCall(Node n, DataFlowCall call) { none() }
//--------
// Virtual dispatch with call context
//--------
/**
* Gets a viable dispatch target of `call` in the context `ctx`. This is
* restricted to those `call`s for which a context might make a difference.
*/
DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() }
/**
* Holds if the set of viable implementations that can be called by `call`
* might be improved by knowing the call context. This is the case if the qualifier accesses a parameter of
* the enclosing callable `c` (including the implicit `this` parameter).
*/
predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() }
/**
* Holds if access paths with `c` at their head always should be tracked at high
* precision. This disables adaptive access path precision for such access paths.
@@ -1017,6 +1026,10 @@ predicate nodeIsHidden(Node n) {
n instanceof SynthDictSplatArgumentNode
or
n instanceof SynthDictSplatParameterNode
or
n instanceof SynthCaptureNode
or
n instanceof SynthCapturedVariablesParameterNode
}
class LambdaCallKind = Unit;
@@ -1052,7 +1065,15 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves
* by default as a heuristic.
*/
predicate allowParameterReturnInSelf(ParameterNode p) {
FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p)
exists(DataFlowCallable c, ParameterPosition pos |
p.(ParameterNodeImpl).isParameterOf(c, pos) and
FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(c.asLibraryCallable(), pos)
)
or
exists(Function f |
VariableCapture::Flow::heuristicAllowInstanceParameterReturnInSelf(f) and
p = TSynthCapturedVariablesParameterNode(f)
)
}
/** An approximated `Content`. */

View File

@@ -4,7 +4,7 @@
private import python
private import DataFlowPrivate
import semmle.python.dataflow.new.TypeTracker
import semmle.python.dataflow.new.TypeTracking
import Attributes
import LocalSources
private import semmle.python.essa.SsaCompute
@@ -15,23 +15,24 @@ private import semmle.python.frameworks.data.ModelsAsData
/**
* IPA type for data flow nodes.
*
* Flow between SSA variables are computed in `Essa.qll`
* Nodes broadly fall into three categories.
*
* Flow from SSA variables to control flow nodes are generally via uses.
*
* Flow from control flow nodes to SSA variables are generally via assignments.
*
* The current implementation of these cross flows can be seen in `EssaTaintTracking`.
* - Control flow nodes: Flow between these is based on use-use flow computed via an SSA analysis.
* - Module variable nodes: These represent global variables and act as canonical targets for reads and writes of these.
* - Synthetic nodes: These handle flow in various special cases.
*/
newtype TNode =
/** A node corresponding to an SSA variable. */
TEssaNode(EssaVariable var) or
/** A node corresponding to a control flow node. */
TCfgNode(ControlFlowNode node) {
isExpressionNode(node)
or
node.getNode() instanceof Pattern
} or
/**
* A node corresponding to a scope entry definition. That is, the value of a variable
* as it enters a scope.
*/
TScopeEntryDefinitionNode(ScopeEntryDefinition def) { not def.getScope() instanceof Module } or
/**
* A synthetic node representing the value of an object before a state change.
*
@@ -116,6 +117,14 @@ newtype TNode =
/** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
TSynthDictSplatParameterNode(DataFlowCallable callable) {
exists(ParameterPosition ppos | ppos.isKeyword(_) | exists(callable.getParameter(ppos)))
} or
/** A synthetic node representing a captured variable. */
TSynthCaptureNode(VariableCapture::Flow::SynthesizedCaptureNode cn) or
/** A synthetic node representing the heap of a function. Used for variable capture. */
TSynthCapturedVariablesParameterNode(Function f) {
f = any(VariableCapture::CapturedVariable v).getACapturingScope() and
// TODO: Remove this restriction when adding proper support for captured variables in the body of the function we generate for comprehensions
exists(TFunction(f))
}
private import semmle.python.internal.CachedStages
@@ -156,9 +165,6 @@ class Node extends TNode {
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the ESSA variable corresponding to this node, if any. */
EssaVariable asVar() { none() }
/** Gets the control-flow node corresponding to this node, if any. */
ControlFlowNode asCfgNode() { none() }
@@ -171,25 +177,6 @@ class Node extends TNode {
LocalSourceNode getALocalSource() { result.flowsTo(this) }
}
/** A data-flow node corresponding to an SSA variable. */
class EssaNode extends Node, TEssaNode {
EssaVariable var;
EssaNode() { this = TEssaNode(var) }
/** Gets the `EssaVariable` represented by this data-flow node. */
EssaVariable getVar() { result = var }
override EssaVariable asVar() { result = var }
/** Gets a textual representation of this element. */
override string toString() { result = var.toString() }
override Scope getScope() { result = var.getScope() }
override Location getLocation() { result = var.getLocation() }
}
/** A data-flow node corresponding to a control-flow node. */
class CfgNode extends Node, TCfgNode {
ControlFlowNode node;
@@ -281,6 +268,28 @@ class ExprNode extends CfgNode {
/** Gets a node corresponding to expression `e`. */
ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
/**
* A node corresponding to a scope entry definition. That is, the value of a variable
* as it enters a scope.
*
* Location, scope and source variable are all taken from the wrapped
* `ScopeEntryDefinition`.
*/
class ScopeEntryDefinitionNode extends Node, TScopeEntryDefinitionNode {
// The scope entry definition wrapped by this node; bound by the characteristic predicate.
ScopeEntryDefinition def;
ScopeEntryDefinitionNode() { this = TScopeEntryDefinitionNode(def) }
/** Gets the `ScopeEntryDefinition` associated with this node. */
ScopeEntryDefinition getDefinition() { result = def }
/** Gets the source variable represented by this node. */
SsaSourceVariable getVariable() { result = def.getSourceVariable() }
// The location is that of the underlying definition.
override Location getLocation() { result = def.getLocation() }
// The scope is that of the underlying definition.
override Scope getScope() { result = def.getScope() }
// Rendered as "Entry definition for " followed by the variable's textual representation.
override string toString() { result = "Entry definition for " + this.getVariable().toString() }
}
/**
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
@@ -412,8 +421,8 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
}
/** Gets a node that corresponds to an assignment of this global variable. */
EssaNode getAWrite() {
result.getVar().getDefinition().(EssaNodeDefinition).definedBy(var, any(DefinitionNode defn))
Node getAWrite() {
any(EssaNodeDefinition def).definedBy(var, result.asCfgNode().(DefinitionNode))
}
/** Gets the possible values of the variable at the end of import time */
@@ -626,7 +635,9 @@ newtype TContent =
exists(string input, string output | ModelOutput::relevantSummaryModel(_, _, input, output, _) |
attr = [input, output].regexpFind("(?<=(^|\\.)Attribute\\[)[^\\]]+(?=\\])", _, _).trim()
)
}
} or
/** A captured variable. */
TCapturedVariableContent(VariableCapture::CapturedVariable v)
/**
* A data-flow value can have associated content.
@@ -689,6 +700,18 @@ class AttributeContent extends TAttributeContent, Content {
override string toString() { result = "Attribute " + attr }
}
/**
* A captured variable, viewed as content.
*
* There is one such content per `VariableCapture::CapturedVariable`.
*/
class CapturedVariableContent extends Content, TCapturedVariableContent {
// The captured variable this content stands for; bound by the characteristic predicate.
private VariableCapture::CapturedVariable v;
CapturedVariableContent() { this = TCapturedVariableContent(v) }
/** Gets the captured variable. */
VariableCapture::CapturedVariable getVariable() { result = v }
// Rendered as "captured " followed by the variable's textual representation.
override string toString() { result = "captured " + v }
}
/**
* An entity that represents a set of `Content`s.
*

View File

@@ -1,324 +0,0 @@
/**
* Provides Python specific classes and predicates for defining flow summaries.
*
* Flow summaries are defined for callables that are not extracted.
* Such callables go by different names in different parts of our codebase:
*
* - in `FlowSummary.qll`, which is user facing, they are called `SummarizedCallable`s.
* These contain summaries, implemented by the user via the predicates `propagatesFlow` and `propagatesFlowExt`.
*
* - in the data flow layer, they are called `LibraryCallable`s (as in the Ruby codebase).
* These are identified by strings and have predicates for finding calls to them.
*
* Having both extracted and non-extracted callables means that we now have three types of calls:
* - Extracted calls to extracted callables, either `NormalCall` or `SpecialCall`. These are handled by standard data flow.
* - Extracted calls to non-extracted callables, `LibraryCall`. These are handled by looking up the relevant summary when the
* global data flow graph is connected up via `getViableCallable`.
* - Non-extracted calls, `SummaryCall`. These are synthesised by the flow summary framework.
*
* The first two can be referred to as `ExtractedDataFlowCall`. In fact, `LibraryCall` is a subclass of `NormalCall`, where
* `getCallable` is set to `none()`. The member predicate `ExtractedDataFlowCall::getCallable` is _not_ the mechanism for
* call resolution in global data flow. That mechanism is `getViableCallable`.
* Resolving a call to a non-extracted callable goes via `LibraryCallable::getACall`, which may involve type tracking.
* To prevent type tracking from becoming mutually recursive with data flow, type tracking must use a call graph not including summaries.
* Type tracking sees the callgraph given by `ExtractedDataFlowCall::getACallable`.
*
* We do not support summaries of special methods via the special methods framework,
* the summary would have to identify the call.
*
* We might, while we still extract the standard library, want to support flow summaries of
* extracted callables, so that we can model part of the standard library with flow summaries.
* For this to work, we have to be careful with the enclosing callable predicate.
*/
private import python
private import DataFlowPrivate
private import DataFlowPublic
private import DataFlowImplCommon
private import FlowSummaryImpl::Private
private import FlowSummaryImpl::Public
private import semmle.python.dataflow.new.FlowSummary as FlowSummary
/**
* A class of callables that are candidates for flow summary modeling.
*/
class SummarizedCallableBase = string;
/**
* A class of callables that are candidates for neutral modeling.
*/
class NeutralCallableBase = string;
/** View a `SummarizedCallable` as a `DataFlowCallable`. */
DataFlowCallable inject(SummarizedCallable c) { result.asLibraryCallable() = c }
/** Gets the parameter position of the instance parameter. */
ArgumentPosition callbackSelfParameterPosition() { none() } // disables implicit summary flow to `this` for callbacks
/** Gets the synthesized data-flow call for `receiver`. */
SummaryCall summaryDataFlowCall(SummaryNode receiver) { receiver = result.getReceiver() }
/** Gets the type of content `c`. */
DataFlowType getContentType(Content c) { any() }
/** Gets the type of the parameter at the given position. */
DataFlowType getParameterType(SummarizedCallable c, ParameterPosition pos) { any() }
/** Gets the return type of kind `rk` for callable `c`. */
bindingset[c, rk]
DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
/**
* Gets the type of the parameter matching arguments at position `pos` in a
* synthesized call that targets a callback of type `t`.
*/
bindingset[t, pos]
DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() }
/**
* Gets the return type of kind `rk` in a synthesized call that targets a
* callback of type `t`.
*/
DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() }
/** Gets the type of synthetic global `sg`. */
DataFlowType getSyntheticGlobalType(SummaryComponent::SyntheticGlobal sg) { any() }
/**
 * Holds if `c` has an external flow summary from input specification `input`
 * to output specification `output`.
 *
 * `kind` is `"value"` when the summary preserves values and `"taint"` otherwise;
 * `provenance` is always `"manual"`.
 */
predicate summaryElement(
  FlowSummary::SummarizedCallable c, string input, string output, string kind, string provenance
) {
  provenance = "manual" and
  exists(boolean valuePreserving | c.propagatesFlowExt(input, output, valuePreserving) |
    valuePreserving = true and kind = "value"
    or
    valuePreserving = false and kind = "taint"
  )
}
/**
* Holds if a neutral model exists for `c` of kind `kind`
* and with provenance `provenance`.
* Note. Neutral models have not been implemented for Python.
*/
predicate neutralElement(NeutralCallableBase c, string kind, string provenance) { none() }
/**
 * Gets the summary component denoted by the access path token `c`, if `c` is
 * one of the Python-specific tokens: `ListElement`, `SetElement`,
 * `DictionaryElementAny`, `TupleElement[i]`, `DictionaryElement[key]` or
 * `Attribute[name]`.
 */
SummaryComponent interpretComponentSpecific(AccessPathToken c) {
  // Tokens without an argument.
  c = "ListElement" and result = FlowSummary::SummaryComponent::listElement()
  or
  c = "SetElement" and result = FlowSummary::SummaryComponent::setElement()
  or
  c = "DictionaryElementAny" and result = FlowSummary::SummaryComponent::dictionaryElementAny()
  or
  // Tokens with a single argument.
  exists(int idx |
    idx.toString() = c.getAnArgument("TupleElement") and
    result = FlowSummary::SummaryComponent::tupleElement(idx)
  )
  or
  result = FlowSummary::SummaryComponent::dictionaryElement(c.getAnArgument("DictionaryElement"))
  or
  result = FlowSummary::SummaryComponent::attribute(c.getAnArgument("Attribute"))
}
/** Gets the textual token for content `cs`, using the same names as `interpretComponentSpecific`. */
private string getContentSpecific(Content cs) {
  // Contents without an argument.
  cs = TListElementContent() and result = "ListElement"
  or
  cs = TSetElementContent() and result = "SetElement"
  or
  cs = TDictionaryElementAnyContent() and result = "DictionaryElementAny"
  or
  // Contents rendered as `Name[argument]`.
  exists(int idx | cs = TTupleElementContent(idx) |
    result = "TupleElement[" + idx.toString() + "]"
  )
  or
  exists(string dictKey | cs = TDictionaryElementContent(dictKey) |
    result = "DictionaryElement[" + dictKey + "]"
  )
  or
  exists(string attrName | cs = TAttributeContent(attrName) |
    result = "Attribute[" + attrName + "]"
  )
}
/**
 * Gets the textual representation of summary component `sc` in the format used
 * for MaD models. Only content components have a result.
 */
string getMadRepresentationSpecific(SummaryComponent sc) {
  result = getContentSpecific(any(Content content | sc = TContentSummaryComponent(content)))
}
/**
 * Gets the textual representation of parameter position `pos` in the format
 * used for flow summaries: `self`, the index of a positional parameter, or the
 * name of a keyword parameter followed by `:`.
 */
string getParameterPosition(ParameterPosition pos) {
  pos.isSelf() and result = "self"
  or
  result = any(int idx | pos.isPositional(idx)).toString()
  or
  result = any(string keyword | pos.isKeyword(keyword)) + ":"
}
/**
 * Gets the textual representation of argument position `pos` in the format
 * used for flow summaries: `self`, the index of a positional argument, or the
 * name of a keyword argument followed by `:`.
 */
string getArgumentPosition(ArgumentPosition pos) {
  pos.isSelf() and result = "self"
  or
  result = any(int idx | pos.isPositional(idx)).toString()
  or
  result = any(string keyword | pos.isKeyword(keyword)) + ":"
}
/** Holds if input specification component `c` needs a reference. */
predicate inputNeedsReferenceSpecific(string c) { none() }
/** Holds if output specification component `c` needs a reference. */
predicate outputNeedsReferenceSpecific(string c) { none() }
/** Gets the return kind corresponding to specification `"ReturnValue"`. */
ReturnKind getReturnValueKind() { any() }
/**
* All definitions in this module are required by the shared implementation
* (for source/sink interpretation), but they are unused for Python, where
* we rely on API graphs instead.
*
* Every predicate below has the body `none()`, so none of them has any results.
*/
private module UnusedSourceSinkInterpretation {
/**
* Holds if an external source specification exists for `n` with output specification
* `output`, kind `kind`, and provenance `provenance`.
*
* Never holds in this implementation.
*/
predicate sourceElement(AstNode n, string output, string kind, string provenance) { none() }
/**
* Holds if an external sink specification exists for `n` with input specification
* `input`, kind `kind` and provenance `provenance`.
*
* Never holds in this implementation.
*/
predicate sinkElement(AstNode n, string input, string kind, string provenance) { none() }
/** The kind of element a source/sink specification refers to; an alias for `AstNode`. */
class SourceOrSinkElement = AstNode;
/** An entity used to interpret a source/sink specification. */
class InterpretNode extends AstNode_ {
// InterpretNode is going away, this is just a dummy implementation.
// However, we have some old location tests picking them up, so we
// explicitly define them to not exist.
InterpretNode() { none() }
/** Gets the element that this node corresponds to, if any. */
SourceOrSinkElement asElement() { none() }
/** Gets the data-flow node that this node corresponds to, if any. */
Node asNode() { none() }
/** Gets the call that this node corresponds to, if any. */
DataFlowCall asCall() { none() }
/** Gets the callable that this node corresponds to, if any. */
DataFlowCallable asCallable() { none() }
/** Gets the target of this call, if any. */
SourceOrSinkElement getCallTarget() { none() }
}
/** Provides additional sink specification logic. Never holds in this implementation. */
predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
/** Provides additional source specification logic. Never holds in this implementation. */
predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
}
import UnusedSourceSinkInterpretation
/**
 * Provides predicates for recognising the bodies `X` of `Parameter[X]` and
 * `Argument[X]` access path tokens as positional or keyword positions.
 */
module ParsePositions {
  private import FlowSummaryImpl

  /** Holds if `body` is an argument of some `Parameter[...]` token. */
  private predicate isParamBody(string body) {
    body = any(AccessPathToken token | token.getName() = "Parameter").getAnArgument()
  }

  /** Holds if `body` is an argument of some `Argument[...]` token. */
  private predicate isArgBody(string body) {
    body = any(AccessPathToken token | token.getName() = "Argument").getAnArgument()
  }

  /** Holds if the `Parameter[...]` body `c` parses as the integer `i`. */
  predicate isParsedPositionalParameterPosition(string c, int i) {
    i = AccessPath::parseInt(c) and
    isParamBody(c)
  }

  /** Holds if the `Parameter[...]` body `c` is `paramName` followed by `:`. */
  predicate isParsedKeywordParameterPosition(string c, string paramName) {
    c = paramName + ":" and
    isParamBody(c)
  }

  /** Holds if the `Argument[...]` body `c` parses as the integer `i`. */
  predicate isParsedPositionalArgumentPosition(string c, int i) {
    i = AccessPath::parseInt(c) and
    isArgBody(c)
  }

  /** Holds if the `Argument[...]` body `c` is `argName` followed by `:`. */
  predicate isParsedKeywordArgumentPosition(string c, string argName) {
    c = argName + ":" and
    isArgBody(c)
  }
}
/**
 * Gets the argument position obtained by parsing `X` in `Parameter[X]`, where
 * `s` is `X`: `self`, a positional index, or a keyword name followed by `:`.
 */
ArgumentPosition parseParamBody(string s) {
  s = "self" and result.isSelf()
  or
  result.isPositional(any(int idx | ParsePositions::isParsedPositionalParameterPosition(s, idx)))
  or
  result.isKeyword(any(string keyword | ParsePositions::isParsedKeywordParameterPosition(s, keyword)))
}
/**
 * Gets the parameter position obtained by parsing `X` in `Argument[X]`, where
 * `s` is `X`: `self`, a positional index, or a keyword name followed by `:`.
 */
ParameterPosition parseArgBody(string s) {
  s = "self" and result.isSelf()
  or
  result.isPositional(any(int idx | ParsePositions::isParsedPositionalArgumentPosition(s, idx)))
  or
  result.isKeyword(any(string keyword | ParsePositions::isParsedKeywordArgumentPosition(s, keyword)))
}

View File

@@ -7,7 +7,7 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.ImportStar
private import semmle.python.dataflow.new.TypeTracker
private import semmle.python.dataflow.new.TypeTracking
private import semmle.python.dataflow.new.internal.DataFlowPrivate
/**
@@ -112,7 +112,7 @@ module ImportResolution {
not allowedEssaImportStep(_, firstDef)
|
not LocalFlow::defToFirstUse(firstDef, _) and
val.asVar() = firstDef
val.asCfgNode() = firstDef.getDefinition().(EssaNodeDefinition).getDefiningNode()
or
exists(ControlFlowNode mid, ControlFlowNode end |
LocalFlow::defToFirstUse(firstDef, mid) and
@@ -320,11 +320,11 @@ module ImportResolution {
// name as a submodule, we always consider that this attribute _could_ be a
// reference to the submodule, even if we don't know that the submodule has been
// imported yet.
exists(string submodule, Module package |
submodule = result.asVar().getName() and
SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
package.getEntryNode()) and
m = getModuleFromName(package.getPackageName() + "." + submodule)
exists(string submodule, Module package, EssaVariable var |
submodule = var.getName() and
SsaSource::init_module_submodule_defn(var.getSourceVariable(), package.getEntryNode()) and
m = getModuleFromName(package.getPackageName() + "." + submodule) and
result.asCfgNode() = var.getDefinition().(EssaNodeDefinition).getDefiningNode()
)
}

View File

@@ -87,13 +87,13 @@
* This is adequate as the route through `TIterableElement(sequence)` does not transfer precise content.
*
* 5. [Read] Content is read from `sequence` to its elements.
* a) If the element is a plain variable, the target is the corresponding essa node.
* a) If the element is a plain variable, the target is the corresponding control flow node.
*
* b) If the element is itself a sequence, with control-flow node `seq`, the target is `TIterableSequence(seq)`.
*
* c) If the element is a starred variable, with control-flow node `v`, the target is `TIterableElement(v)`.
*
* 6. [Store] Content is stored from `TIterableElement(v)` to the essa variable for `v`, with
* 6. [Store] Content is stored from `TIterableElement(v)` to the control flow node for variable `v`, with
* content type `ListElementContent`.
*
* 7. [Flow, Read, Store] Steps 2 through 7 are repeated for all recursive elements which are sequences.
@@ -313,7 +313,7 @@ predicate iterableUnpackingConvertingStoreStep(Node nodeFrom, Content c, Node no
* Step 5
* For a sequence node inside an iterable unpacking, data flows from the sequence to its elements. There are
* three cases for what `toNode` should be:
* a) If the element is a plain variable, `toNode` is the corresponding essa node.
* a) If the element is a plain variable, `toNode` is the corresponding control flow node.
*
* b) If the element is itself a sequence, with control-flow node `seq`, `toNode` is `TIterableSequence(seq)`.
*
@@ -351,20 +351,25 @@ predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo
nodeTo = TIterableElementNode(element)
else
// Step 5a
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = element
exists(MultiAssignmentDefinition mad | element = mad.getDefiningNode() |
nodeTo.(CfgNode).getNode() = element
)
)
)
}
/**
* Step 6
* Data flows from `TIterableElement(v)` to the essa variable for `v`, with
* Data flows from `TIterableElement(v)` to the control flow node for variable `v`, with
* content type `ListElementContent`.
*/
predicate iterableUnpackingStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) {
exists(ControlFlowNode starred | starred.getNode() instanceof Starred |
exists(ControlFlowNode starred, MultiAssignmentDefinition mad |
starred.getNode() instanceof Starred and
starred = mad.getDefiningNode()
|
nodeFrom = TIterableElementNode(starred) and
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = starred and
nodeTo.asCfgNode() = starred and
c instanceof ListElementContent
)
}

View File

@@ -71,7 +71,9 @@ class LocalSourceNode extends Node {
or
// We include all scope entry definitions, as these act as the local source within the scope they
// enter.
this.asVar() instanceof ScopeEntryDefinition
this instanceof ScopeEntryDefinitionNode
or
this instanceof ParameterNode
}
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */
@@ -151,7 +153,7 @@ class LocalSourceNode extends Node {
* See `TypeBackTracker` for more details about how to use this.
*/
pragma[inline]
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t = t2.step(result, this) }
}
/**
@@ -165,7 +167,7 @@ class LocalSourceNodeNotModuleVariableNode extends LocalSourceNode {
LocalSourceNodeNotModuleVariableNode() {
this instanceof ExprNode
or
this.asVar() instanceof ScopeEntryDefinition
this instanceof ScopeEntryDefinitionNode
}
}
@@ -238,7 +240,7 @@ private module Cached {
* Helper predicate for `hasLocalSource`. Removes any steps that go to module variable reads, as these
* are already local source nodes in their own right.
*/
cached
pragma[nomagic]
private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo) and
not nodeTo = any(ModuleVariableNode v).getARead()

View File

@@ -89,8 +89,9 @@ predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
or
// the interior pattern flows to the alias
nodeFrom.(CfgNode).getNode().getNode() = subject.getPattern() and
nodeTo.(EssaNode).getVar().getDefinition().(PatternAliasDefinition).getDefiningNode().getNode() =
alias
exists(PatternAliasDefinition pad | pad.getDefiningNode().getNode() = alias |
nodeTo.(CfgNode).getNode() = pad.getDefiningNode()
)
)
}
@@ -123,13 +124,9 @@ predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchCapturePattern capture, Name var | capture.getVariable() = var |
nodeFrom.(CfgNode).getNode().getNode() = capture and
nodeTo
.(EssaNode)
.getVar()
.getDefinition()
.(PatternCaptureDefinition)
.getDefiningNode()
.getNode() = var
exists(PatternCaptureDefinition pcd | pcd.getDefiningNode().getNode() = var |
nodeTo.(CfgNode).getNode() = pcd.getDefiningNode()
)
)
}

View File

@@ -1,412 +0,0 @@
/**
* Provides the implementation of type tracking steps through flow summaries.
* To use this, you must implement the `Input` signature. You can then use the predicates in the `Output`
* signature to implement the predicates of the same names inside `TypeTrackerSpecific.qll`.
*/
/** The classes and predicates needed to generate type-tracking steps from summaries. */
signature module Input {
// Dataflow nodes
class Node;
// Content
class TypeTrackerContent;
class TypeTrackerContentFilter;
// Relating content and filters
/**
* Gets a content filter to use for a `WithoutContent[content]` step (data is not allowed to be stored in `content`),
* or has no result if
* the step should be treated as ordinary flow.
*
* `WithoutContent` is often used to perform strong updates on individual collection elements, but for
* type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful
* for restricting the type of an object, and in these cases we translate it to a filter.
*/
TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content);
/**
* Gets a content filter to use for a `WithContent[content]` step (data must be stored in `content`),
* or has no result if
* the step cannot be handled by type-tracking.
*
* `WithContent` is often used to perform strong updates on individual collection elements (or rather
* to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive.
* However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter.
*/
TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content);
// Summaries and their stacks
class SummaryComponent;
class SummaryComponentStack {
/** Gets the component at the head of this stack. */
SummaryComponent head();
}
/** Gets a singleton stack containing `component`. */
SummaryComponentStack singleton(SummaryComponent component);
/**
* Gets the stack obtained by pushing `head` onto `tail`.
*/
SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail);
/** Gets a summary component representing a return. */
SummaryComponent return();
// Relating content to summaries
/** Gets a summary component for content `contents`. */
SummaryComponent content(TypeTrackerContent contents);
/** Gets a summary component where data is not allowed to be stored in `contents`. */
SummaryComponent withoutContent(TypeTrackerContent contents);
/** Gets a summary component where data must be stored in `contents`. */
SummaryComponent withContent(TypeTrackerContent contents);
// Callables
class SummarizedCallable {
/**
* Holds if this callable propagates flow from `input` to `output`.
* NOTE(review): `preservesValue` presumably distinguishes value flow from taint flow; confirm against implementations.
*/
predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
);
}
// Relating nodes to summaries
/**
* Gets a dataflow node representing the argument of `call` indicated by `arg`.
*
* Returns the post-update node of the argument when `isPostUpdate` is true.
*/
Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate);
/** Gets a dataflow node representing the parameter of `callable` indicated by `param`. */
Node parameterOf(Node callable, SummaryComponent param);
/** Gets a dataflow node representing the return of `callable` indicated by `return`. */
Node returnOf(Node callable, SummaryComponent return);
// Relating callables to nodes
/** Gets a dataflow node representing a call to `callable`. */
Node callTo(SummarizedCallable callable);
}
/**
* The predicates provided by a summary type tracker.
* These are meant to be used in `TypeTrackerSpecific.qll`
* inside the predicates of the same names.
*/
signature module Output<Input I> {
/**
* Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph.
*/
predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo);
/**
* Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`.
*/
predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content);
/**
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
*/
predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content);
/**
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
*/
predicate basicLoadStoreStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent,
I::TypeTrackerContent storeContent
);
/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here.
*/
predicate basicWithoutContentStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
);
/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`.
*/
predicate basicWithContentStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
);
}
/**
* Implementation of the summary type tracker, that is type tracking through flow summaries.
*/
module SummaryFlow<Input I> implements Output<I> {
pragma[nomagic]
private predicate isNonLocal(I::SummaryComponent component) {
component = I::content(_)
or
component = I::withContent(_)
}
pragma[nomagic]
private predicate hasLoadSummary(
I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input,
I::SummaryComponentStack output
) {
callable.propagatesFlow(I::push(I::content(contents), input), output, true) and
not isNonLocal(input.head()) and
not isNonLocal(output.head())
}
pragma[nomagic]
private predicate hasStoreSummary(
I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input,
I::SummaryComponentStack output
) {
not isNonLocal(input.head()) and
not isNonLocal(output.head()) and
(
callable.propagatesFlow(input, I::push(I::content(contents), output), true)
or
// Allow the input to start with an arbitrary WithoutContent[X].
// Since type-tracking only tracks one content deep, and we're about to store into another content,
// we're already preventing the input from being in a content.
callable
.propagatesFlow(I::push(I::withoutContent(_), input),
I::push(I::content(contents), output), true)
)
}
pragma[nomagic]
private predicate hasLoadStoreSummary(
I::SummarizedCallable callable, I::TypeTrackerContent loadContents,
I::TypeTrackerContent storeContents, I::SummaryComponentStack input,
I::SummaryComponentStack output
) {
callable
.propagatesFlow(I::push(I::content(loadContents), input),
I::push(I::content(storeContents), output), true) and
not isNonLocal(input.head()) and
not isNonLocal(output.head())
}
pragma[nomagic]
private predicate hasWithoutContentSummary(
I::SummarizedCallable callable, I::TypeTrackerContentFilter filter,
I::SummaryComponentStack input, I::SummaryComponentStack output
) {
exists(I::TypeTrackerContent content |
callable.propagatesFlow(I::push(I::withoutContent(content), input), output, true) and
filter = I::getFilterFromWithoutContentStep(content) and
not isNonLocal(input.head()) and
not isNonLocal(output.head()) and
input != output
)
}
pragma[nomagic]
private predicate hasWithContentSummary(
I::SummarizedCallable callable, I::TypeTrackerContentFilter filter,
I::SummaryComponentStack input, I::SummaryComponentStack output
) {
exists(I::TypeTrackerContent content |
callable.propagatesFlow(I::push(I::withContent(content), input), output, true) and
filter = I::getFilterFromWithContentStep(content) and
not isNonLocal(input.head()) and
not isNonLocal(output.head()) and
input != output
)
}
private predicate componentLevelStep(I::SummaryComponent component) {
exists(I::TypeTrackerContent content |
component = I::withoutContent(content) and
not exists(I::getFilterFromWithoutContentStep(content))
)
}
/**
* Gets a data flow `I::Node` corresponding to an argument or return value of `call`,
* as specified by `component`. `isOutput` indicates whether the node represents
* an output node or an input node.
*
* For an argument component, `isOutput` is passed to `I::argumentOf` as its
* post-update flag; for the return component, the result is `call` itself and
* `isOutput` is `true`.
*/
bindingset[call, component]
private I::Node evaluateSummaryComponentLocal(
I::Node call, I::SummaryComponent component, boolean isOutput
) {
result = I::argumentOf(call, component, isOutput)
or
component = I::return() and
result = call and
isOutput = true
}
/**
* Holds if `callable` is relevant for type-tracking and we therefore want `stack` to
* be evaluated locally at its call sites.
*/
pragma[nomagic]
private predicate dependsOnSummaryComponentStack(
I::SummarizedCallable callable, I::SummaryComponentStack stack
) {
exists(I::callTo(callable)) and
(
callable.propagatesFlow(stack, _, true)
or
callable.propagatesFlow(_, stack, true)
or
// include store summaries as they may skip an initial step at the input
hasStoreSummary(callable, _, stack, _)
)
or
dependsOnSummaryComponentStackCons(callable, _, stack)
}
pragma[nomagic]
private predicate dependsOnSummaryComponentStackCons(
I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail
) {
dependsOnSummaryComponentStack(callable, I::push(head, tail))
}
/**
 * Same as `dependsOnSummaryComponentStackCons`, but restricted to heads that are
 * not non-local.
 */
pragma[nomagic]
private predicate dependsOnSummaryComponentStackConsLocal(
  I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail
) {
  not isNonLocal(head) and
  dependsOnSummaryComponentStackCons(callable, head, tail)
}
/**
 * Holds if the singleton stack consisting only of `leaf` is one that
 * `dependsOnSummaryComponentStack` holds for, together with `callable`.
 */
pragma[nomagic]
private predicate dependsOnSummaryComponentStackLeaf(
  I::SummarizedCallable callable, I::SummaryComponent leaf
) {
  exists(I::SummaryComponentStack single |
    single = I::singleton(leaf) and
    dependsOnSummaryComponentStack(callable, single)
  )
}
/**
 * Gets a data flow `I::Node` corresponding to the local input or output of `call`
 * identified by `stack`, if possible.
 *
 * `call` is a call to `callable`, and `isOutput` indicates whether the resulting node
 * is to be used as an output (`true`) or as an input (`false`) of a summary.
 *
 * Only stacks that `dependsOnSummaryComponentStack` holds for are evaluated: singleton
 * stacks are resolved directly against `call`, and longer stacks are resolved by first
 * evaluating the tail and then applying the head component to the resulting node.
 */
pragma[nomagic]
private I::Node evaluateSummaryComponentStackLocal(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack stack, boolean isOutput
) {
// Base case: a singleton stack is an argument or the return value of `call`.
exists(I::SummaryComponent component |
dependsOnSummaryComponentStackLeaf(callable, component) and
stack = I::singleton(component) and
call = I::callTo(callable) and
result = evaluateSummaryComponentLocal(call, component, isOutput)
)
or
// Recursive case: evaluate `tail` to `prev`, then apply the (local) `head` to `prev`.
exists(
I::Node prev, I::SummaryComponent head, I::SummaryComponentStack tail, boolean isOutput0
|
prev = evaluateSummaryComponentStackLocal(callable, call, tail, isOutput0) and
// the binding pragmas below are join-order hints only; they do not change what holds
dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head),
pragma[only_bind_out](tail)) and
stack = I::push(pragma[only_bind_out](head), pragma[only_bind_out](tail))
|
// `Parameter[X]` is only allowed in the output of flow summaries (hence `isOutput = true`),
// however the target of the parameter (e.g. `Argument[Y].Parameter[X]`) should be fetched
// not from a post-update argument node (hence `isOutput0 = false`)
result = I::parameterOf(prev, head) and
isOutput0 = false and
isOutput = true
or
// `ReturnValue` is only allowed in the input of flow summaries (hence `isOutput = false`),
// and the target of the return value (e.g. `Argument[X].ReturnValue`) should be fetched not
// from a post-update argument node (hence `isOutput0 = false`)
result = I::returnOf(prev, head) and
isOutput0 = false and
isOutput = false
or
// a component accepted by `componentLevelStep` leaves both the node and the
// input/output direction unchanged
componentLevelStep(head) and
result = prev and
isOutput = isOutput0
)
}
// Implement Output
/**
 * Holds if some call to a summarized callable has a summary (with its third argument
 * `true`) whose input stack evaluates locally to `nodeFrom` and whose output stack
 * evaluates locally to `nodeTo`.
 */
predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo) {
  exists(
    I::SummarizedCallable summarized, I::Node site, I::SummaryComponentStack inStack,
    I::SummaryComponentStack outStack
  |
    site = I::callTo(summarized) and
    summarized.propagatesFlow(inStack, outStack, true) and
    nodeTo = evaluateSummaryComponentStackLocal(summarized, site, outStack, true) and
    nodeFrom = evaluateSummaryComponentStackLocal(summarized, site, inStack, false)
  )
}
/**
 * Holds if some call to a summarized callable has a load summary for `content`
 * (as given by `hasLoadSummary`) whose input stack evaluates locally to `nodeFrom`
 * and whose output stack evaluates locally to `nodeTo`.
 */
predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) {
  exists(
    I::SummarizedCallable summarized, I::Node site, I::SummaryComponentStack inStack,
    I::SummaryComponentStack outStack
  |
    site = I::callTo(summarized) and
    hasLoadSummary(summarized, content, pragma[only_bind_into](inStack),
      pragma[only_bind_into](outStack)) and
    nodeTo = evaluateSummaryComponentStackLocal(summarized, site, outStack, true) and
    nodeFrom = evaluateSummaryComponentStackLocal(summarized, site, inStack, false)
  )
}
/**
 * Holds if some call to a summarized callable has a store summary for `content`
 * (as given by `hasStoreSummary`) whose input stack evaluates locally to `nodeFrom`
 * and whose output stack evaluates locally to `nodeTo`.
 */
predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) {
  exists(
    I::SummarizedCallable summarized, I::Node site, I::SummaryComponentStack inStack,
    I::SummaryComponentStack outStack
  |
    site = I::callTo(summarized) and
    hasStoreSummary(summarized, content, pragma[only_bind_into](inStack),
      pragma[only_bind_into](outStack)) and
    nodeTo = evaluateSummaryComponentStackLocal(summarized, site, outStack, true) and
    nodeFrom = evaluateSummaryComponentStackLocal(summarized, site, inStack, false)
  )
}
/**
 * Holds if some call to a summarized callable has a load-store summary for
 * `loadContent` and `storeContent` (as given by `hasLoadStoreSummary`) whose input
 * stack evaluates locally to `nodeFrom` and whose output stack evaluates locally
 * to `nodeTo`.
 */
predicate basicLoadStoreStep(
  I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent,
  I::TypeTrackerContent storeContent
) {
  exists(
    I::SummarizedCallable summarized, I::Node site, I::SummaryComponentStack inStack,
    I::SummaryComponentStack outStack
  |
    site = I::callTo(summarized) and
    hasLoadStoreSummary(summarized, loadContent, storeContent, pragma[only_bind_into](inStack),
      pragma[only_bind_into](outStack)) and
    nodeTo = evaluateSummaryComponentStackLocal(summarized, site, outStack, true) and
    nodeFrom = evaluateSummaryComponentStackLocal(summarized, site, inStack, false)
  )
}
/**
 * Holds if some call to a summarized callable has a without-content summary for
 * `filter` (as given by `hasWithoutContentSummary`) whose input stack evaluates
 * locally to `nodeFrom` and whose output stack evaluates locally to `nodeTo`.
 */
predicate basicWithoutContentStep(
  I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
) {
  exists(
    I::SummarizedCallable summarized, I::Node site, I::SummaryComponentStack inStack,
    I::SummaryComponentStack outStack
  |
    site = I::callTo(summarized) and
    hasWithoutContentSummary(summarized, filter, pragma[only_bind_into](inStack),
      pragma[only_bind_into](outStack)) and
    nodeTo = evaluateSummaryComponentStackLocal(summarized, site, outStack, true) and
    nodeFrom = evaluateSummaryComponentStackLocal(summarized, site, inStack, false)
  )
}
/**
 * Holds if some call to a summarized callable has a with-content summary for
 * `filter` (as given by `hasWithContentSummary`) whose input stack evaluates
 * locally to `nodeFrom` and whose output stack evaluates locally to `nodeTo`.
 */
predicate basicWithContentStep(
  I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
) {
  exists(
    I::SummarizedCallable summarized, I::Node site, I::SummaryComponentStack inStack,
    I::SummaryComponentStack outStack
  |
    site = I::callTo(summarized) and
    hasWithContentSummary(summarized, filter, pragma[only_bind_into](inStack),
      pragma[only_bind_into](outStack)) and
    nodeTo = evaluateSummaryComponentStackLocal(summarized, site, outStack, true) and
    nodeFrom = evaluateSummaryComponentStackLocal(summarized, site, inStack, false)
  )
}
}

View File

@@ -216,8 +216,10 @@ predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
*/
predicate asyncWithStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
var = any(WithDefinition wd).getDefiningNode()
|
nodeFrom.(DataFlow::CfgNode).getNode() = contextManager and
nodeTo.(DataFlow::EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
nodeTo.(DataFlow::CfgNode).getNode() = var and
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
with.getContextExpr() = contextManager.getNode() and
with.getOptionalVars() = var.getNode() and

View File

@@ -8,22 +8,22 @@ private module Cached {
* A description of a step on an inter-procedural data flow path.
*/
cached
newtype TStepSummary =
deprecated newtype TStepSummary =
LevelStep() or
CallStep() or
ReturnStep() or
StoreStep(TypeTrackerContent content) { basicStoreStep(_, _, content) } or
LoadStep(TypeTrackerContent content) { basicLoadStep(_, _, content) } or
LoadStoreStep(TypeTrackerContent load, TypeTrackerContent store) {
deprecated StoreStep(TypeTrackerContent content) { basicStoreStep(_, _, content) } or
deprecated LoadStep(TypeTrackerContent content) { basicLoadStep(_, _, content) } or
deprecated LoadStoreStep(TypeTrackerContent load, TypeTrackerContent store) {
basicLoadStoreStep(_, _, load, store)
} or
WithContent(ContentFilter filter) { basicWithContentStep(_, _, filter) } or
WithoutContent(ContentFilter filter) { basicWithoutContentStep(_, _, filter) } or
deprecated WithContent(ContentFilter filter) { basicWithContentStep(_, _, filter) } or
deprecated WithoutContent(ContentFilter filter) { basicWithoutContentStep(_, _, filter) } or
JumpStep()
cached
newtype TTypeTracker =
MkTypeTracker(Boolean hasCall, OptionalTypeTrackerContent content) {
deprecated newtype TTypeTracker =
deprecated MkTypeTracker(Boolean hasCall, OptionalTypeTrackerContent content) {
content = noContent()
or
// Restrict `content` to those that might eventually match a load.
@@ -40,8 +40,8 @@ private module Cached {
}
cached
newtype TTypeBackTracker =
MkTypeBackTracker(Boolean hasReturn, OptionalTypeTrackerContent content) {
deprecated newtype TTypeBackTracker =
deprecated MkTypeBackTracker(Boolean hasReturn, OptionalTypeTrackerContent content) {
content = noContent()
or
// As in MkTypeTracker, restrict `content` to those that might eventually match a store.
@@ -57,11 +57,13 @@ private module Cached {
/** Gets a type tracker with no content and the call bit set to the given value. */
cached
TypeTracker noContentTypeTracker(boolean hasCall) { result = MkTypeTracker(hasCall, noContent()) }
deprecated TypeTracker noContentTypeTracker(boolean hasCall) {
result = MkTypeTracker(hasCall, noContent())
}
/** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */
cached
TypeTracker append(TypeTracker tt, StepSummary step) {
deprecated TypeTracker append(TypeTracker tt, StepSummary step) {
exists(Boolean hasCall, OptionalTypeTrackerContent currentContents |
tt = MkTypeTracker(hasCall, currentContents)
|
@@ -108,13 +110,13 @@ private module Cached {
}
pragma[nomagic]
private TypeBackTracker noContentTypeBackTracker(boolean hasReturn) {
deprecated private TypeBackTracker noContentTypeBackTracker(boolean hasReturn) {
result = MkTypeBackTracker(hasReturn, noContent())
}
/** Gets the summary resulting from prepending `step` to this type-tracking summary. */
cached
TypeBackTracker prepend(TypeBackTracker tbt, StepSummary step) {
deprecated TypeBackTracker prepend(TypeBackTracker tbt, StepSummary step) {
exists(Boolean hasReturn, OptionalTypeTrackerContent content |
tbt = MkTypeBackTracker(hasReturn, content)
|
@@ -167,7 +169,9 @@ private module Cached {
* Steps contained in this predicate should _not_ depend on the call graph.
*/
cached
predicate stepNoCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
deprecated predicate stepNoCall(
TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary
) {
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepNoCall(mid, nodeTo, summary))
}
@@ -176,12 +180,14 @@ private module Cached {
* inter-procedural step from `nodeFrom` to `nodeTo`.
*/
cached
predicate stepCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
deprecated predicate stepCall(
TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary
) {
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepCall(mid, nodeTo, summary))
}
cached
predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
deprecated predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
jumpStep(nodeFrom, nodeTo) and
summary = JumpStep()
or
@@ -210,7 +216,7 @@ private module Cached {
}
cached
predicate smallstepCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
deprecated predicate smallstepCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
callStep(nodeFrom, nodeTo) and summary = CallStep()
or
returnStep(nodeFrom, nodeTo) and
@@ -223,25 +229,27 @@ private module Cached {
private import Cached
private predicate step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
deprecated private predicate step(
TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary
) {
stepNoCall(nodeFrom, nodeTo, summary)
or
stepCall(nodeFrom, nodeTo, summary)
}
pragma[nomagic]
private predicate stepProj(TypeTrackingNode nodeFrom, StepSummary summary) {
deprecated private predicate stepProj(TypeTrackingNode nodeFrom, StepSummary summary) {
step(nodeFrom, _, summary)
}
private predicate smallstep(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
deprecated private predicate smallstep(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
smallstepNoCall(nodeFrom, nodeTo, summary)
or
smallstepCall(nodeFrom, nodeTo, summary)
}
pragma[nomagic]
private predicate smallstepProj(Node nodeFrom, StepSummary summary) {
deprecated private predicate smallstepProj(Node nodeFrom, StepSummary summary) {
smallstep(nodeFrom, _, summary)
}
@@ -270,7 +278,7 @@ private predicate smallstepProj(Node nodeFrom, StepSummary summary) {
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
*/
private predicate flowsToStoreStep(
deprecated private predicate flowsToStoreStep(
Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent content
) {
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
@@ -279,7 +287,7 @@ private predicate flowsToStoreStep(
/**
* Holds if `loadContent` is loaded from `nodeFrom` and written to `storeContent` of `nodeTo`.
*/
private predicate flowsToLoadStoreStep(
deprecated private predicate flowsToLoadStoreStep(
Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent loadContent,
TypeTrackerContent storeContent
) {
@@ -293,7 +301,7 @@ private predicate flowsToLoadStoreStep(
*
* A description of a step on an inter-procedural data flow path.
*/
class StepSummary extends TStepSummary {
deprecated class StepSummary extends TStepSummary {
/** Gets a textual representation of this step summary. */
string toString() {
this instanceof LevelStep and result = "level"
@@ -316,7 +324,7 @@ class StepSummary extends TStepSummary {
}
/** Provides predicates for updating step summaries (`StepSummary`s). */
module StepSummary {
deprecated module StepSummary {
predicate append = Cached::append/2;
/**
@@ -378,8 +386,6 @@ module StepSummary {
smallstepCall(nodeFrom, nodeTo, summary)
}
deprecated predicate localSourceStoreStep = flowsToStoreStep/3;
/** Gets the step summary for a level step. */
StepSummary levelStep() { result = LevelStep() }
@@ -411,6 +417,8 @@ module StepSummary {
}
/**
* DEPRECATED: Use `semmle.python.dataflow.new.TypeTracking` instead.
*
* A summary of the steps needed to track a value to a given dataflow node.
*
* This can be used to track objects that implement a certain API in order to
@@ -437,7 +445,7 @@ module StepSummary {
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
*/
class TypeTracker extends TTypeTracker {
deprecated class TypeTracker extends TTypeTracker {
Boolean hasCall;
OptionalTypeTrackerContent content;
@@ -565,7 +573,7 @@ class TypeTracker extends TTypeTracker {
}
/** Provides predicates for implementing custom `TypeTracker`s. */
module TypeTracker {
deprecated module TypeTracker {
/**
* Gets a valid end point of type tracking.
*/
@@ -580,15 +588,17 @@ module TypeTracker {
}
pragma[nomagic]
private predicate backStepProj(TypeTrackingNode nodeTo, StepSummary summary) {
deprecated private predicate backStepProj(TypeTrackingNode nodeTo, StepSummary summary) {
step(_, nodeTo, summary)
}
private predicate backSmallstepProj(TypeTrackingNode nodeTo, StepSummary summary) {
deprecated private predicate backSmallstepProj(TypeTrackingNode nodeTo, StepSummary summary) {
smallstep(_, nodeTo, summary)
}
/**
* DEPRECATED: Use `semmle.python.dataflow.new.TypeTracking` instead.
*
* A summary of the steps needed to back-track a use of a value to a given dataflow node.
*
* This can for example be used to track callbacks that are passed to a certain API,
@@ -618,7 +628,7 @@ private predicate backSmallstepProj(TypeTrackingNode nodeTo, StepSummary summary
* `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
* intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
*/
class TypeBackTracker extends TTypeBackTracker {
deprecated class TypeBackTracker extends TTypeBackTracker {
Boolean hasReturn;
OptionalTypeTrackerContent content;
@@ -747,7 +757,7 @@ class TypeBackTracker extends TTypeBackTracker {
}
/** Provides predicates for implementing custom `TypeBackTracker`s. */
module TypeBackTracker {
deprecated module TypeBackTracker {
/**
* Gets a valid end point of type back-tracking.
*/
@@ -768,14 +778,14 @@ module TypeBackTracker {
* `stepCall` relation (`stepNoCall` not being recursive, can be join-ordered in the
* same way as in `stepInlineLate`).
*/
module CallGraphConstruction {
deprecated module CallGraphConstruction {
/** The input to call graph construction. */
signature module InputSig {
/** A state to track during type tracking. */
class State;
/** Holds if type tracking should start at `start` in state `state`. */
predicate start(Node start, State state);
deprecated predicate start(Node start, State state);
/**
* Holds if type tracking should use the step from `nodeFrom` to `nodeTo`,
@@ -784,7 +794,7 @@ module CallGraphConstruction {
* Implementing this predicate using `StepSummary::[small]stepNoCall` yields
* standard type tracking.
*/
predicate stepNoCall(Node nodeFrom, Node nodeTo, StepSummary summary);
deprecated predicate stepNoCall(Node nodeFrom, Node nodeTo, StepSummary summary);
/**
* Holds if type tracking should use the step from `nodeFrom` to `nodeTo`,
@@ -793,7 +803,7 @@ module CallGraphConstruction {
* Implementing this predicate using `StepSummary::[small]stepCall` yields
* standard type tracking.
*/
predicate stepCall(Node nodeFrom, Node nodeTo, StepSummary summary);
deprecated predicate stepCall(Node nodeFrom, Node nodeTo, StepSummary summary);
/** A projection of an element from the state space. */
class StateProj;
@@ -802,25 +812,25 @@ module CallGraphConstruction {
StateProj stateProj(State state);
/** Holds if type tracking should stop at `n` when we are tracking projected state `stateProj`. */
predicate filter(Node n, StateProj stateProj);
deprecated predicate filter(Node n, StateProj stateProj);
}
/** Provides the `track` predicate for use in call graph construction. */
module Make<InputSig Input> {
pragma[nomagic]
private predicate stepNoCallProj(Node nodeFrom, StepSummary summary) {
deprecated private predicate stepNoCallProj(Node nodeFrom, StepSummary summary) {
Input::stepNoCall(nodeFrom, _, summary)
}
pragma[nomagic]
private predicate stepCallProj(Node nodeFrom, StepSummary summary) {
deprecated private predicate stepCallProj(Node nodeFrom, StepSummary summary) {
Input::stepCall(nodeFrom, _, summary)
}
bindingset[nodeFrom, t]
pragma[inline_late]
pragma[noopt]
private TypeTracker stepNoCallInlineLate(
deprecated private TypeTracker stepNoCallInlineLate(
TypeTracker t, TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo
) {
exists(StepSummary summary |
@@ -837,7 +847,7 @@ module CallGraphConstruction {
}
pragma[nomagic]
private Node track(Input::State state, TypeTracker t) {
deprecated private Node track(Input::State state, TypeTracker t) {
t.start() and Input::start(result, state)
or
exists(Input::StateProj stateProj |
@@ -855,12 +865,12 @@ module CallGraphConstruction {
bindingset[t, summary]
pragma[inline_late]
private TypeTracker appendInlineLate(TypeTracker t, StepSummary summary) {
deprecated private TypeTracker appendInlineLate(TypeTracker t, StepSummary summary) {
result = t.append(summary)
}
pragma[nomagic]
private Node trackCall(Input::State state, TypeTracker t, StepSummary summary) {
deprecated private Node trackCall(Input::State state, TypeTracker t, StepSummary summary) {
exists(TypeTracker t2 |
// non-linear recursion
result = track(state, t2) and
@@ -871,7 +881,7 @@ module CallGraphConstruction {
/** Gets a node that can be reached from _some_ start node in state `state`. */
pragma[nomagic]
Node track(Input::State state) { result = track(state, TypeTracker::end()) }
deprecated Node track(Input::State state) { result = track(state, TypeTracker::end()) }
}
/** A simple version of `CallGraphConstruction` that uses standard type tracking. */
@@ -882,15 +892,15 @@ module CallGraphConstruction {
class State;
/** Holds if type tracking should start at `start` in state `state`. */
predicate start(Node start, State state);
deprecated predicate start(Node start, State state);
/** Holds if type tracking should stop at `n`. */
predicate filter(Node n);
deprecated predicate filter(Node n);
}
/** Provides the `track` predicate for use in call graph construction. */
module Make<InputSig Input> {
private module I implements CallGraphConstruction::InputSig {
deprecated private module I implements CallGraphConstruction::InputSig {
private import codeql.util.Unit
class State = Input::State;
@@ -915,7 +925,7 @@ module CallGraphConstruction {
}
}
import CallGraphConstruction::Make<I>
deprecated import CallGraphConstruction::Make<I>
}
}
}

View File

@@ -4,75 +4,54 @@
private import python
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
import semmle.python.internal.CachedStages
private import TypeTrackingImpl as TypeTrackingImpl
class Node = DataFlowPublic::Node;
deprecated class Node = DataFlowPublic::Node;
class TypeTrackingNode = DataFlowPublic::TypeTrackingNode;
deprecated class TypeTrackingNode = DataFlowPublic::TypeTrackingNode;
/** A content name for use by type trackers, or the empty string. */
class OptionalTypeTrackerContent extends string {
deprecated class OptionalTypeTrackerContent extends string {
OptionalTypeTrackerContent() {
this = ""
or
this = getPossibleContentName()
this instanceof TypeTrackingImpl::TypeTrackingInput::Content
}
}
/** A content name for use by type trackers. */
class TypeTrackerContent extends OptionalTypeTrackerContent {
deprecated class TypeTrackerContent extends OptionalTypeTrackerContent {
TypeTrackerContent() { this != "" }
}
/** Gets the content string representing no value. */
OptionalTypeTrackerContent noContent() { result = "" }
deprecated OptionalTypeTrackerContent noContent() { result = "" }
/**
* A label to use for `WithContent` and `WithoutContent` steps, restricting
* which `ContentSet` may pass through. Not currently used in Python.
*/
class ContentFilter extends Unit {
deprecated class ContentFilter extends Unit {
TypeTrackerContent getAMatchingContent() { none() }
}
pragma[inline]
predicate compatibleContents(TypeTrackerContent storeContent, TypeTrackerContent loadContent) {
deprecated predicate compatibleContents(
TypeTrackerContent storeContent, TypeTrackerContent loadContent
) {
storeContent = loadContent
}
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStepForTypetracking/2;
deprecated predicate simpleLocalFlowStep =
TypeTrackingImpl::TypeTrackingInput::simpleLocalSmallStep/2;
predicate jumpStep(Node nodeFrom, Node nodeTo) {
DataFlowPrivate::jumpStepSharedWithTypeTracker(nodeFrom, nodeTo)
or
capturedJumpStep(nodeFrom, nodeTo)
}
predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
exists(SsaSourceVariable var, DefinitionNode def | var.hasDefiningNode(def) |
nodeTo.asVar().(ScopeEntryDefinition).getSourceVariable() = var and
nodeFrom.asCfgNode() = def.getValue() and
var.getScope().getScope*() = nodeFrom.getScope()
)
}
deprecated predicate jumpStep = TypeTrackingImpl::TypeTrackingInput::jumpStep/2;
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */
predicate levelStepCall(Node nodeFrom, Node nodeTo) { none() }
deprecated predicate levelStepCall(Node nodeFrom, Node nodeTo) { none() }
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */
predicate levelStepNoCall(Node nodeFrom, Node nodeTo) {
TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo)
}
/**
* Gets the name of a possible piece of content. For Python, this is currently only attribute names,
* using the name of the attribute for the corresponding content.
*/
string getPossibleContentName() {
Stages::TypeTracking::ref() and // the TypeTracking::append() etc. predicates that we want to cache depend on this predicate, so we can place the `ref()` call here to get around identical files.
result = any(DataFlowPublic::AttrRef a).getAttributeName()
}
deprecated predicate levelStepNoCall = TypeTrackingImpl::TypeTrackingInput::levelStepNoCall/2;
/**
* Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
@@ -81,176 +60,43 @@ string getPossibleContentName() {
* recursion (or, at best, terrible performance), since identifying calls to library
* methods is done using API graphs (which uses type tracking).
*/
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
exists(
DataFlowPrivate::DataFlowCall call, DataFlowPrivate::DataFlowCallable callable,
DataFlowPrivate::ArgumentPosition apos, DataFlowPrivate::ParameterPosition ppos
|
nodeFrom = call.getArgument(apos) and
nodeTo = callable.getParameter(ppos) and
DataFlowPrivate::parameterMatch(ppos, apos) and
callable = call.getCallable()
)
}
deprecated predicate callStep = TypeTrackingImpl::TypeTrackingInput::callStep/2;
/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
predicate returnStep(DataFlowPrivate::ReturnNode nodeFrom, Node nodeTo) {
exists(DataFlowPrivate::ExtractedDataFlowCall call |
nodeFrom.getEnclosingCallable() = call.getCallable() and
nodeTo.(DataFlowPublic::CfgNode).getNode() = call.getNode()
)
}
deprecated predicate returnStep = TypeTrackingImpl::TypeTrackingInput::returnStep/2;
/**
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
*/
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) {
exists(DataFlowPublic::AttrWrite a |
a.mayHaveAttributeName(content) and
nodeFrom = a.getValue() and
nodeTo = a.getObject()
)
or
exists(DataFlowPublic::ContentSet contents |
contents.(DataFlowPublic::AttributeContent).getAttribute() = content
|
TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents)
)
}
deprecated predicate basicStoreStep = TypeTrackingImpl::TypeTrackingInput::storeStep/3;
/**
* Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`.
*/
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
exists(DataFlowPublic::AttrRead a |
a.mayHaveAttributeName(content) and
nodeFrom = a.getObject() and
nodeTo = a
)
or
exists(DataFlowPublic::ContentSet contents |
contents.(DataFlowPublic::AttributeContent).getAttribute() = content
|
TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents)
)
}
deprecated predicate basicLoadStep = TypeTrackingImpl::TypeTrackingInput::loadStep/3;
/**
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
*/
predicate basicLoadStoreStep(Node nodeFrom, Node nodeTo, string loadContent, string storeContent) {
exists(DataFlowPublic::ContentSet loadContents, DataFlowPublic::ContentSet storeContents |
loadContents.(DataFlowPublic::AttributeContent).getAttribute() = loadContent and
storeContents.(DataFlowPublic::AttributeContent).getAttribute() = storeContent
|
TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContents, storeContents)
)
}
deprecated predicate basicLoadStoreStep = TypeTrackingImpl::TypeTrackingInput::loadStoreStep/4;
/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here.
*/
predicate basicWithoutContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) { none() }
deprecated predicate basicWithoutContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) {
none()
}
/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`.
*/
predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) { none() }
deprecated predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) {
none()
}
/**
* A utility class that is equivalent to `boolean` but does not require type joining.
*/
class Boolean extends boolean {
deprecated class Boolean extends boolean {
Boolean() { this = true or this = false }
}
private import SummaryTypeTracker as SummaryTypeTracker
private import semmle.python.dataflow.new.FlowSummary as FlowSummary
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
pragma[noinline]
private predicate argumentPositionMatch(
DataFlowPublic::CallCfgNode call, DataFlowPublic::Node arg,
DataFlowDispatch::ParameterPosition ppos
) {
exists(DataFlowDispatch::ArgumentPosition apos |
DataFlowDispatch::parameterMatch(ppos, apos) and
DataFlowDispatch::normalCallArg(call.getNode(), arg, apos)
)
}
private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
// Dataflow nodes
class Node = DataFlowPublic::Node;
// Content
class TypeTrackerContent = DataFlowPublic::ContentSet;
class TypeTrackerContentFilter = ContentFilter;
TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content) { none() }
TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { none() }
// Callables
class SummarizedCallable = FlowSummary::SummarizedCallable;
// Summaries and their stacks
class SummaryComponent = FlowSummary::SummaryComponent;
class SummaryComponentStack = FlowSummary::SummaryComponentStack;
predicate singleton = FlowSummary::SummaryComponentStack::singleton/1;
predicate push = FlowSummary::SummaryComponentStack::push/2;
// Relating content to summaries
predicate content = FlowSummary::SummaryComponent::content/1;
SummaryComponent withoutContent(TypeTrackerContent contents) { none() }
SummaryComponent withContent(TypeTrackerContent contents) { none() }
predicate return = FlowSummary::SummaryComponent::return/0;
// Relating nodes to summaries
Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate) {
exists(DataFlowDispatch::ParameterPosition pos |
arg = FlowSummary::SummaryComponent::argument(pos) and
argumentPositionMatch(call, result, pos) and
isPostUpdate = [false, true] // todo: implement when/if Python uses post-update nodes in type tracking
)
}
Node parameterOf(Node callable, SummaryComponent param) {
exists(
DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos, Parameter p
|
param = FlowSummary::SummaryComponent::parameter(apos) and
DataFlowDispatch::parameterMatch(ppos, apos) and
// pick the SsaNode rather than the CfgNode
result.asVar().getDefinition().(ParameterDefinition).getParameter() = p and
(
exists(int i | ppos.isPositional(i) |
p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArg(i)
)
or
exists(string name | ppos.isKeyword(name) |
p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArgByName(name)
)
)
)
}
Node returnOf(Node callable, SummaryComponent return) {
return = FlowSummary::SummaryComponent::return() and
// `result` should be the return value of a callable expression (lambda or function) referenced by `callable`
result.asCfgNode() =
callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode()
}
// Relating callables to nodes
Node callTo(SummarizedCallable callable) { result = callable.getACallSimple() }
}
private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow<SummaryTypeTrackerInput>;

View File

@@ -0,0 +1,274 @@
import codeql.util.Unit
import codeql.typetracking.TypeTracking as Shared
import codeql.typetracking.internal.TypeTrackingImpl as SharedImpl
private import python
private import semmle.python.internal.CachedStages
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import codeql.typetracking.internal.SummaryTypeTracker as SummaryTypeTracker
private import semmle.python.dataflow.new.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
/**
 * The Python instantiation of `SummaryTypeTracker::Input`: maps the node, content,
 * callable and summary-component notions required by the summary type tracker onto
 * the Python data-flow and flow-summary libraries.
 */
private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
  // Dataflow nodes
  class Node = DataFlowPublic::Node;

  // Content
  class Content = DataFlowPublic::ContentSet;

  class ContentFilter = TypeTrackingInput::ContentFilter;

  /** Has no result: no `withoutContent` filters are provided here. */
  ContentFilter getFilterFromWithoutContentStep(Content content) { none() }

  /** Has no result: no `withContent` filters are provided here. */
  ContentFilter getFilterFromWithContentStep(Content content) { none() }

  // Callables
  class SummarizedCallable = FlowSummaryImpl::Private::SummarizedCallableImpl;

  // Summaries and their stacks
  class SummaryComponent = FlowSummaryImpl::Private::SummaryComponent;

  class SummaryComponentStack = FlowSummaryImpl::Private::SummaryComponentStack;

  predicate singleton = FlowSummaryImpl::Private::SummaryComponentStack::singleton/1;

  predicate push = FlowSummaryImpl::Private::SummaryComponentStack::push/2;

  // Relating content to summaries
  predicate content = FlowSummaryImpl::Private::SummaryComponent::content/1;

  /** Has no result: `withoutContent` components are not produced here. */
  SummaryComponent withoutContent(Content contents) { none() }

  /** Has no result: `withContent` components are not produced here. */
  SummaryComponent withContent(Content contents) { none() }

  predicate return = FlowSummaryImpl::Private::SummaryComponent::return/0;

  /**
   * Holds if `arg` is a normal call argument of `call` at an argument position
   * that matches the parameter position `ppos`.
   */
  pragma[noinline]
  private predicate argumentPositionMatch(
    DataFlowPublic::CallCfgNode call, DataFlowPublic::Node arg,
    DataFlowDispatch::ParameterPosition ppos
  ) {
    exists(DataFlowDispatch::ArgumentPosition argPos |
      DataFlowDispatch::normalCallArg(call.getNode(), arg, argPos) and
      DataFlowDispatch::parameterMatch(ppos, argPos)
    )
  }

  // Relating nodes to summaries
  /**
   * Gets the argument of `call` selected by the argument component `arg`.
   * Both values of `isPostUpdate` yield the same node.
   */
  Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate) {
    // todo: implement when/if Python uses post-update nodes in type tracking
    (isPostUpdate = false or isPostUpdate = true) and
    exists(DataFlowDispatch::ParameterPosition ppos |
      argumentPositionMatch(call, result, ppos) and
      arg = FlowSummaryImpl::Private::SummaryComponent::argument(ppos)
    )
  }

  /**
   * Gets the node for the parameter selected by the parameter component `param`
   * in a callable expression that is a local source of `callable`. Only
   * positional and keyword parameter positions are handled.
   */
  Node parameterOf(Node callable, SummaryComponent param) {
    exists(
      DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos,
      Parameter p, CallableExpr target
    |
      target = callable.getALocalSource().asExpr() and
      param = FlowSummaryImpl::Private::SummaryComponent::parameter(argPos) and
      DataFlowDispatch::parameterMatch(paramPos, argPos) and
      p = result.asCfgNode().getNode() and
      (
        p = target.getInnerScope().getArg(any(int i | paramPos.isPositional(i)))
        or
        p = target.getInnerScope().getArgByName(any(string name | paramPos.isKeyword(name)))
      )
    )
  }

  /**
   * Gets a node for a return value of a callable expression that is a local source
   * of `callable`, provided `return` is the return component.
   */
  Node returnOf(Node callable, SummaryComponent return) {
    return = FlowSummaryImpl::Private::SummaryComponent::return() and
    // `result` should be the return value of a callable expression (lambda or function) referenced by `callable`
    exists(CallableExpr target |
      target = callable.getALocalSource().asExpr() and
      target.getInnerScope().getAReturnValueFlowNode() = result.asCfgNode()
    )
  }

  // Relating callables to nodes
  /** Gets a simple call to `callable`, viewed as a `LibraryCallable`. */
  Node callTo(SummarizedCallable callable) {
    callable.(DataFlowDispatch::LibraryCallable).getACallSimple() = result
  }
}
private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow<SummaryTypeTrackerInput>;
/**
 * Gets a name that may be used as a piece of content. For Python, the only
 * content currently modeled is attributes, and the content name is simply the
 * name of the attribute.
 */
private string getPossibleContentName() {
  exists(DataFlowPublic::AttrRef ref | result = ref.getAttributeName()) and
  // The TypeTracking::append() etc. predicates that we want to cache depend on
  // this predicate, so the `ref()` call is placed here to get around identical files.
  Stages::TypeTracking::ref()
}
/** The Python-specific input to the shared type-tracking library. */
module TypeTrackingInput implements Shared::TypeTrackingInput {
  class Node = DataFlowPublic::Node;

  class LocalSourceNode = DataFlowPublic::LocalSourceNode;

  /** A piece of content, represented by one of the names from `getPossibleContentName`. */
  class Content instanceof string {
    Content() { getPossibleContentName() = this }

    /** Gets the content name itself. */
    string toString() { result = this }
  }

  /**
   * A label for `WithContent` and `WithoutContent` steps that restricts which
   * `ContentSet` is allowed to pass through.
   */
  class ContentFilter extends Unit {
    /** Has no result: this filter matches no content. */
    Content getAMatchingContent() { none() }
  }

  /**
   * Holds if a value that was stored with `storeContents` can be read back
   * with `loadContents`, which is the case exactly when the two are equal.
   */
  pragma[inline]
  predicate compatibleContents(Content storeContents, Content loadContents) {
    loadContents = storeContents
  }

  /** Holds if there is a simple local flow step from `nodeFrom` to `nodeTo`. */
  predicate simpleLocalSmallStep = DataFlowPrivate::simpleLocalFlowStepForTypetracking/2;

  /**
   * Holds if there is a level step from `nodeFrom` to `nodeTo` that may depend
   * on the call graph. No such steps are defined for Python.
   */
  predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { none() }

  /**
   * Holds if there is a level step from `nodeFrom` to `nodeTo` that does not
   * depend on the call graph. These are taken from the summary type-tracker.
   */
  predicate levelStepNoCall(Node nodeFrom, LocalSourceNode nodeTo) {
    TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo)
  }

  /**
   * Holds if `nodeFrom` is an argument of a call and `nodeTo` is the matching
   * parameter of the callable targeted by that call.
   *
   * Flow into summarized library methods is not included, as that will lead to negative
   * recursion (or, at best, terrible performance), since identifying calls to library
   * methods is done using API graphs (which uses type tracking).
   */
  predicate callStep(Node nodeFrom, LocalSourceNode nodeTo) {
    exists(
      DataFlowPrivate::DataFlowCall call, DataFlowPrivate::DataFlowCallable target,
      DataFlowPrivate::ArgumentPosition argPos, DataFlowPrivate::ParameterPosition paramPos
    |
      target = call.getCallable() and
      DataFlowPrivate::parameterMatch(paramPos, argPos)
    |
      call.getArgument(argPos) = nodeFrom and
      target.getParameter(paramPos) = nodeTo
    )
  }

  /**
   * Holds if `nodeFrom` is a return node of the callable targeted by a call,
   * and `nodeTo` is the node for that call.
   *
   * Flow out of summarized library methods is not included, as that will lead to negative
   * recursion (or, at best, terrible performance), since identifying calls to library
   * methods is done using API graphs (which uses type tracking).
   */
  predicate returnStep(Node nodeFrom, LocalSourceNode nodeTo) {
    exists(DataFlowPrivate::ExtractedDataFlowCall call |
      call.getNode() = nodeTo.(DataFlowPublic::CfgNode).getNode() and
      call.getCallable() = nodeFrom.(DataFlowPrivate::ReturnNode).getEnclosingCallable()
    )
  }

  /**
   * Holds if `nodeFrom` is written to the `content` content of the object in
   * `nodeTo`, either by an attribute write or by a summary store step.
   */
  predicate storeStep(Node nodeFrom, Node nodeTo, Content content) {
    exists(DataFlowPublic::AttrWrite write |
      nodeTo = write.getObject() and
      nodeFrom = write.getValue() and
      write.mayHaveAttributeName(content)
    )
    or
    exists(DataFlowPublic::ContentSet contents |
      TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents) and
      contents.(DataFlowPublic::AttributeContent).getAttribute() = content
    )
  }

  /**
   * Holds if `nodeTo` results from reading the `content` content of
   * `nodeFrom`, either by an attribute read or by a summary load step.
   */
  predicate loadStep(Node nodeFrom, LocalSourceNode nodeTo, Content content) {
    exists(DataFlowPublic::AttrRead read |
      nodeTo = read and
      nodeFrom = read.getObject() and
      read.mayHaveAttributeName(content)
    )
    or
    exists(DataFlowPublic::ContentSet contents |
      TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents) and
      contents.(DataFlowPublic::AttributeContent).getAttribute() = content
    )
  }

  /**
   * Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent`
   * of `nodeTo`. Such steps only arise from flow summaries.
   */
  predicate loadStoreStep(Node nodeFrom, Node nodeTo, Content loadContent, Content storeContent) {
    exists(DataFlowPublic::ContentSet loaded, DataFlowPublic::ContentSet stored |
      TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loaded, stored) and
      loaded.(DataFlowPublic::AttributeContent).getAttribute() = loadContent and
      stored.(DataFlowPublic::AttributeContent).getAttribute() = storeContent
    )
  }

  /**
   * Holds if type-tracking should step from `nodeFrom` to `nodeTo` when
   * inside a content matched by `filter`.
   */
  predicate withContentStep(Node nodeFrom, LocalSourceNode nodeTo, ContentFilter filter) {
    TypeTrackerSummaryFlow::basicWithContentStep(nodeFrom, nodeTo, filter)
  }

  /**
   * Holds if type-tracking should step from `nodeFrom` to `nodeTo`, while
   * blocking flow of contents matched by `filter` through this step.
   */
  predicate withoutContentStep(Node nodeFrom, LocalSourceNode nodeTo, ContentFilter filter) {
    TypeTrackerSummaryFlow::basicWithoutContentStep(nodeFrom, nodeTo, filter)
  }

  /**
   * Holds if there is a jump into a capturing scope:
   *
   * ```python
   * var = expr
   * ...
   * def f():
   *     ..var is used..
   * ```
   *
   * Here `nodeFrom` is `expr` and `nodeTo` is the scope-entry node of `var` in `f`.
   */
  private predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
    exists(ScopeEntryDefinition entry, SsaSourceVariable v, DefinitionNode defn |
      nodeTo.(DataFlowPublic::ScopeEntryDefinitionNode).getDefinition() = entry and
      entry.getSourceVariable() = v and
      v.hasDefiningNode(defn) and
      defn.getValue() = nodeFrom.asCfgNode() and
      nodeFrom.getScope() = v.getScope().getScope*()
    )
  }

  /**
   * Holds if data can flow from `nodeFrom` to `nodeTo` in a way that discards
   * call contexts.
   */
  predicate jumpStep(Node nodeFrom, LocalSourceNode nodeTo) {
    capturedJumpStep(nodeFrom, nodeTo)
    or
    DataFlowPrivate::jumpStepSharedWithTypeTracker(nodeFrom, nodeTo)
  }

  /** Holds unconditionally, enabling the "backtrack store target" feature. */
  predicate hasFeatureBacktrackStoreTarget() { any() }

  /** Holds if `localSource` flows to `dst` according to `LocalSourceNode::flowsTo`. */
  predicate nonStandardFlowsTo(LocalSourceNode localSource, Node dst) { localSource.flowsTo(dst) }
}
import SharedImpl::TypeTracking<TypeTrackingInput>

View File

@@ -0,0 +1,207 @@
/** Provides logic related to captured variables. */
private import python
private import DataFlowPublic
private import semmle.python.dataflow.new.internal.DataFlowPrivate
private import codeql.dataflow.VariableCapture as Shared
// Note: The Javascript implementation (on the branch https://github.com/github/codeql/pull/14412)
// had some tweaks related to performance. See these two commits:
// - JS: Capture flow: https://github.com/github/codeql/pull/14412/commits/7bcf8b858babfea0a3e36ce61145954c249e13ac
// - JS: Disallow consecutive captured contents: https://github.com/github/codeql/pull/14412/commits/46e4cdc6232604ea7f58138a336d5a222fad8567
// The first is the main implementation, the second is a performance motivated restriction.
// The restriction is to clear any `CapturedVariableContent` before writing a new one
// to avoid long access paths (see the link for a nice explanation).
/**
 * The Python-specific input to the shared variable-capture library,
 * expressed in terms of Python scopes, basic blocks, control-flow nodes
 * and local variables.
 */
private module CaptureInput implements Shared::InputSig<Location> {
private import python as PY
/** A control-flow node for which `isExpressionNode` holds. */
additional class ExprCfgNode extends ControlFlowNode {
ExprCfgNode() { isExpressionNode(this) }
}
/** A scope, viewed as a callable. */
class Callable extends Scope {
/** Never holds: no callable is treated as a constructor. */
predicate isConstructor() { none() }
}
/** A Python basic block, extended with an enclosing callable and a location. */
class BasicBlock extends PY::BasicBlock {
/** Gets the scope of this basic block. */
Callable getEnclosingCallable() { result = this.getScope() }
// Note `PY::BasicBlock` does not have a `getLocation`.
// (Instead it has a complicated location info logic.)
// Using the location of the first node is simple
// and we just need a way to identify the basic block
// during debugging, so this will be serviceable.
/** Gets the location of the first node of this basic block. */
Location getLocation() { result = super.getNode(0).getLocation() }
}
/** Gets the immediate dominator of `bb`. */
BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) { result = bb.getImmediateDominator() }
/** Gets a successor of `bb`. */
BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() }
/**
 * A local variable whose scope is the function `f` and which has at least
 * one access in a scope other than `f`.
 */
class CapturedVariable extends LocalVariable {
// The function in whose scope this variable lives.
Function f;
CapturedVariable() {
// note: captured variables originating on module scope is currently
// covered by global variable handling.
this.getScope() = f and
this.getAnAccess().getScope() != f
}
/** Gets the function in whose scope this variable lives. */
Callable getCallable() { result = f }
/** Gets the location of the function `f` (used as the location of this variable). */
Location getLocation() { result = f.getLocation() }
/** Gets a scope that captures this variable. */
Scope getACapturingScope() {
// Any scope at or above an access to this variable, as long as it is
// strictly nested inside `f`.
result = this.getAnAccess().getScope().getScope*() and
result.getScope+() = f
}
}
/** A captured variable that is a parameter. */
class CapturedParameter extends CapturedVariable {
CapturedParameter() { this.isParameter() }
/** Gets a control-flow node whose AST node is a `Parameter` that is an access to this variable. */
ControlFlowNode getCfgNode() { result.getNode().(Parameter) = this.getAnAccess() }
}
/** An expression control-flow node, locatable by basic block and index. */
class Expr extends ExprCfgNode {
/** Holds if this node is the `i`th node of basic block `bb`. */
predicate hasCfgNode(BasicBlock bb, int i) { this = bb.getNode(i) }
}
/**
 * The value node of a definition node belonging to a store to the captured
 * variable `v`.
 */
class VariableWrite extends ControlFlowNode {
// The captured variable being written.
CapturedVariable v;
VariableWrite() { this = v.getAStore().getAFlowNode().(DefinitionNode).getValue() }
/** Gets the captured variable being written. */
CapturedVariable getVariable() { result = v }
/** Holds if this node is the `i`th node of basic block `bb`. */
predicate hasCfgNode(BasicBlock bb, int i) { this = bb.getNode(i) }
}
/** A control-flow node of a load of the captured variable `v`. */
class VariableRead extends Expr {
// The captured variable being read.
CapturedVariable v;
VariableRead() { this = v.getALoad().getAFlowNode() }
/** Gets the captured variable being read. */
CapturedVariable getVariable() { result = v }
}
/**
 * Holds if `nodeTo` is a use of an SSA variable that has an ultimate
 * definition whose defining node assigns the value `nodeFrom`.
 */
private predicate closureFlowStep(ExprCfgNode nodeFrom, ExprCfgNode nodeTo) {
// TODO: Other languages have an extra case here looking like
// simpleAstFlowStep(nodeFrom, nodeTo)
// we should investigate the potential benefit of adding that.
exists(SsaVariable def |
def.getAUse() = nodeTo and
def.getAnUltimateDefinition().getDefinition().(DefinitionNode).getValue() = nodeFrom
)
}
/** An expression node whose AST node is a `CallableExpr` or a `Comp`. */
class ClosureExpr extends Expr {
ClosureExpr() {
this.getNode() instanceof CallableExpr
or
this.getNode() instanceof Comp
}
/**
 * Holds if `body` is the inner scope of this callable expression, or the
 * function of this comprehension.
 */
predicate hasBody(Callable body) {
body = this.getNode().(CallableExpr).getInnerScope()
or
body = this.getNode().(Comp).getFunction()
}
/**
 * Holds if `f` is reachable from this expression by one or more
 * `closureFlowStep`s and `f` itself has no outgoing `closureFlowStep`.
 */
predicate hasAliasedAccess(Expr f) { closureFlowStep+(this, f) and not closureFlowStep(f, _) }
}
}
/** A captured variable, as defined by `CaptureInput::CapturedVariable`. */
class CapturedVariable = CaptureInput::CapturedVariable;
/** A closure expression, as defined by `CaptureInput::ClosureExpr`. */
class ClosureExpr = CaptureInput::ClosureExpr;
/** The shared variable-capture flow library, instantiated with `CaptureInput`. */
module Flow = Shared::Flow<Location, CaptureInput>;
/**
 * Gets the closure node of the shared variable-capture library that
 * corresponds to the data-flow node `n`.
 */
private Flow::ClosureNode asClosureNode(Node n) {
  // synthesized capture nodes wrap their closure node directly
  n.(SynthCaptureNode).getSynthesizedCaptureNode() = result
  or
  // nodes backed by a CFG node: expressions, variable write sources, and parameters
  exists(ControlFlowNode cfg | cfg = n.(CfgNode).getNode() |
    cfg = result.(Flow::ExprNode).getExpr()
    or
    cfg = result.(Flow::VariableWriteSourceNode).getVariableWrite()
    or
    cfg = result.(Flow::ParameterNode).getParameter().getCfgNode()
  )
  or
  // post-update nodes are matched through their pre-update CFG node
  exists(ControlFlowNode pre |
    pre = n.(PostUpdateNode).getPreUpdateNode().(CfgNode).getNode() and
    pre = result.(Flow::ExprPostUpdateNode).getExpr()
  )
  or
  // the synthesized captured-variables parameter is matched through its callable
  n.(SynthCapturedVariablesParameterNode).getCallable() =
    result.(Flow::ThisParameterNode).getCallable()
}
/**
 * Holds if the variable-capture library has a store step for the variable of
 * `c` between the closure nodes corresponding to `nodeFrom` and `nodeTo`.
 */
predicate storeStep(Node nodeFrom, CapturedVariableContent c, Node nodeTo) {
  exists(Flow::ClosureNode src, Flow::ClosureNode dst |
    src = asClosureNode(nodeFrom) and
    dst = asClosureNode(nodeTo)
  |
    Flow::storeStep(src, c.getVariable(), dst)
  )
}
/**
 * Holds if the variable-capture library has a read step for the variable of
 * `c` between the closure nodes corresponding to `nodeFrom` and `nodeTo`.
 */
predicate readStep(Node nodeFrom, CapturedVariableContent c, Node nodeTo) {
  exists(Flow::ClosureNode src, Flow::ClosureNode dst |
    src = asClosureNode(nodeFrom) and
    dst = asClosureNode(nodeTo)
  |
    Flow::readStep(src, c.getVariable(), dst)
  )
}
/**
 * Holds if the variable-capture library has a local flow step between the
 * closure nodes corresponding to `nodeFrom` and `nodeTo`.
 */
predicate valueStep(Node nodeFrom, Node nodeTo) {
  exists(Flow::ClosureNode src, Flow::ClosureNode dst |
    src = asClosureNode(nodeFrom) and
    dst = asClosureNode(nodeTo)
  |
    Flow::localFlowStep(src, dst)
  )
}
/**
 * Diagnostic predicates for inspecting how the variable-capture library
 * instantiation behaves on Python code bases.
 *
 * These predicates are intended to be quick-evaluated on databases of
 * interest. Ideally, every `unmapped*` predicate has no results.
 */
private module Debug {
  /**
   * Holds if the capture library has a store step for `v` from
   * `closureNodeFrom` to `closureNodeTo`, and these closure nodes correspond
   * to the data-flow nodes `nodeFrom` and `nodeTo`, respectively.
   */
  predicate flowStoreStep(
    Node nodeFrom, Flow::ClosureNode closureNodeFrom, CapturedVariable v,
    Flow::ClosureNode closureNodeTo, Node nodeTo
  ) {
    Flow::storeStep(closureNodeFrom, v, closureNodeTo) and
    asClosureNode(nodeFrom) = closureNodeFrom and
    asClosureNode(nodeTo) = closureNodeTo
  }

  /** Holds if a store step of the capture library has no counterpart in `flowStoreStep`. */
  predicate unmappedFlowStoreStep(
    Flow::ClosureNode closureNodeFrom, CapturedVariable v, Flow::ClosureNode closureNodeTo
  ) {
    not exists(Node nodeFrom, Node nodeTo |
      flowStoreStep(nodeFrom, closureNodeFrom, v, closureNodeTo, nodeTo)
    ) and
    Flow::storeStep(closureNodeFrom, v, closureNodeTo)
  }

  /**
   * Holds if the capture library has a read step for `v` from
   * `closureNodeFrom` to `closureNodeTo`, and these closure nodes correspond
   * to the data-flow nodes `nodeFrom` and `nodeTo`, respectively.
   */
  predicate flowReadStep(
    Node nodeFrom, Flow::ClosureNode closureNodeFrom, CapturedVariable v,
    Flow::ClosureNode closureNodeTo, Node nodeTo
  ) {
    Flow::readStep(closureNodeFrom, v, closureNodeTo) and
    asClosureNode(nodeFrom) = closureNodeFrom and
    asClosureNode(nodeTo) = closureNodeTo
  }

  /** Holds if a read step of the capture library has no counterpart in `flowReadStep`. */
  predicate unmappedFlowReadStep(
    Flow::ClosureNode closureNodeFrom, CapturedVariable v, Flow::ClosureNode closureNodeTo
  ) {
    not exists(Node nodeFrom, Node nodeTo |
      flowReadStep(nodeFrom, closureNodeFrom, v, closureNodeTo, nodeTo)
    ) and
    Flow::readStep(closureNodeFrom, v, closureNodeTo)
  }

  /**
   * Holds if the capture library has a local flow step from
   * `closureNodeFrom` to `closureNodeTo`, and these closure nodes correspond
   * to the data-flow nodes `nodeFrom` and `nodeTo`, respectively.
   */
  predicate flowValueStep(
    Node nodeFrom, Flow::ClosureNode closureNodeFrom, Flow::ClosureNode closureNodeTo, Node nodeTo
  ) {
    Flow::localFlowStep(closureNodeFrom, closureNodeTo) and
    asClosureNode(nodeFrom) = closureNodeFrom and
    asClosureNode(nodeTo) = closureNodeTo
  }

  /** Holds if a local flow step of the capture library has no counterpart in `flowValueStep`. */
  predicate unmappedFlowValueStep(Flow::ClosureNode closureNodeFrom, Flow::ClosureNode closureNodeTo) {
    not exists(Node nodeFrom, Node nodeTo |
      flowValueStep(nodeFrom, closureNodeFrom, closureNodeTo, nodeTo)
    ) and
    Flow::localFlowStep(closureNodeFrom, closureNodeTo)
  }

  /** Holds if `closureNode` does not correspond to any data-flow node. */
  predicate unmappedFlowClosureNode(Flow::ClosureNode closureNode) {
    not exists(Node n | asClosureNode(n) = closureNode)
  }
}

View File

@@ -1,4 +1,6 @@
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* Provides an implementation of global (interprocedural) taint tracking.
* This file re-exports the local (intraprocedural) taint-tracking analysis
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
@@ -12,6 +14,8 @@ import TaintTrackingParameter::Public
private import TaintTrackingParameter::Private
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* A configuration of interprocedural taint tracking analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the taint tracking library must define its own unique extension of
@@ -51,7 +55,7 @@ private import TaintTrackingParameter::Private
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
*/
abstract class Configuration extends DataFlow::Configuration {
abstract deprecated class Configuration extends DataFlow::Configuration {
bindingset[this]
Configuration() { any() }

View File

@@ -1,4 +1,6 @@
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* Provides an implementation of global (interprocedural) taint tracking.
* This file re-exports the local (intraprocedural) taint-tracking analysis
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
@@ -12,6 +14,8 @@ import TaintTrackingParameter::Public
private import TaintTrackingParameter::Private
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* A configuration of interprocedural taint tracking analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the taint tracking library must define its own unique extension of
@@ -51,7 +55,7 @@ private import TaintTrackingParameter::Private
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
*/
abstract class Configuration extends DataFlow::Configuration {
abstract deprecated class Configuration extends DataFlow::Configuration {
bindingset[this]
Configuration() { any() }

View File

@@ -1,4 +1,6 @@
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* Provides an implementation of global (interprocedural) taint tracking.
* This file re-exports the local (intraprocedural) taint-tracking analysis
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
@@ -12,6 +14,8 @@ import TaintTrackingParameter::Public
private import TaintTrackingParameter::Private
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* A configuration of interprocedural taint tracking analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the taint tracking library must define its own unique extension of
@@ -51,7 +55,7 @@ private import TaintTrackingParameter::Private
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
*/
abstract class Configuration extends DataFlow::Configuration {
abstract deprecated class Configuration extends DataFlow::Configuration {
bindingset[this]
Configuration() { any() }

View File

@@ -1,4 +1,6 @@
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* Provides an implementation of global (interprocedural) taint tracking.
* This file re-exports the local (intraprocedural) taint-tracking analysis
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
@@ -12,6 +14,8 @@ import TaintTrackingParameter::Public
private import TaintTrackingParameter::Private
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* A configuration of interprocedural taint tracking analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the taint tracking library must define its own unique extension of
@@ -51,7 +55,7 @@ private import TaintTrackingParameter::Private
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
*/
abstract class Configuration extends DataFlow::Configuration {
abstract deprecated class Configuration extends DataFlow::Configuration {
bindingset[this]
Configuration() { any() }

View File

@@ -88,15 +88,6 @@ module SsaSource {
lhs.getBasicBlock().dominates(defn.getBasicBlock())
}
/** Holds if `v` is defined by a `for` statement, the definition being `defn` */
cached
deprecated predicate iteration_defined_variable(
Variable v, ControlFlowNode defn, ControlFlowNode sequence
) {
exists(ForNode for | for.iterates(defn, sequence)) and
defn.(NameNode).defines(v)
}
/** Holds if `v` is a parameter variable and `defn` is the CFG node for that parameter. */
cached
predicate parameter_definition(Variable v, ControlFlowNode defn) {

View File

@@ -9,6 +9,7 @@ private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
@@ -24,6 +25,8 @@ module Aioch {
/** Gets a reference to the `aioch.Client` class or any subclass. */
API::Node subclassRef() {
result = API::moduleImport("aioch").getMember("Client").getASubclass*()
or
result = ModelOutput::getATypeNode("aioch.Client~Subclass").getASubclass*()
}
/** Gets a reference to an instance of `clickhouse_driver.Client` or any subclass. */

View File

@@ -14,6 +14,7 @@ private import semmle.python.frameworks.internal.SelfRefMixin
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Yarl
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
@@ -31,6 +32,8 @@ module AiohttpWebModel {
/** Gets a reference to the `aiohttp.web.View` class or any subclass. */
API::Node subclassRef() {
result = API::moduleImport("aiohttp").getMember("web").getMember("View").getASubclass*()
or
result = ModelOutput::getATypeNode("aiohttp.web.View~Subclass").getASubclass*()
}
}
@@ -706,10 +709,12 @@ module AiohttpWebModel {
}
/**
* INTERNAL: Do not use.
*
* Provides models for the client part (`aiohttp.client`) of the `aiohttp` PyPI package.
* See https://docs.aiohttp.org/en/stable/client.html
*/
private module AiohttpClientModel {
module AiohttpClientModel {
/**
* Provides models for the `aiohttp.ClientSession` class
*
@@ -717,8 +722,10 @@ private module AiohttpClientModel {
*/
module ClientSession {
/** Gets a reference to the `aiohttp.ClientSession` class. */
private API::Node classRef() {
API::Node classRef() {
result = API::moduleImport("aiohttp").getMember("ClientSession")
or
result = ModelOutput::getATypeNode("aiohttp.ClientSession~Subclass").getASubclass*()
}
/** Gets a reference to an instance of `aiohttp.ClientSession`. */

View File

@@ -9,6 +9,7 @@ private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
@@ -37,6 +38,9 @@ module ClickhouseDriver {
or
// commonly used alias
classRef = API::moduleImport("clickhouse_driver").getMember("Client")
or
// Models-as-Data subclass
classRef = ModelOutput::getATypeNode("clickhouse_driver.client.Client~Subclass")
|
result = classRef.getASubclass*()
)

View File

@@ -16,6 +16,7 @@ private import semmle.python.frameworks.internal.PoorMansFunctionResolution
private import semmle.python.frameworks.internal.SelfRefMixin
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.security.dataflow.UrlRedirectCustomizations
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
@@ -85,6 +86,10 @@ module Django {
}
}
private class MaDSubclass extends ModeledSubclass {
MaDSubclass() { this = ModelOutput::getATypeNode("Django.Views.View~Subclass") }
}
/** Gets a reference to the `django.views.generic.View` class or any subclass. */
API::Node subclassRef() { result = any(ModeledSubclass subclass).getASubclass*() }
}
@@ -185,6 +190,10 @@ module Django {
}
}
private class MaDSubclass extends ModeledSubclass {
MaDSubclass() { this = ModelOutput::getATypeNode("django.forms.BaseForm~Subclass") }
}
/** Gets a reference to the `django.forms.forms.BaseForm` class or any subclass. */
API::Node subclassRef() { result = any(ModeledSubclass subclass).getASubclass*() }
}
@@ -290,6 +299,10 @@ module Django {
}
}
private class MaDSubclass extends ModeledSubclass {
MaDSubclass() { this = ModelOutput::getATypeNode("Django.Forms.Field~Subclass") }
}
/** Gets a reference to the `django.forms.fields.Field` class or any subclass. */
API::Node subclassRef() { result = any(ModeledSubclass subclass).getASubclass*() }
}
@@ -596,6 +609,8 @@ module PrivateDjango {
.getMember("models")
.getMember("PolymorphicModel")
.getASubclass*()
or
result = ModelOutput::getATypeNode("Django.db.models.Model~Subclass").getASubclass*()
}
/**
@@ -766,6 +781,9 @@ module PrivateDjango {
.getMember(className)
.getASubclass*()
)
or
result =
ModelOutput::getATypeNode("django.db.models.FileField~Subclass").getASubclass*()
}
}
@@ -823,6 +841,10 @@ module PrivateDjango {
or
// Commonly used alias
result = models().getMember("RawSQL")
or
result =
ModelOutput::getATypeNode("django.db.models.expressions.RawSQL~Subclass")
.getASubclass*()
}
/**
@@ -1132,9 +1154,6 @@ module PrivateDjango {
/** Gets a reference to the `django.http` module. */
API::Node http() { result = django().getMember("http") }
/** DEPRECATED: Alias for `DjangoHttp` */
deprecated module http = DjangoHttp;
/** Provides models for the `django.http` module */
module DjangoHttp {
// ---------------------------------------------------------------------------
@@ -1157,6 +1176,9 @@ module PrivateDjango {
or
// handle django.http.HttpRequest alias
result = http().getMember("HttpRequest")
or
result =
ModelOutput::getATypeNode("django.http.request.HttpRequest~Subclass").getASubclass*()
}
/**
@@ -1322,7 +1344,13 @@ module PrivateDjango {
}
/** Gets a reference to the `django.http.response.HttpResponse` class or any subclass. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*()
or
result =
ModelOutput::getATypeNode("django.http.response.HttpResponse~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.HttpResponse`, extend this class to model new instances.
@@ -1383,7 +1411,12 @@ module PrivateDjango {
}
/** Gets a reference to a subclass of the `django.http.response.HttpResponseRedirect` class. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*() or
result =
ModelOutput::getATypeNode("django.http.response.HttpResponseRedirect~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.HttpResponseRedirect`, extend this class to model new instances.
@@ -1446,7 +1479,12 @@ module PrivateDjango {
}
/** Gets a reference to the `django.http.response.HttpResponsePermanentRedirect` class. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*() or
result =
ModelOutput::getATypeNode("django.http.response.HttpResponsePermanentRedirect~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.HttpResponsePermanentRedirect`, extend this class to model new instances.
@@ -1510,7 +1548,12 @@ module PrivateDjango {
}
/** Gets a reference to the `django.http.response.HttpResponseNotModified` class. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*() or
result =
ModelOutput::getATypeNode("django.http.response.HttpResponseNotModified~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.HttpResponseNotModified`, extend this class to model new instances.
@@ -1562,7 +1605,12 @@ module PrivateDjango {
}
/** Gets a reference to the `django.http.response.HttpResponseBadRequest` class. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*() or
result =
ModelOutput::getATypeNode("django.http.response.HttpResponseBadRequest~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.HttpResponseBadRequest`, extend this class to model new instances.
@@ -1616,7 +1664,12 @@ module PrivateDjango {
}
/** Gets a reference to the `django.http.response.HttpResponseNotFound` class. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*() or
result =
ModelOutput::getATypeNode("django.http.response.HttpResponseNotFound~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.HttpResponseNotFound`, extend this class to model new instances.
@@ -1670,7 +1723,12 @@ module PrivateDjango {
}
/** Gets a reference to the `django.http.response.HttpResponseForbidden` class. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*() or
result =
ModelOutput::getATypeNode("django.http.response.HttpResponseForbidden~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.HttpResponseForbidden`, extend this class to model new instances.
@@ -1724,7 +1782,12 @@ module PrivateDjango {
}
/** Gets a reference to the `django.http.response.HttpResponseNotAllowed` class. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*() or
result =
ModelOutput::getATypeNode("django.http.response.HttpResponseNotAllowed~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.HttpResponseNotAllowed`, extend this class to model new instances.
@@ -1779,7 +1842,12 @@ module PrivateDjango {
}
/** Gets a reference to the `django.http.response.HttpResponseGone` class. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*() or
result =
ModelOutput::getATypeNode("django.http.response.HttpResponseGone~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.HttpResponseGone`, extend this class to model new instances.
@@ -1833,7 +1901,12 @@ module PrivateDjango {
}
/** Gets a reference to the `django.http.response.HttpResponseServerError` class. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*() or
result =
ModelOutput::getATypeNode("django.http.response.HttpResponseServerError~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.HttpResponseServerError`, extend this class to model new instances.
@@ -1887,7 +1960,12 @@ module PrivateDjango {
}
/** Gets a reference to the `django.http.response.JsonResponse` class. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*() or
result =
ModelOutput::getATypeNode("django.http.response.JsonResponse~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.JsonResponse`, extend this class to model new instances.
@@ -1944,7 +2022,12 @@ module PrivateDjango {
}
/** Gets a reference to the `django.http.response.StreamingHttpResponse` class. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*() or
result =
ModelOutput::getATypeNode("django.http.response.StreamingHttpResponse~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.StreamingHttpResponse`, extend this class to model new instances.
@@ -1998,7 +2081,12 @@ module PrivateDjango {
}
/** Gets a reference to the `django.http.response.FileResponse` class. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
API::Node classRef() {
result = baseClassRef().getASubclass*() or
result =
ModelOutput::getATypeNode("django.http.response.FileResponse~Subclass")
.getASubclass*()
}
/**
* A source of instances of `django.http.response.FileResponse`, extend this class to model new instances.
@@ -2762,7 +2850,7 @@ module PrivateDjango {
this.asExpr() = list and
// we look for an assignment to the `MIDDLEWARE` setting
exists(DataFlow::Node mw |
mw.asVar().getName() = "MIDDLEWARE" and
mw.asExpr().(Name).getId() = "MIDDLEWARE" and
DataFlow::localFlow(this, mw)
|
// To only include results where CSRF protection matters, we only care about CSRF
@@ -2837,5 +2925,10 @@ module PrivateDjango {
DjangoAllowedUrl() {
this = DataFlow::BarrierGuard<djangoUrlHasAllowedHostAndScheme/3>::getABarrierNode()
}
override predicate sanitizes(UrlRedirect::FlowState state) {
// sanitize all flow states
any()
}
}
}

View File

@@ -12,6 +12,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides classes modeling security-relevant aspects of the `fabric` PyPI package, for
@@ -65,12 +66,14 @@ private module FabricV1 {
}
/**
* INTERNAL: Do not use.
*
* Provides classes modeling security-relevant aspects of the `fabric` PyPI package, for
* version 2.x.
*
* See http://docs.fabfile.org/en/2.5/getting-started.html.
*/
private module FabricV2 {
module FabricV2 {
/** Gets a reference to the `fabric` module. */
API::Node fabric() { result = API::moduleImport("fabric") }
@@ -95,6 +98,9 @@ private module FabricV2 {
result = fabric().getMember("Connection")
or
result = connection().getMember("Connection")
or
result =
ModelOutput::getATypeNode("fabric.connection.Connection~Subclass").getASubclass*()
}
/**

View File

@@ -11,18 +11,23 @@ private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.Pydantic
private import semmle.python.frameworks.Starlette
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
*
* Provides models for the `fastapi` PyPI package.
* See https://fastapi.tiangolo.com/.
*/
private module FastApi {
module FastApi {
/**
* Provides models for FastAPI applications (an instance of `fastapi.FastAPI`).
*/
module App {
/** Gets a reference to the `fastapi.FastAPI` class (the `FastAPI` member of the `fastapi` module). */
API::Node cls() { result = API::moduleImport("fastapi").getMember("FastAPI") }
/** Gets a reference to a FastAPI application (an instance of `fastapi.FastAPI`). */
API::Node instance() { result = API::moduleImport("fastapi").getMember("FastAPI").getReturn() }
API::Node instance() { result = cls().getReturn() }
}
/**
@@ -31,10 +36,14 @@ private module FastApi {
* See https://fastapi.tiangolo.com/tutorial/bigger-applications/.
*/
module ApiRouter {
/** Gets a reference to an instance of `fastapi.ApiRouter`. */
API::Node instance() {
result = API::moduleImport("fastapi").getMember("APIRouter").getASubclass*().getReturn()
/**
 * Gets a reference to the `fastapi.APIRouter` class or any subclass of it, including
 * subclasses of the type nodes that `ModelOutput::getATypeNode` reports for
 * `fastapi.APIRouter~Subclass`.
 */
API::Node cls() {
result = API::moduleImport("fastapi").getMember("APIRouter").getASubclass*()
or
result = ModelOutput::getATypeNode("fastapi.APIRouter~Subclass").getASubclass*()
}
/** Gets a reference to an instance of `fastapi.APIRouter`. */
API::Node instance() { result = cls().getReturn() }
}
// ---------------------------------------------------------------------------

View File

@@ -14,6 +14,7 @@ private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.security.dataflow.PathInjectionCustomizations
private import semmle.python.dataflow.new.FlowSummary
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides models for the `flask` PyPI package.
@@ -39,6 +40,10 @@ module Flask {
"MethodView"
])
.getASubclass*()
or
result = ModelOutput::getATypeNode("flask.View~Subclass").getASubclass*()
or
result = ModelOutput::getATypeNode("flask.MethodView~Subclass").getASubclass*()
}
}
@@ -52,6 +57,8 @@ module Flask {
API::Node subclassRef() {
result =
API::moduleImport("flask").getMember("views").getMember("MethodView").getASubclass*()
or
result = ModelOutput::getATypeNode("flask.MethodView~Subclass").getASubclass*()
}
}
}
@@ -63,7 +70,10 @@ module Flask {
*/
module FlaskApp {
/** Gets a reference to the `flask.Flask` class. */
API::Node classRef() { result = API::moduleImport("flask").getMember("Flask") }
API::Node classRef() {
result = API::moduleImport("flask").getMember("Flask") or
result = ModelOutput::getATypeNode("flask.Flask~Subclass").getASubclass*()
}
/** Gets a reference to an instance of `flask.Flask` (a flask application). */
API::Node instance() { result = classRef().getReturn() }
@@ -80,6 +90,8 @@ module Flask {
result = API::moduleImport("flask").getMember("Blueprint")
or
result = API::moduleImport("flask").getMember("blueprints").getMember("Blueprint")
or
result = ModelOutput::getATypeNode("flask.Blueprint~Subclass").getASubclass*()
}
/** Gets a reference to an instance of `flask.Blueprint`. */
@@ -87,7 +99,9 @@ module Flask {
}
/** Gets a reference to the `flask.request` object. */
API::Node request() { result = API::moduleImport("flask").getMember("request") }
API::Node request() {
result = API::moduleImport(["flask", "flask_restful"]).getMember("request")
}
/**
* Provides models for the `flask.Response` class
@@ -104,6 +118,8 @@ module Flask {
result = API::moduleImport("flask").getMember("Response")
or
result = [FlaskApp::classRef(), FlaskApp::instance()].getMember("response_class")
or
result = ModelOutput::getATypeNode("flask.Response~Subclass").getASubclass*()
}
/**
@@ -337,7 +353,7 @@ module Flask {
}
}
/** A request handler defined in a django view class, that has no known route. */
/** A request handler defined in a flask view class, that has no known route. */
private class FlaskViewClassHandlerWithoutKnownRoute extends Http::Server::RequestHandler::Range {
FlaskViewClassHandlerWithoutKnownRoute() {
exists(FlaskViewClass vc | vc.getARequestHandler() = this) and
@@ -624,7 +640,7 @@ module Flask {
.getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
@@ -650,7 +666,7 @@ module Flask {
.getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[0]" and
// Technically it's `Iterator[str]`, but list will do :)
output = "ReturnValue.ListElement" and

View File

@@ -9,15 +9,18 @@
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
*
* Provides models for the `httpx` PyPI package.
*
* See
* - https://pypi.org/project/httpx/
* - https://www.python-httpx.org/
*/
private module HttpxModel {
module HttpxModel {
/**
* An outgoing HTTP request, from the `httpx` library.
*
@@ -59,8 +62,10 @@ private module HttpxModel {
*/
module Client {
/** Get a reference to the `httpx.Client` or `httpx.AsyncClient` class. */
private API::Node classRef() {
API::Node classRef() {
result = API::moduleImport("httpx").getMember(["Client", "AsyncClient"])
or
result = ModelOutput::getATypeNode("httpx.Client~Subclass").getASubclass*()
}
/** A method call on a Client that sends off a request */

View File

@@ -7,12 +7,15 @@ private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
*
* Provides models for the `invoke` PyPI package.
* See https://www.pyinvoke.org/.
*/
private module Invoke {
module Invoke {
// ---------------------------------------------------------------------------
// invoke
// ---------------------------------------------------------------------------
@@ -30,6 +33,8 @@ private module Invoke {
result = API::moduleImport("invoke").getMember("context").getMember("Context")
or
result = API::moduleImport("invoke").getMember("Context")
or
result = ModelOutput::getATypeNode("invoke.context.Context~Subclass").getASubclass*()
}
/** Gets a reference to an instance of `invoke.context.Context`. */

View File

@@ -10,15 +10,25 @@ private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
*
* Provides classes modeling security-relevant aspects of the `lxml` PyPI package
*
* See
* - https://pypi.org/project/lxml/
* - https://lxml.de/tutorial.html
*/
private module Lxml {
module Lxml {
/** Gets a reference to the `lxml.etree` module */
API::Node etreeRef() {
result = API::moduleImport("lxml").getMember("etree")
or
result = ModelOutput::getATypeNode("lxml.etree~Alias")
}
// ---------------------------------------------------------------------------
// XPath
// ---------------------------------------------------------------------------
@@ -34,9 +44,7 @@ private module Lxml {
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ETXPath
*/
private class XPathClassCall extends XML::XPathConstruction::Range, DataFlow::CallCfgNode {
XPathClassCall() {
this = API::moduleImport("lxml").getMember("etree").getMember(["XPath", "ETXPath"]).getACall()
}
XPathClassCall() { this = etreeRef().getMember(["XPath", "ETXPath"]).getACall() }
override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("path")] }
@@ -62,20 +70,11 @@ private module Lxml {
XPathCall() {
exists(API::Node parseResult |
parseResult =
API::moduleImport("lxml")
.getMember("etree")
.getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"])
.getReturn()
etreeRef().getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"]).getReturn()
or
// TODO: lxml.etree.parseid(<text>)[0] will contain the root element from parsing <text>
// but we don't really have a way to model that nicely.
parseResult =
API::moduleImport("lxml")
.getMember("etree")
.getMember("XMLParser")
.getReturn()
.getMember("close")
.getReturn()
parseResult = etreeRef().getMember("XMLParser").getReturn().getMember("close").getReturn()
|
this = parseResult.getMember("xpath").getACall()
)
@@ -87,14 +86,7 @@ private module Lxml {
}
class XPathEvaluatorCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
XPathEvaluatorCall() {
this =
API::moduleImport("lxml")
.getMember("etree")
.getMember("XPathEvaluator")
.getReturn()
.getACall()
}
XPathEvaluatorCall() { this = etreeRef().getMember("XPathEvaluator").getReturn().getACall() }
override DataFlow::Node getXPath() { result = this.getArg(0) }
@@ -130,9 +122,7 @@ private module Lxml {
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
private class LxmlParser extends InstanceSource, API::CallNode {
LxmlParser() {
this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
}
LxmlParser() { this = etreeRef().getMember("XMLParser").getACall() }
// NOTE: it's not possible to change settings of a parser after constructing it
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
@@ -162,10 +152,7 @@ private module Lxml {
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
*/
private class LxmlDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
LxmlDefaultParser() {
this =
API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
}
LxmlDefaultParser() { this = etreeRef().getMember("get_default_parser").getACall() }
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
// as highlighted by
@@ -240,7 +227,7 @@ private module Lxml {
LxmlParsing() {
functionName in ["fromstring", "fromstringlist", "XML", "XMLID", "parse", "parseid"] and
this = API::moduleImport("lxml").getMember("etree").getMember(functionName).getACall()
this = etreeRef().getMember(functionName).getACall()
}
override DataFlow::Node getAnInput() {
@@ -309,9 +296,7 @@ private module Lxml {
private class LxmlIterparseCall extends API::CallNode, XML::XmlParsing::Range,
FileSystemAccess::Range
{
LxmlIterparseCall() {
this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall()
}
LxmlIterparseCall() { this = etreeRef().getMember("iterparse").getACall() }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] }

View File

@@ -9,12 +9,15 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
*
* Provides models for the `MarkupSafe` PyPI package.
* See https://markupsafe.palletsprojects.com/en/2.0.x/.
*/
private module MarkupSafeModel {
module MarkupSafeModel {
/**
* Provides models for the `markupsafe.Markup` class
*
@@ -26,6 +29,8 @@ private module MarkupSafeModel {
result = API::moduleImport("markupsafe").getMember("Markup")
or
result = API::moduleImport("flask").getMember("Markup")
or
result = ModelOutput::getATypeNode("markupsafe.Markup~Subclass").getASubclass*()
}
/**

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
@@ -28,6 +29,8 @@ module Multidict {
/** Gets a reference to a `MultiDictProxy` class. */
API::Node classRef() {
result = API::moduleImport("multidict").getMember(["MultiDictProxy", "CIMultiDictProxy"])
or
result = ModelOutput::getATypeNode("multidict.MultiDictProxy~Subclass").getASubclass*()
}
/**

View File

@@ -11,14 +11,17 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
*
* Provides models for the `peewee` PyPI package.
* See
* - https://pypi.org/project/peewee/
* - https://docs.peewee-orm.com/en/latest/index.html
*/
private module Peewee {
module Peewee {
/** Provides models for the `peewee.Database` class and subclasses. */
module Database {
/** Gets a reference to the `peewee.Database` class or any subclass. */
@@ -31,7 +34,7 @@ private module Peewee {
.getMember(["SqliteDatabase", "MySQLDatabase", "PostgresqlDatabase"])
.getASubclass*()
or
// Ohter known subclasses, semi auto generated by using
// Other known subclasses, semi auto generated by using
// ```ql
// class DBClass extends Class, SelfRefMixin {
// DBClass() {
@@ -153,6 +156,8 @@ private module Peewee {
.getMember("sqliteq")
.getMember("SqliteQueueDatabase")
.getASubclass*()
or
result = ModelOutput::getATypeNode("peewee.Database~Subclass").getASubclass*()
}
/** Gets a reference to an instance of `peewee.Database` or any subclass. */

View File

@@ -9,15 +9,18 @@
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
*
* Provides models for the `pycurl` PyPI package.
*
* See
* - https://pypi.org/project/pycurl/
* - https://pycurl.io/docs/latest/
*/
private module Pycurl {
module Pycurl {
/**
* Provides models for the `pycurl.Curl` class
*
@@ -25,7 +28,11 @@ private module Pycurl {
*/
module Curl {
/** Gets a reference to the `pycurl.Curl` class. */
private API::Node classRef() { result = API::moduleImport("pycurl").getMember("Curl") }
API::Node classRef() {
result = API::moduleImport("pycurl").getMember("Curl")
or
result = ModelOutput::getATypeNode("pycurl.Curl~Subclass").getASubclass*()
}
/** Gets a reference to an instance of `pycurl.Curl`. */
private API::Node instance() { result = classRef().getReturn() }

View File

@@ -11,6 +11,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
@@ -31,6 +32,8 @@ module Pydantic {
/** Gets a reference to a `pydantic.BaseModel` subclass (a pydantic model). */
API::Node subclassRef() {
result = API::moduleImport("pydantic").getMember("BaseModel").getASubclass+()
or
result = ModelOutput::getATypeNode("pydantic.BaseModel~Subclass").getASubclass*()
}
/**

View File

@@ -12,6 +12,7 @@ private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
@@ -22,7 +23,7 @@ private import semmle.python.frameworks.Stdlib
* - https://pypi.org/project/requests/
* - https://requests.readthedocs.io/en/latest/
*/
private module Requests {
module Requests {
/**
* An outgoing HTTP request, from the `requests` library.
*
@@ -91,10 +92,12 @@ private module Requests {
*/
module Response {
/** Gets a reference to the `requests.models.Response` class. */
private API::Node classRef() {
API::Node classRef() {
result = API::moduleImport("requests").getMember("models").getMember("Response")
or
result = API::moduleImport("requests").getMember("Response")
or
result = ModelOutput::getATypeNode("requests.models.Response~Subclass").getASubclass*()
}
/**

View File

@@ -16,6 +16,7 @@ private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
@@ -27,7 +28,7 @@ private import semmle.python.frameworks.Stdlib
* - https://www.django-rest-framework.org/
* - https://pypi.org/project/djangorestframework/
*/
private module RestFramework {
module RestFramework {
// ---------------------------------------------------------------------------
// rest_framework.views.APIView handling
// ---------------------------------------------------------------------------
@@ -215,8 +216,10 @@ private module RestFramework {
*/
module Request {
/** Gets a reference to the `rest_framework.request.Request` class. */
private API::Node classRef() {
API::Node classRef() {
result = API::moduleImport("rest_framework").getMember("request").getMember("Request")
or
result = ModelOutput::getATypeNode("rest_framework.request.Request~Subclass").getASubclass*()
}
/**
@@ -299,8 +302,11 @@ private module RestFramework {
*/
module Response {
/** Gets a reference to the `rest_framework.response.Response` class. */
private API::Node classRef() {
API::Node classRef() {
result = API::moduleImport("rest_framework").getMember("response").getMember("Response")
or
result =
ModelOutput::getATypeNode("rest_framework.response.Response~Subclass").getASubclass*()
}
/** A direct instantiation of `rest_framework.response.Response`. */
@@ -328,6 +334,23 @@ private module RestFramework {
* See https://www.django-rest-framework.org/api-guide/exceptions/#api-reference
*/
module ApiException {
/**
 * Gets a reference to one of the `rest_framework.exceptions` classes named below, or
 * to any subclass of one of them. Also includes subclasses of the type nodes that
 * `ModelOutput::getATypeNode` reports for
 * `rest_framework.exceptions.APIException~Subclass`.
 */
API::Node classRef() {
exists(string className |
className in [
"APIException", "ValidationError", "ParseError", "AuthenticationFailed",
"NotAuthenticated", "PermissionDenied", "NotFound", "NotAcceptable"
] and
result =
API::moduleImport("rest_framework")
.getMember("exceptions")
.getMember(className)
.getASubclass*()
)
or
result =
ModelOutput::getATypeNode("rest_framework.exceptions.APIException~Subclass").getASubclass*()
}
/** A direct instantiation of `rest_framework.exceptions.ApiException` or subclass. */
private class ClassInstantiation extends Http::Server::HttpResponse::Range,
DataFlow::CallCfgNode
@@ -345,6 +368,8 @@ private module RestFramework {
.getMember("exceptions")
.getMember(className)
.getACall()
or
this = classRef().getACall() and className = "APIException"
}
override DataFlow::Node getBody() {

View File

@@ -13,6 +13,7 @@ private import semmle.python.Concepts
// This import is done like this to avoid importing the deprecated top-level things that
// would pollute the namespace
private import semmle.python.frameworks.PEP249::PEP249 as PEP249
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
@@ -34,10 +35,12 @@ module SqlAlchemy {
*/
module Engine {
/** Gets a reference to a SQLAlchemy Engine class. */
private API::Node classRef() {
API::Node classRef() {
result = API::moduleImport("sqlalchemy").getMember("engine").getMember("Engine")
or
result = API::moduleImport("sqlalchemy").getMember("future").getMember("Engine")
or
result = ModelOutput::getATypeNode("sqlalchemy.engine.Engine~Subclass").getASubclass*()
}
/**
@@ -87,7 +90,7 @@ module SqlAlchemy {
*/
module Connection {
/** Gets a reference to a SQLAlchemy Connection class. */
private API::Node classRef() {
API::Node classRef() {
result =
API::moduleImport("sqlalchemy")
.getMember("engine")
@@ -95,6 +98,8 @@ module SqlAlchemy {
.getMember("Connection")
or
result = API::moduleImport("sqlalchemy").getMember("future").getMember("Connection")
or
result = ModelOutput::getATypeNode("sqlalchemy.engine.Connection~Subclass").getASubclass*()
}
/**
@@ -178,8 +183,10 @@ module SqlAlchemy {
*/
module Session {
/** Gets a reference to the `sqlalchemy.orm.Session` class. */
private API::Node classRef() {
API::Node classRef() {
result = API::moduleImport("sqlalchemy").getMember("orm").getMember("Session")
or
result = ModelOutput::getATypeNode("sqlalchemy.orm.Session~Subclass").getASubclass*()
}
/**

View File

@@ -13,6 +13,7 @@ private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
@@ -35,6 +36,8 @@ module Starlette {
result = API::moduleImport("starlette").getMember("websockets").getMember("WebSocket")
or
result = API::moduleImport("fastapi").getMember("WebSocket")
or
result = ModelOutput::getATypeNode("starlette.websockets.WebSocket~Subclass").getASubclass*()
}
/**
@@ -100,8 +103,10 @@ module Starlette {
*/
module Url {
/** Gets a reference to the `starlette.requests.URL` class. */
private API::Node classRef() {
API::Node classRef() {
result = API::moduleImport("starlette").getMember("requests").getMember("URL")
or
result = ModelOutput::getATypeNode("starlette.requests.URL~Subclass").getASubclass*()
}
/**
@@ -161,9 +166,6 @@ module Starlette {
}
}
/** DEPRECATED: Alias for Url */
deprecated module URL = Url;
/**
* A call to the `starlette.responses.FileResponse` constructor as a sink for Filesystem access.
*/

View File

@@ -17,6 +17,7 @@ private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
// modeling split over multiple files to keep this file from becoming too big
private import semmle.python.frameworks.Stdlib.Urllib
private import semmle.python.frameworks.Stdlib.Urllib2
private import semmle.python.frameworks.data.ModelsAsData
/** Provides models for the Python standard library. */
module Stdlib {
@@ -181,8 +182,10 @@ module Stdlib {
*/
module SplitResult {
/** Gets a reference to the `urllib.parse.SplitResult` class. */
private API::Node classRef() {
API::Node classRef() {
result = API::moduleImport("urllib").getMember("parse").getMember("SplitResult")
or
result = ModelOutput::getATypeNode("urllib.parse.SplitResult~Subclass").getASubclass*()
}
/**
@@ -252,8 +255,10 @@ module Stdlib {
*/
module Logger {
/** Gets a reference to the `logging.Logger` class or any subclass. */
private API::Node subclassRef() {
API::Node subclassRef() {
result = API::moduleImport("logging").getMember("Logger").getASubclass*()
or
result = ModelOutput::getATypeNode("logging.Logger~Subclass").getASubclass*()
}
/**
@@ -292,13 +297,15 @@ module Stdlib {
}
/**
* INTERNAL: Do not use.
*
* Provides models for the Python standard library.
*
* This module is marked as internal (see above) rather than being a supported public
* API, as exposing it properly means committing to 1-year deprecation policy, and the
* code is not in a polished enough state that we want to do so -- at least not without
* having convincing use-cases for it :)
*/
private module StdlibPrivate {
module StdlibPrivate {
// ---------------------------------------------------------------------------
// os
// ---------------------------------------------------------------------------
@@ -1293,14 +1300,36 @@ private module StdlibPrivate {
// pickle
// ---------------------------------------------------------------------------
/** Gets a reference to any of the `pickle` modules. */
API::Node pickle() { result = API::moduleImport(["pickle", "cPickle", "_pickle"]) }
API::Node pickle() {
result = API::moduleImport(["pickle", "cPickle", "_pickle"])
or
result = ModelOutput::getATypeNode("pickle~Alias")
}
/**
* Gets a reference to `pickle.load`
*/
API::Node pickle_load() {
result = pickle().getMember("load")
or
result = ModelOutput::getATypeNode("pickle.load~Alias")
}
/**
* Gets a reference to `pickle.loads`
*/
API::Node pickle_loads() {
result = pickle().getMember("loads")
or
result = ModelOutput::getATypeNode("pickle.loads~Alias")
}
/**
* A call to `pickle.load`
* See https://docs.python.org/3/library/pickle.html#pickle.load
*/
private class PickleLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
PickleLoadCall() { this = pickle().getMember("load").getACall() }
private class PickleLoadCall extends Decoding::Range, API::CallNode {
PickleLoadCall() { this = pickle_load().getACall() }
override predicate mayExecuteInput() { any() }
@@ -1315,8 +1344,8 @@ private module StdlibPrivate {
* A call to `pickle.loads`
* See https://docs.python.org/3/library/pickle.html#pickle.loads
*/
private class PickleLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
PickleLoadsCall() { this = pickle().getMember("loads").getACall() }
private class PickleLoadsCall extends Decoding::Range, API::CallNode {
PickleLoadsCall() { this = pickle_loads().getACall() }
override predicate mayExecuteInput() { any() }
@@ -1729,8 +1758,21 @@ private module StdlibPrivate {
* See https://docs.python.org/3/library/cgi.html.
*/
module FieldStorage {
/** Gets a reference to the `cgi.FieldStorage` class. */
API::Node classRef() { result = cgi().getMember("FieldStorage") }
/**
* DEPRECATED: Use `subclassRef` predicate instead.
*
* Gets a reference to the `cgi.FieldStorage` class.
*/
deprecated API::Node classRef() {
result = API::moduleImport("cgi").getMember("FieldStorage")
}
/** Gets a reference to the `cgi.FieldStorage` class or any subclass. */
API::Node subclassRef() {
result = API::moduleImport("cgi").getMember("FieldStorage").getASubclass*()
or
result = ModelOutput::getATypeNode("cgi.FieldStorage~Subclass").getASubclass*()
}
/**
* A source of instances of `cgi.FieldStorage`, extend this class to model new instances.
@@ -1753,13 +1795,13 @@ private module StdlibPrivate {
private class ClassInstantiation extends InstanceSource, RemoteFlowSource::Range,
DataFlow::CallCfgNode
{
ClassInstantiation() { this = classRef().getACall() }
ClassInstantiation() { this = subclassRef().getACall() }
override string getSourceType() { result = "cgi.FieldStorage" }
}
/** Gets a reference to an instance of `cgi.FieldStorage`. */
API::Node instance() { result = classRef().getReturn() }
API::Node instance() { result = subclassRef().getReturn() }
/** Gets a reference to the `getvalue` method on a `cgi.FieldStorage` instance. */
API::Node getvalueRef() { result = instance().getMember("getvalue") }
@@ -1925,14 +1967,8 @@ private module StdlibPrivate {
result = cgiHttpServer().getMember("CGIHTTPRequestHandler")
}
}
/** DEPRECATED: Alias for CgiHttpRequestHandler */
deprecated module CGIHTTPRequestHandler = CgiHttpRequestHandler;
}
/** DEPRECATED: Alias for CgiHttpServer */
deprecated module CGIHTTPServer = CgiHttpServer;
// ---------------------------------------------------------------------------
// http (Python 3 only)
// ---------------------------------------------------------------------------
@@ -2000,9 +2036,6 @@ private module StdlibPrivate {
*/
deprecated API::Node classRef() { result = server().getMember("CGIHTTPRequestHandler") }
}
/** DEPRECATED: Alias for CgiHttpRequestHandler */
deprecated module CGIHTTPRequestHandler = CgiHttpRequestHandler;
}
}
@@ -2013,7 +2046,7 @@ private module StdlibPrivate {
* - https://docs.python.org/3.9/library/http.server.html#http.server.BaseHTTPRequestHandler
* - https://docs.python.org/2.7/library/basehttpserver.html#BaseHTTPServer.BaseHTTPRequestHandler
*/
private module HttpRequestHandler {
module BaseHttpRequestHandler {
/** Gets a reference to the `BaseHttpRequestHandler` class or any subclass. */
API::Node subclassRef() {
result =
@@ -2027,6 +2060,9 @@ private module StdlibPrivate {
API::moduleImport("http").getMember("server").getMember("SimpleHTTPRequestHandler"),
API::moduleImport("http").getMember("server").getMember("CGIHTTPRequestHandler"),
].getASubclass*()
or
result =
ModelOutput::getATypeNode("http.server.BaseHTTPRequestHandler~Subclass").getASubclass*()
}
/** A HttpRequestHandler class definition (most likely in project code). */
@@ -2121,17 +2157,20 @@ private module StdlibPrivate {
// wsgiref.simple_server
// ---------------------------------------------------------------------------
/** Provides models for the `wsgiref.simple_server` module. */
private module WsgirefSimpleServer {
module WsgirefSimpleServer {
/**
 * Gets a reference to the `wsgiref.simple_server.WSGIServer` class or any subclass of
 * it, including subclasses of the type nodes that `ModelOutput::getATypeNode` reports
 * for `wsgiref.simple_server.WSGIServer~Subclass`.
 */
API::Node subclassRef() {
result =
API::moduleImport("wsgiref")
.getMember("simple_server")
.getMember("WSGIServer")
.getASubclass*()
or
result =
ModelOutput::getATypeNode("wsgiref.simple_server.WSGIServer~Subclass").getASubclass*()
}
class WsgiServerSubclass extends Class, SelfRefMixin {
WsgiServerSubclass() {
this.getParent() =
API::moduleImport("wsgiref")
.getMember("simple_server")
.getMember("WSGIServer")
.getASubclass*()
.asSource()
.asExpr()
}
WsgiServerSubclass() { this.getParent() = subclassRef().asSource().asExpr() }
}
/**
@@ -2148,13 +2187,7 @@ private module StdlibPrivate {
exists(DataFlow::Node appArg, DataFlow::CallCfgNode setAppCall |
(
setAppCall =
API::moduleImport("wsgiref")
.getMember("simple_server")
.getMember("WSGIServer")
.getASubclass*()
.getReturn()
.getMember("set_app")
.getACall()
WsgirefSimpleServer::subclassRef().getReturn().getMember("set_app").getACall()
or
setAppCall
.(DataFlow::MethodCallNode)
@@ -2191,9 +2224,6 @@ private module StdlibPrivate {
}
}
/** DEPRECATED: Alias for WsgiEnvirontParameter */
deprecated class WSGIEnvirontParameter = WsgiEnvirontParameter;
/**
* Gets a reference to the parameter of a `WsgirefSimpleServerApplication` that
* takes the `start_response` function.
@@ -2292,7 +2322,7 @@ private module StdlibPrivate {
*/
module HttpConnection {
/** Gets a reference to the `http.client.HttpConnection` class. */
private API::Node classRef() {
API::Node classRef() {
exists(string className | className in ["HTTPConnection", "HTTPSConnection"] |
// Python 3
result = API::moduleImport("http").getMember("client").getMember(className)
@@ -2303,6 +2333,8 @@ private module StdlibPrivate {
result =
API::moduleImport("six").getMember("moves").getMember("http_client").getMember(className)
)
or
result = ModelOutput::getATypeNode("http.client.HTTPConnection~Subclass").getASubclass*()
}
/**
@@ -2414,8 +2446,10 @@ private module StdlibPrivate {
*/
module HttpResponse {
/** Gets a reference to the `http.client.HttpResponse` class. */
private API::Node classRef() {
API::Node classRef() {
result = API::moduleImport("http").getMember("client").getMember("HTTPResponse")
or
result = ModelOutput::getATypeNode("http.client.HTTPResponse~Subclass").getASubclass*()
}
/**
@@ -3085,7 +3119,7 @@ private module StdlibPrivate {
result = API::moduleImport("re").getMember("compile").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input in ["Argument[0]", "Argument[pattern:]"] and
output = "ReturnValue.Attribute[pattern]" and
preservesValue = true
@@ -3116,7 +3150,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(string arg |
this = "re.Match" and arg = "Argument[1]"
or
@@ -3173,7 +3207,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
methodName = "expand" and
preservesValue = false and
(
@@ -3229,7 +3263,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(int offset |
// for non-compiled regex the first argument is the pattern, so we need to
// account for this difference
@@ -3534,8 +3568,10 @@ private module StdlibPrivate {
*/
module StringIO {
/** Gets a reference to the `io.StringIO` class. */
private API::Node classRef() {
API::Node classRef() {
result = API::moduleImport("io").getMember(["StringIO", "BytesIO"])
or
result = ModelOutput::getATypeNode("io.StringIO~Subclass").getASubclass*()
}
/**
@@ -3585,6 +3621,12 @@ private module StdlibPrivate {
// ---------------------------------------------------------------------------
// xml.etree.ElementTree
// ---------------------------------------------------------------------------
/**
 * Gets an API node for the `xml.etree.ElementTree` module (reached through
 * `xml` -> `etree` -> `ElementTree`) or anything the API graph considers a subclass
 * of it, as well as any node contributed through models-as-data under the type
 * name `xml.etree.ElementTree~Subclass`.
 */
API::Node elementTreeClassRef() {
  // nodes supplied by models-as-data type definitions
  result = ModelOutput::getATypeNode("xml.etree.ElementTree~Subclass").getASubclass*()
  or
  // the import path itself
  result = API::moduleImport("xml").getMember("etree").getMember("ElementTree").getASubclass*()
}
/**
* An instance of `xml.etree.ElementTree.ElementTree`.
*
@@ -3592,20 +3634,10 @@ private module StdlibPrivate {
*/
private API::Node elementTreeInstance() {
//parse to a tree
result =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("parse")
.getReturn()
result = elementTreeClassRef().getMember("parse").getReturn()
or
// construct a tree without parsing
result =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("ElementTree")
.getReturn()
result = elementTreeClassRef().getMember("ElementTree").getReturn()
}
/**
@@ -3618,21 +3650,9 @@ private module StdlibPrivate {
result = elementTreeInstance().getMember(["parse", "getroot"]).getReturn()
or
// parse directly to an element
result =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember(["fromstring", "fromstringlist", "XML"])
.getReturn()
result = elementTreeClassRef().getMember(["fromstring", "fromstringlist", "XML"]).getReturn()
or
result =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("XMLParser")
.getReturn()
.getMember("close")
.getReturn()
result = elementTreeClassRef().getMember("XMLParser").getReturn().getMember("close").getReturn()
}
/**
@@ -3677,12 +3697,7 @@ private module StdlibPrivate {
/** A direct instantiation of `xml.etree` parsers. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
ClassInstantiation() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember(["XMLParser", "XMLPullParser"])
.getACall()
this = elementTreeClassRef().getMember(["XMLParser", "XMLPullParser"]).getACall()
}
}
@@ -3739,9 +3754,7 @@ private module StdlibPrivate {
private class XmlEtreeParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range {
XmlEtreeParsing() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
elementTreeClassRef()
.getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"])
.getACall()
or
@@ -3789,12 +3802,7 @@ private module StdlibPrivate {
*/
private class FileAccessFromXmlEtreeParsing extends XmlEtreeParsing, FileSystemAccess::Range {
FileAccessFromXmlEtreeParsing() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember(["parse", "iterparse"])
.getACall()
this = elementTreeClassRef().getMember(["parse", "iterparse"]).getACall()
or
this = elementTreeInstance().getMember("parse").getACall()
// I considered whether we should try to reduce FPs from people passing file-like
@@ -4079,7 +4087,7 @@ private module StdlibPrivate {
result = API::builtin("dict").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[0].DictionaryElement[" + key + "]" and
output = "ReturnValue.DictionaryElement[" + key + "]" and
@@ -4108,7 +4116,7 @@ private module StdlibPrivate {
result = API::builtin("list").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
(
input = "Argument[0].ListElement"
or
@@ -4138,7 +4146,7 @@ private module StdlibPrivate {
result = API::builtin("tuple").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]" and
output = "ReturnValue.TupleElement[" + i.toString() + "]" and
@@ -4163,7 +4171,7 @@ private module StdlibPrivate {
result = API::builtin("set").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
(
input = "Argument[0].ListElement"
or
@@ -4193,8 +4201,8 @@ private module StdlibPrivate {
result = API::builtin("frozenset").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
any(SetSummary s).propagatesFlowExt(input, output, preservesValue)
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
any(SetSummary s).propagatesFlow(input, output, preservesValue)
}
}
@@ -4211,7 +4219,7 @@ private module StdlibPrivate {
result = API::builtin("reversed").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
(
input = "Argument[0].ListElement"
or
@@ -4241,7 +4249,7 @@ private module StdlibPrivate {
result = API::builtin("sorted").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(string content |
content = "ListElement"
or
@@ -4273,7 +4281,7 @@ private module StdlibPrivate {
result = API::builtin("iter").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
(
input = "Argument[0].ListElement"
or
@@ -4303,7 +4311,7 @@ private module StdlibPrivate {
result = API::builtin("next").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
(
input = "Argument[0].ListElement"
or
@@ -4336,7 +4344,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(string content |
content = "ListElement"
or
@@ -4378,7 +4386,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[self].ListElement" and
output = "ReturnValue" and
preservesValue = true
@@ -4415,7 +4423,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue" and
preservesValue = true
@@ -4438,7 +4446,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue" and
preservesValue = true
@@ -4460,7 +4468,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// default value
input = "Argument[1]" and
output = "ReturnValue" and
@@ -4483,7 +4491,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.TupleElement[1]" and
@@ -4509,7 +4517,7 @@ private module StdlibPrivate {
result.(DataFlow::AttrRead).getAttributeName() = "setdefault"
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// store/read steps with dictionary content of this is modeled in DataFlowPrivate
input = "Argument[1]" and
output = "ReturnValue" and
@@ -4538,7 +4546,7 @@ private module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// If key is in the dictionary, return its value.
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue" and
@@ -4567,7 +4575,7 @@ private module StdlibPrivate {
result.(DataFlow::AttrRead).getAttributeName() = "values"
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.ListElement" and
@@ -4594,7 +4602,7 @@ private module StdlibPrivate {
result.(DataFlow::AttrRead).getAttributeName() = "keys"
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
input = "Argument[self]" and
output = "ReturnValue" and
@@ -4618,7 +4626,7 @@ private module StdlibPrivate {
result.(DataFlow::AttrRead).getAttributeName() = "items"
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.ListElement.TupleElement[1]" and
@@ -4648,7 +4656,7 @@ private module StdlibPrivate {
result.(DataFlow::AttrRead).getAttributeName() = "append"
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// newly added element added to this
input = "Argument[0]" and
output = "Argument[self].ListElement" and
@@ -4675,7 +4683,7 @@ private module StdlibPrivate {
result.(DataFlow::AttrRead).getAttributeName() = "add"
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// newly added element added to this
input = "Argument[0]" and
output = "Argument[self].SetElement" and
@@ -4705,7 +4713,7 @@ private module StdlibPrivate {
API::moduleImport("os").getMember(["getenv", "getenvb"]).getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input in ["Argument[1]", "Argument[default:]"] and
output = "ReturnValue" and
preservesValue = true

View File

@@ -10,6 +10,7 @@
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.security.dataflow.UrlRedirectCustomizations
/**
* Provides models for the `urllib` module, part of
@@ -70,4 +71,55 @@ private module Urllib {
}
}
}
/**
 * Provides models for the `urllib.parse` module.
 */
module Parse {
  /** Gets a call to `urllib.parse.urlparse`. */
  private DataFlow::CallCfgNode getUrlParseCall() {
    exists(API::Node urlparse |
      urlparse = API::moduleImport("urllib").getMember("parse").getMember("urlparse")
    |
      result = urlparse.getACall()
    )
  }

  /**
   * Holds if `g` is a guard that inspects the `netloc` attribute of the result of a
   * `urllib.parse.urlparse` call whose first positional argument is `node`, and
   * `branch` is the outcome of `g` that is reported for this check.
   *
   * Two shapes are recognized:
   * - the `netloc` read is itself the guard (`branch` is `false`), and
   * - the `netloc` read is an operand of a comparison that has exactly one operator:
   *   `==`, `is` and `in` give `branch = true`, whereas `!=`, `is not` and `not in`
   *   give `branch = false`.
   */
  private predicate netlocCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
    exists(DataFlow::CallCfgNode parsed, DataFlow::AttrRead netloc |
      parsed = getUrlParseCall() and
      node = parsed.getArg(0).asCfgNode() and
      netloc = parsed.getAnAttributeRead("netloc")
    |
      // the attribute read is used directly as the condition
      branch = false and
      g = netloc.asCfgNode()
      or
      // the attribute read is compared against something (we don't care what)
      exists(Compare comparison, string symbol |
        comparison = g.getNode() and
        comparison.getASubExpression() = netloc.asExpr() and
        // `unique` restricts this to comparisons with a single operator
        symbol = unique(Cmpop cmpOp | cmpOp = comparison.getAnOp()).getSymbol()
      |
        branch = true and symbol in ["==", "is", "in"]
        or
        branch = false and symbol in ["!=", "is not", "not in"]
      )
    )
  }

  /**
   * A node guarded by a check of `urllib.parse.urlparse().netloc`, considered a
   * sanitizer for URL redirection.
   */
  private class NetlocCheck extends UrlRedirect::Sanitizer {
    NetlocCheck() { DataFlow::BarrierGuard<netlocCheck/3>::getABarrierNode() = this }

    override predicate sanitizes(UrlRedirect::FlowState state) {
      // `urlparse` does not handle backslashes
      state instanceof UrlRedirect::NoBackslashes
    }
  }
}
}

View File

@@ -12,6 +12,7 @@ private import semmle.python.ApiGraphs
private import semmle.python.regex
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
@@ -87,7 +88,11 @@ module Tornado {
*/
module RequestHandler {
/** Gets a reference to the `tornado.web.RequestHandler` class or any subclass. */
API::Node subclassRef() { result = web().getMember("RequestHandler").getASubclass*() }
API::Node subclassRef() {
  // subclasses contributed through models-as-data type definitions
  result = ModelOutput::getATypeNode("tornado.web.RequestHandler~Subclass").getASubclass*()
  or
  // the class itself, plus anything the API graph sees deriving from it
  result = web().getMember("RequestHandler").getASubclass*()
}
/** A RequestHandler class (most likely in project code). */
class RequestHandlerClass extends Class {
@@ -213,7 +218,11 @@ module Tornado {
*/
module Application {
/** Gets a reference to the `tornado.web.Application` class. */
API::Node classRef() { result = web().getMember("Application") }
API::Node classRef() {
  // subclasses contributed through models-as-data type definitions
  result = ModelOutput::getATypeNode("tornado.web.Application~Subclass").getASubclass*()
  or
  // the `Application` member of `tornado.web` itself
  result = web().getMember("Application")
}
/**
* A source of instances of `tornado.web.Application`, extend this class to model new instances.
@@ -270,7 +279,12 @@ module Tornado {
*/
module HttpServerRequest {
/** Gets a reference to the `tornado.httputil.HTTPServerRequest` class. */
API::Node classRef() { result = httputil().getMember("HttpServerRequest") }
API::Node classRef() {
  // Tornado spells the class `HTTPServerRequest`, and member lookup is case-sensitive,
  // so the `HttpServerRequest` spelling alone never matches real Tornado code. The old
  // spelling is kept alongside the correct one so nothing that matched before is lost.
  result = httputil().getMember(["HTTPServerRequest", "HttpServerRequest"])
  or
  // Subclasses contributed through models-as-data type definitions. The type name is
  // left exactly as it was, since it has to agree with the model rows that define it
  // (those rows are not visible from here).
  result =
    ModelOutput::getATypeNode("tornado.httputil.HttpServerRequest~Subclass").getASubclass*()
}
/**
* A source of instances of `tornado.httputil.HttpServerRequest`, extend this class to model new instances.

View File

@@ -9,15 +9,18 @@
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.data.ModelsAsData
/**
* INTERNAL: Do not use.
*
* Provides models for the `urllib3` PyPI package.
*
* See
* - https://pypi.org/project/urllib3/
* - https://urllib3.readthedocs.io/en/stable/reference/
*/
private module Urllib3 {
module Urllib3 {
/**
* Provides models for the `urllib3.request.RequestMethods` class and subclasses, such
* as the `urllib3.PoolManager` class
@@ -30,7 +33,7 @@ private module Urllib3 {
*/
module PoolManager {
/** Gets a reference to the `urllib3.PoolManager` class. */
private API::Node classRef() {
API::Node classRef() {
result =
API::moduleImport("urllib3")
.getMember(["PoolManager", "ProxyManager", "HTTPConnectionPool", "HTTPSConnectionPool"])
@@ -40,6 +43,8 @@ private module Urllib3 {
.getMember("request")
.getMember("RequestMethods")
.getASubclass+()
or
result = ModelOutput::getATypeNode("urllib3.PoolManager~Subclass").getASubclass*()
}
/**

View File

@@ -10,6 +10,7 @@ private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.security.dataflow.UrlRedirectCustomizations
/**
* INTERNAL: Do not use.
@@ -108,5 +109,32 @@ module Yarl {
this.(DataFlow::AttrRead).getAttributeName() = "query"
}
}
/**
 * Holds if `g` is a call to the `is_absolute` method on a `ClassInstantiation`
 * (presumably a `yarl.URL(...)` construction; confirm against its definition) whose
 * first positional argument is `node`. `branch` is always `false`.
 */
private predicate yarlUrlIsAbsoluteCall(
  DataFlow::GuardNode g, ControlFlowNode node, boolean branch
) {
  branch = false and
  exists(DataFlow::MethodCallNode isAbsolute, ClassInstantiation url |
    isAbsolute.calls(url, "is_absolute") and
    node = url.getArg(0).asCfgNode() and
    g = isAbsolute.asCfgNode()
  )
}
/**
 * A node guarded by a call to `yarl.URL.is_absolute`, considered a sanitizer for
 * URL redirection.
 *
 * See https://yarl.aio-libs.org/en/latest/api/#absolute-and-relative-urls.
 */
private class YarlIsAbsoluteUrl extends UrlRedirect::Sanitizer {
  YarlIsAbsoluteUrl() {
    DataFlow::BarrierGuard<yarlUrlIsAbsoluteCall/3>::getABarrierNode() = this
  }

  override predicate sanitizes(UrlRedirect::FlowState state) {
    // `is_absolute` does not handle backslashes
    state instanceof UrlRedirect::NoBackslashes
  }
}
}
}

View File

@@ -70,8 +70,8 @@ private module API = Specific::API;
private module DataFlow = Specific::DataFlow;
private import Specific::AccessPathSyntax
private import ApiGraphModelsExtensions as Extensions
private import codeql.dataflow.internal.AccessPathSyntax
/** Module containing hooks for providing input data to be interpreted as a model. */
module ModelInput {
@@ -327,29 +327,29 @@ predicate isRelevantFullPath(string type, string path) {
}
/** A string from a CSV row that should be parsed as an access path. */
private class AccessPathRange extends AccessPath::Range {
AccessPathRange() {
isRelevantFullPath(_, this)
or
exists(string type | isRelevantType(type) |
summaryModel(type, _, this, _, _) or
summaryModel(type, _, _, this, _)
)
or
typeVariableModel(_, this)
}
private predicate accessPathRange(string s) {
  // the path column of a type-variable row
  typeVariableModel(_, s)
  or
  // a full path that is relevant for some type
  isRelevantFullPath(_, s)
  or
  // the third or fourth column of a summary row whose type is relevant
  exists(string relevantType | isRelevantType(relevantType) |
    summaryModel(relevantType, _, _, s, _) or
    summaryModel(relevantType, _, s, _, _)
  )
}
import AccessPath<accessPathRange/1>
/**
* Gets a successor of `node` in the API graph.
*/
bindingset[token]
API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) {
API::Node getSuccessorFromNode(API::Node node, AccessPathTokenBase token) {
// API graphs use the same label for arguments and parameters. An edge originating from a
// use-node represents an argument, and an edge originating from a def-node represents a parameter.
// We just map both to the same thing.
token.getName() = ["Argument", "Parameter"] and
result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument()))
result = node.getParameter(parseIntUnbounded(token.getAnArgument()))
or
token.getName() = "ReturnValue" and
result = node.getReturn()
@@ -362,11 +362,9 @@ API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) {
* Gets an API-graph successor for the given invocation.
*/
bindingset[token]
API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken token) {
API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathTokenBase token) {
token.getName() = "Argument" and
result =
invoke
.getParameter(AccessPath::parseIntWithArity(token.getAnArgument(), invoke.getNumArgument()))
result = invoke.getParameter(parseIntWithArity(token.getAnArgument(), invoke.getNumArgument()))
or
token.getName() = "ReturnValue" and
result = invoke.getReturn()
@@ -378,10 +376,12 @@ API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken to
/**
* Holds if `invoke` invokes a call-site filter given by `token`.
*/
pragma[inline]
private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, AccessPathToken token) {
bindingset[token]
private predicate invocationMatchesCallSiteFilter(
Specific::InvokeNode invoke, AccessPathTokenBase token
) {
token.getName() = "WithArity" and
invoke.getNumArgument() = AccessPath::parseIntUnbounded(token.getAnArgument())
invoke.getNumArgument() = parseIntUnbounded(token.getAnArgument())
or
Specific::invocationMatchesExtraCallSiteFilter(invoke, token)
}

View File

@@ -4,14 +4,14 @@
* It must export the following members:
* ```ql
* class Unit // a unit type
* module AccessPathSyntax // a re-export of the AccessPathSyntax module
*
* class InvokeNode // a type representing an invocation connected to the API graph
* module API // the API graph module
* predicate isPackageUsed(string package)
* API::Node getExtraNodeFromPath(string package, string type, string path, int n)
* API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token)
* API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathToken token)
* predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathToken token)
* API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token)
* API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathTokenBase token)
* predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathTokenBase token)
* InvokeNode getAnInvocationOf(API::Node node)
* predicate isExtraValidTokenNameInIdentifyingAccessPath(string name)
* predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name)
@@ -21,11 +21,10 @@
private import python as PY
private import ApiGraphModels
private import codeql.dataflow.internal.AccessPathSyntax
import semmle.python.ApiGraphs::API as API
// Re-export libraries needed by ApiGraphModels.qll
import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax
import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow
private import AccessPathSyntax
/**
* Holds if models describing `type` may be relevant for the analysis of this database.
@@ -49,7 +48,7 @@ API::Node getExtraNodeFromType(string type) { result = API::moduleImport(type) }
* Gets a Python-specific API graph successor of `node` reachable by resolving `token`.
*/
bindingset[token]
API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) {
API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) {
token.getName() = "Member" and
result = node.getMember(token.getAnArgument())
or
@@ -89,7 +88,7 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) {
* Gets a Python-specific API graph successor of `node` reachable by resolving `token`.
*/
bindingset[token]
API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathToken token) {
API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathTokenBase token) {
token.getName() = "Instance" and
result = node.getReturn()
or
@@ -129,7 +128,7 @@ API::Node getAFuzzySuccessor(API::Node node) {
* Holds if `invoke` matches the PY-specific call site filter in `token`.
*/
bindingset[token]
predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathToken token) {
predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathTokenBase token) {
token.getName() = "Call" and exists(invoke) // there is only one kind of call in Python.
}

File diff suppressed because it is too large Load Diff

View File

@@ -6,6 +6,7 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowDispatch
/**
* INTERNAL: Do not use.
@@ -16,13 +17,16 @@ abstract class SelfRefMixin extends Class {
/**
* Gets a reference to instances of this class, originating from a self parameter of
* a method defined on this class.
*
* Note: TODO: This doesn't take MRO into account
* Note: TODO: This doesn't take staticmethod/classmethod into account
*/
private DataFlow::TypeTrackingNode getASelfRef(DataFlow::TypeTracker t) {
t.start() and
result.(DataFlow::ParameterNode).getParameter() = this.getAMethod().getArg(0)
exists(Class cls, Function meth |
cls = getADirectSuperclass*(this) and
meth = cls.getAMethod() and
not isStaticmethod(meth) and
not isClassmethod(meth) and
result.(DataFlow::ParameterNode).getParameter() = meth.getArg(0)
)
or
exists(DataFlow::TypeTracker t2 | result = this.getASelfRef(t2).track(t2, t))
}
@@ -30,9 +34,6 @@ abstract class SelfRefMixin extends Class {
/**
* Gets a reference to instances of this class, originating from a self parameter of
* a method defined on this class.
*
* Note: TODO: This doesn't take MRO into account
* Note: TODO: This doesn't take staticmethod/classmethod into account
*/
DataFlow::Node getASelfRef() { this.getASelfRef(DataFlow::TypeTracker::end()).flowsTo(result) }
}

View File

@@ -7,32 +7,23 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.ImportResolution
private import semmle.python.ApiGraphs
private import semmle.python.filters.Tests
private import semmle.python.Module
// very much inspired by the draft at https://github.com/github/codeql/pull/5632
private module NotExposed {
module NotExposed {
// Instructions:
// This needs to be automated better, but for this prototype, here are some rough instructions:
// 0) get a database of the library you are about to model
// 1) fill out the `getAlreadyModeledClass` body below
// 2) quick-eval the `quickEvalMe` predicate below, and copy the output to your modeling predicate
class MySpec extends FindSubclassesSpec {
MySpec() { this = "MySpec" }
override API::Node getAlreadyModeledClass() {
// FILL ME OUT ! (but don't commit with any changes)
none()
// for example
// result = API::moduleImport("rest_framework").getMember("views").getMember("APIView")
}
}
predicate quickEvalMe(string newImport) {
newImport =
"// imports generated by python/frameworks/internal/SubclassFinder.qll\n" + "this = API::" +
concat(string newModelFullyQualified |
newModel(any(MySpec spec), newModelFullyQualified, _, _, _)
newModel(any(FindSubclassesSpec spec), newModelFullyQualified, _, _, _)
|
fullyQualifiedToApiGraphPath(newModelFullyQualified), " or this = API::"
)
@@ -75,14 +66,31 @@ private module NotExposed {
bindingset[this]
abstract class FindSubclassesSpec extends string {
/**
* Gets an API node for a class that has already been modeled. You can include
* `.getASubclass*()` without causing problems, but it is not needed.
*/
abstract API::Node getAlreadyModeledClass();
/**
* Gets the fully qualified name that this spec represents.
*
* This should be implemented by all classes for which `getSuperClass` is
* implemented, at least if they are defined in a different module than what they
* subclass.
*/
string getFullyQualifiedName() { none() }
/**
* Gets the super-class spec of this spec. The default implementation has no result;
* specs override it to link themselves to another spec.
*
* NOTE(review): presumably the result is the spec for the class that this spec's
* class derives from -- confirm against the overriding specs.
*/
FindSubclassesSpec getSuperClass() { none() }
/** Gets a spec whose `getSuperClass()` is this spec. */
final FindSubclassesSpec getSubClass() { result.getSuperClass() = this }
}
/**
* Holds if `newModelFullyQualified` describes either a new subclass, or a new alias, belonging to `spec` that we should include in our automated modeling.
* This new element is defined by `ast`, which is defined at `loc` in the module `mod`.
*/
query predicate newModel(
predicate newModel(
FindSubclassesSpec spec, string newModelFullyQualified, AstNode ast, Module mod, Location loc
) {
(
@@ -90,7 +98,8 @@ private module NotExposed {
or
newDirectAlias(spec, newModelFullyQualified, ast, mod, loc)
or
newImportStar(spec, newModelFullyQualified, ast, mod, _, _, loc)
newImportAlias(spec, newModelFullyQualified, mod, _, _, loc) and
ast = mod
)
}
@@ -103,15 +112,34 @@ private module NotExposed {
)
}
/**
* Holds if `fullyQualifiedName` is already explicitly modeled in the `spec`.
*
* For specs that do `.getASubclass*()`, items found by following a `.getASubclass`
* edge will not be considered explicitly modeled.
*/
bindingset[fullyQualifiedName]
predicate alreadyModeled(FindSubclassesSpec spec, string fullyQualifiedName) {
predicate alreadyExplicitlyModeled(FindSubclassesSpec spec, string fullyQualifiedName) {
fullyQualifiedToApiGraphPath(fullyQualifiedName) = spec.getAlreadyModeledClass().getPath()
}
predicate isNonTestProjectCode(AstNode ast) {
not ast.getScope*() instanceof TestScope and
not ast.getLocation().getFile().getRelativePath().matches("tests/%") and
exists(ast.getLocation().getFile().getRelativePath())
/**
 * Holds if `mod` may contribute new models: either it is the dedicated
 * `find_subclass_test` module, or its file has a relative path that is not under an
 * `example`/`examples` directory or a top-level `docs` directory, and every segment
 * of its parent directory's relative path satisfies `legalShortName`.
 */
predicate isAllowedModule(Module mod) {
  // for tests
  "find_subclass_test" = mod.getName()
  or
  // don't include anything found in site-packages
  exists(mod.getFile().getRelativePath()) and
  // to counter things like `my-example/app/foo.py` being allowed under `app.foo`
  not exists(string segment |
    segment = mod.getFile().getParent().getRelativePath().splitAt("/") and
    not legalShortName(segment)
  ) and
  // skip `example(s)/` directories at any depth and a top-level `docs/` (case-insensitive)
  not mod.getFile().getRelativePath().regexpMatch("(?i)((^|/)examples?|^docs)/.*")
}
/**
 * Holds if `ast` is located in a file whose relative path starts with `tests/`, or
 * if `ast` is (transitively) enclosed in a `TestScope`.
 */
predicate isTestCode(AstNode ast) {
  ast.getLocation().getFile().getRelativePath().matches("tests/%")
  or
  ast.getScope*() instanceof TestScope
}
predicate hasAllStatement(Module mod) {
@@ -144,40 +172,47 @@ private module NotExposed {
* ```
*/
predicate newDirectAlias(
FindSubclassesSpec spec, string newAliasFullyQualified, ImportMember importMember, Module mod,
Location loc
FindSubclassesSpec spec, string newAliasFullyQualified, Expr value, Module mod, Location loc
) {
importMember = newOrExistingModeling(spec).getAValueReachableFromSource().asExpr() and
importMember.getScope() = mod and
loc = importMember.getLocation() and
(
mod.isPackageInit() and
newAliasFullyQualified = mod.getPackageName() + "." + importMember.getName()
or
not mod.isPackageInit() and
newAliasFullyQualified = mod.getName() + "." + importMember.getName()
) and
(
not hasAllStatement(mod)
or
mod.declaredInAll(importMember.getName())
) and
not alreadyModeled(spec, newAliasFullyQualified) and
isNonTestProjectCode(importMember)
exists(Alias alias | value = alias.getValue() |
value = newOrExistingModeling(spec).getASubclass*().getAValueReachableFromSource().asExpr() and
value.getScope() = mod and
loc = value.getLocation() and
exists(string base |
mod.isPackageInit() and base = mod.getPackageName()
or
not mod.isPackageInit() and base = mod.getName()
|
newAliasFullyQualified = base + "." + alias.getAsname().(Name).getId()
) and
(
not hasAllStatement(mod)
or
mod.declaredInAll(alias.getAsname().(Name).getId())
) and
not alreadyExplicitlyModeled(spec, newAliasFullyQualified) and
not isTestCode(value) and
isAllowedModule(mod)
)
}
/** same as `newDirectAlias` predicate, but handling `from <module> import *`, considering all `<member>`, where `<module>.<member>` belongs to `spec`. */
predicate newImportStar(
FindSubclassesSpec spec, string newAliasFullyQualified, ImportStar importStar, Module mod,
API::Node relevantClass, string relevantName, Location loc
/**
* Same as the `newDirectAlias` predicate, but written in a generic way to handle any import (including `import *`).
*
* It might be safe to delete `newDirectAlias` with this in place, but that has not been tested yet.
*/
predicate newImportAlias(
FindSubclassesSpec spec, string newAliasFullyQualified, Module mod, DataFlow::Node def,
string relevantName, Location loc
) {
relevantClass = newOrExistingModeling(spec) and
loc = importStar.getLocation() and
importStar.getScope() = mod and
// WHAT A HACK :D :D
relevantClass.getPath() =
relevantClass.getAPredecessor().getPath() + ".getMember(\"" + relevantName + "\")" and
relevantClass.getAPredecessor().getAValueReachableFromSource().asExpr() = importStar.getModule() and
loc = mod.getLocation() and
exists(API::Node relevantClass, ControlFlowNode value |
relevantClass = newOrExistingModeling(spec).getASubclass*() and
ImportResolution::module_export(mod, relevantName, def) and
value = relevantClass.getAValueReachableFromSource().asCfgNode() and
value = def.asCfgNode()
// value could be a ClassExpr if a new class is defined, or a Name if defining an alias
) and
(
mod.isPackageInit() and
newAliasFullyQualified = mod.getPackageName() + "." + relevantName
@@ -190,8 +225,9 @@ private module NotExposed {
or
mod.declaredInAll(relevantName)
) and
not alreadyModeled(spec, newAliasFullyQualified) and
isNonTestProjectCode(importStar)
not alreadyExplicitlyModeled(spec, newAliasFullyQualified) and
not isTestCode(mod) and
isAllowedModule(mod)
}
/** Holds if `classExpr` defines a new subclass that belongs to `spec`, which has the fully qualified name `newSubclassQualified`. */
@@ -203,7 +239,8 @@ private module NotExposed {
classExpr.getScope() = mod and
newSubclassQualified = mod.getName() + "." + classExpr.getName() and
loc = classExpr.getLocation() and
not alreadyModeled(spec, newSubclassQualified) and
isNonTestProjectCode(classExpr)
not alreadyExplicitlyModeled(spec, newSubclassQualified) and
not isTestCode(classExpr) and
isAllowedModule(mod)
}
}

View File

@@ -111,6 +111,7 @@ module Stages {
predicate ref() { 1 = 1 }
private import semmle.python.dataflow.new.DataFlow::DataFlow as NewDataFlow
private import semmle.python.dataflow.new.internal.TypeTrackingImpl as TypeTrackingImpl
private import semmle.python.ApiGraphs::API as API
/**
@@ -121,7 +122,7 @@ module Stages {
predicate backref() {
1 = 1
or
exists(any(NewDataFlow::TypeTracker t).append(_))
exists(TypeTrackingImpl::append(_, _))
or
exists(any(API::Node n).getAMember().getAValueReachableFromSource())
}

View File

@@ -333,36 +333,6 @@ predicate call3(
arg2 = call.getArg(2)
}
/**
* Holds if the attribute access `instantiation` binds the callable `function` to the
* object `self` in `context`.
*
* The object `obj` and attribute `name` are obtained from `receiver`. Two cases are covered:
* - ordinary lookup: `function` is the attribute `name` of the class of `obj`
*   (that class must not be `ObjectInternal::superType()`), and `self` is `obj`;
* - `super` lookup: `obj` is a `SuperInstance`; `function` is the attribute `name` declared
*   by the class that `findDeclaringClass(name)` finds in the MRO of the class of `self`,
*   starting at the start class of `obj`; and `self` is `obj.getSelf()`.
*/
bindingset[self, function]
deprecated predicate method_binding(
AttrNode instantiation, ObjectInternal self, CallableObjectInternal function,
PointsToContext context
) {
exists(ObjectInternal obj, string name | receiver(instantiation, context, obj, name) |
// Ordinary attribute lookup on the class of the receiver.
exists(ObjectInternal cls |
cls = obj.getClass() and
cls != ObjectInternal::superType() and
cls.attribute(name, function, _) and
self = obj
)
or
// Lookup through a `super` instance: resolve `name` along the MRO.
exists(SuperInstance sup, ClassObjectInternal decl |
sup = obj and
decl = Types::getMro(self.getClass()).startingAt(sup.getStartClass()).findDeclaringClass(name) and
Types::declaredAttribute(decl, name, function, _) and
self = sup.getSelf()
)
)
}
/**
* Helper for `method_binding`.
*
* Holds if `instantiation` accesses the attribute `name` on a node (given by
* `instantiation.getObject(name)`) that points to `obj` in `context`.
*/
pragma[noinline]
deprecated predicate receiver(
AttrNode instantiation, PointsToContext context, ObjectInternal obj, string name
) {
PointsToInternal::pointsTo(instantiation.getObject(name), context, obj, _)
}
/** Helper self parameters: `def meth(self, ...): ...`. */
pragma[noinline]
private predicate self_parameter(

View File

@@ -13,123 +13,6 @@ import semmle.python.essa.SsaDefinitions
private import semmle.python.types.Builtins
private import semmle.python.internal.CachedStages
deprecated module BasePointsTo {
  /**
   * INTERNAL -- Use n.refersTo(value, _, origin) instead.
   *
   * Holds if `f` is its own value and origin. This is the case when `f` is a
   * function, or a literal whose AST node is not an `ImmutableLiteral`.
   */
  pragma[noinline]
  predicate points_to(ControlFlowNode f, Object value, ControlFlowNode origin) {
    origin = f and
    value = f and
    (
      f.isFunction()
      or
      f.isLiteral() and not f.getNode() instanceof ImmutableLiteral
    )
  }
}
/**
 * Holds if `f` is the kwargs parameter (`**kwargs`) of some function and `cls`
 * is the dict type. In a function definition this parameter is always a dict.
 */
deprecated predicate kwargs_points_to(ControlFlowNode f, ClassObject cls) {
  cls = theDictType() and
  f.getNode() = any(Function callee).getKwarg()
}
/**
 * Holds if `f` is the varargs parameter (`*varargs`) of some function and `cls`
 * is the tuple type. In a function definition this parameter is always a tuple.
 */
deprecated predicate varargs_points_to(ControlFlowNode f, ClassObject cls) {
  cls = theTupleType() and
  f.getNode() = any(Function callee).getVararg()
}
/**
 * Gets the class of `obj` for the simple cases: the unknown value, built-in
 * objects, modules, callables (functions), collection literals and
 * comprehensions.
 *
 * This exists primarily for internal use. Use getAnInferredType() instead.
 */
pragma[noinline]
deprecated ClassObject simple_types(Object obj) {
  obj = unknownValue() and result = theUnknownType()
  or
  result.asBuiltin() = obj.asBuiltin().getClass()
  or
  result = theModuleType() and obj.getOrigin() instanceof Module
  or
  result = thePyFunctionType() and obj.getOrigin() instanceof CallableExpr
  or
  result = collection_literal(obj.getOrigin())
  or
  result = comprehension(obj.getOrigin())
}
/** Gets the class of the comprehension expression `e` (list, set, dict or generator). */
deprecated private ClassObject comprehension(Expr e) {
  result = theGeneratorType() and e instanceof GeneratorExp
  or
  result = theDictType() and e instanceof DictComp
  or
  result = theSetType() and e instanceof SetComp
  or
  result = theListType() and e instanceof ListComp
}
/** Gets the class of the collection literal `e` (list, set, dict or tuple). */
deprecated private ClassObject collection_literal(Expr e) {
  result = theTupleType() and e instanceof Tuple
  or
  result = theDictType() and e instanceof Dict
  or
  result = theSetType() and e instanceof Set
  or
  result = theListType() and e instanceof List
}
/**
 * Gets the integer value of the element at index `i` of the tuple `t`.
 *
 * The value is taken either from the `py_citems` relation (when the item is a
 * `NumericObject`), or from a numeric literal element when `t` is a `TupleNode`.
 */
deprecated private int tuple_index_value(Object t, int i) {
  exists(Object element |
    py_citems(t, i, element) and
    result = element.(NumericObject).intValue()
  )
  or
  result = t.(TupleNode).getElement(i).getNode().(Num).getN().toInt()
}
/**
* Gets an integer encoding of the version tuple `t`, built from the element
* values given by `tuple_index_value`:
* - `element0 * 10` if there is no element at index 1;
* - `element0 * 10 + element1` if there is no element at index 2, or that element is 0;
* - `element0 * 10 + element1 + 1` if the element at index 2 is greater than 0.
*/
pragma[noinline]
deprecated int version_tuple_value(Object t) {
not exists(tuple_index_value(t, 1)) and result = tuple_index_value(t, 0) * 10
or
not exists(tuple_index_value(t, 2)) and
result = tuple_index_value(t, 0) * 10 + tuple_index_value(t, 1)
or
tuple_index_value(t, 2) = 0 and result = tuple_index_value(t, 0) * 10 + tuple_index_value(t, 1)
or
// A non-zero third component bumps the encoding by one.
tuple_index_value(t, 2) > 0 and
result = tuple_index_value(t, 0) * 10 + tuple_index_value(t, 1) + 1
}
/**
* Gets a version number that represents an extreme (minimum or maximum) of the
* supported versions, in the same encoding as `version_tuple_value`.
*/
deprecated private int major_minor() {
if major_version() = 3
then (
// 3.3 to 3.7
result = 33 or result = 37
) else (
// 2.5 to 2.7
result = 25 or result = 27
)
}
/**
 * Compares the tuple object `t` against both the minimum and the maximum possible
 * `sys.version_info` values. The result is -1, 0 or 1 when the encoded value of
 * `t` is respectively below, equal to, or above one of those bounds.
 */
deprecated int version_tuple_compare(Object t) {
  exists(int actual, int bound |
    actual = version_tuple_value(t) and
    bound = major_minor()
  |
    actual < bound and result = -1
    or
    actual = bound and result = 0
    or
    actual > bound and result = 1
  )
}
/**
 * Holds if `cls` would be a new-style class if it had no explicit base classes:
 * it declares a metaclass, the analyzed major version is 3, or it is a built-in class.
 */
deprecated predicate baseless_is_new_style(ClassObject cls) {
  exists(cls.declaredMetaClass())
  or
  exists(cls) and major_version() = 3
  or
  cls.isBuiltin()
}
/*
* The following predicates exist in order to provide
* more precise type information than the underlying
@@ -159,49 +42,6 @@ private predicate class_defines_name(Class cls, string name) {
exists(SsaVariable var | name = var.getId() and var.getAUse() = cls.getANormalExit())
}
/**
* Gets a returned CFG node of `func`, provided that it is safe to track across returns.
*
* A returned node is excluded when it is a use of an SSA variable defined by a
* parameter, or when the function of `func` contains any branching CFG node.
*/
deprecated ControlFlowNode safe_return_node(PyFunctionObject func) {
result = func.getAReturnedNode() and
// Not a parameter
not exists(Parameter p, SsaVariable pvar |
p.asName().getAFlowNode() = pvar.getDefinition() and
result = pvar.getAUse()
) and
// No alternatives: the function must not contain any branch node at all
not exists(ControlFlowNode branch | branch.isBranch() and branch.getScope() = func.getFunction())
}
/**
 * Holds if it can be determined from the control flow graph alone that `func`
 * can never return. `sys.exit` is always treated as never returning.
 */
deprecated predicate function_can_never_return(FunctionObject func) {
  func = ModuleObject::named("sys").attr("exit")
  or
  /*
   * A Python function never returns if it has no normal exits that are not dominated by a
   * call to a function which itself never returns.
   */

  exists(Function body | body = func.getFunction() | not exists(body.getAnExitNode()))
}
/**
* Holds if `outer` contains `inner` (possibly `outer = inner`), both are contained
* within the same test, and `inner` is a load that is either a plain name use or
* an attribute lookup.
*/
pragma[noinline]
deprecated predicate contains_interesting_expression_within_test(
ControlFlowNode outer, ControlFlowNode inner
) {
inner.isLoad() and
exists(ControlFlowNode test |
outer.getAChild*() = inner and
test_contains(test, outer) and
test_contains(test, inner)
|
inner instanceof NameNode or
inner instanceof AttrNode
)
}
/** Holds if `expr` is a test (a branch) and `use` is within that test */
predicate test_contains(ControlFlowNode expr, ControlFlowNode use) {
expr.getNode() instanceof Expr and
@@ -209,31 +49,6 @@ predicate test_contains(ControlFlowNode expr, ControlFlowNode use) {
expr.getAChild*() = use
}
/**
 * Holds if `test` is a test (a branch), `use` is within that test, and `def` is
 * the refinement on an edge leaving that test with the given `sense`.
 */
deprecated predicate refinement_test(
  ControlFlowNode test, ControlFlowNode use, boolean sense, PyEdgeRefinement def
) {
  sense = def.getSense() and
  test = def.getPredecessor().getLastNode() and
  /*
   * Because calls such as `len` may create a new variable, we need to go via the source variable
   * That is perfectly safe as we are only dealing with calls that do not mutate their arguments.
   */

  use = def.getInput().getSourceVariable().(Variable).getAUse() and
  test_contains(test, use)
}
/**
 * Holds if `f` is an import of the form `from .[...] import name`, the enclosing
 * scope is an `__init__` module, and `var` is a variable called `name` that has
 * `f` as one of its uses.
 */
pragma[noinline]
deprecated predicate live_import_from_dot_in_init(ImportMemberNode f, EssaVariable var) {
  exists(string importedName |
    var.getAUse() = f and
    var.getSourceVariable().getName() = importedName and
    import_from_dot_in_init(f.getModule(importedName))
  )
}
/** Holds if `f` is an import of the form `from .[...] import ...` and the enclosing scope is an __init__ module */
predicate import_from_dot_in_init(ImportExprNode f) {
f.getScope() = any(Module m).getInitModule() and
@@ -251,30 +66,6 @@ Object undefinedVariable() { py_special_objects(result, "_semmle_undefined_value
/** Gets the pseudo-object representing an unknown value */
Object unknownValue() { result.asBuiltin() = Builtin::unknown() }
/** Gets the callable for the built-in `__new__` member of the class given by `theTypeType()`. */
deprecated BuiltinCallable theTypeNewMethod() {
result.asBuiltin() = theTypeType().asBuiltin().getMember("__new__")
}
/**
 * Holds if the global load `f` would refer to `value` (of class `cls`, with
 * origin `origin`) when it has not been assigned some other value.
 *
 * If the name of `f` is not a known built-in, `value` is the unknown value and
 * `cls` is the unknown type.
 */
pragma[noinline]
deprecated predicate potential_builtin_points_to(
  NameNode f, Object value, ClassObject cls, ControlFlowNode origin
) {
  origin = f and
  f.isLoad() and
  f.isGlobal() and
  (
    not exists(Object::builtin(f.getId())) and
    cls = theUnknownType() and
    value = unknownValue()
    or
    builtin_name_points_to(f.getId(), value, cls)
  )
}
/**
* Holds if `value` is the built-in object called `name` and `cls` is the class
* of that built-in.
*/
pragma[noinline]
deprecated predicate builtin_name_points_to(string name, Object value, ClassObject cls) {
value = Object::builtin(name) and cls.asBuiltin() = value.asBuiltin().getClass()
}
pragma[nomagic]
private predicate essa_var_scope(SsaSourceVariable var, Scope pred_scope, EssaVariable pred_var) {
BaseFlow::reaches_exit(pred_var) and
@@ -331,48 +122,3 @@ module BaseFlow {
scope_entry_value_transfer_through_init(pred_var, pred_scope, succ_def, succ_scope)
}
}
/**
 * Points-to for syntactic elements where context is not relevant.
 *
 * Holds if `f` refers to `value` of class `cls` with origin `origin`. This covers
 * `**kwargs` and `*varargs` parameters, the nodes handled by
 * `BasePointsTo::points_to`, and immutable literals.
 */
deprecated predicate simple_points_to(
  ControlFlowNode f, Object value, ClassObject cls, ControlFlowNode origin
) {
  value = f and
  origin = f and
  (kwargs_points_to(f, cls) or varargs_points_to(f, cls))
  or
  cls = simple_types(value) and
  (
    BasePointsTo::points_to(f, value, origin)
    or
    origin = f and
    value = f.getNode().(ImmutableLiteral).getLiteralObject()
  )
}
/**
 * Holds if `bit` is a binary expression node whose operator is bitwise
 * (`BitAnd`, `BitOr` or `BitXor`), with operands `left` and `right`.
 * Helper for `this_binary_expr_points_to`.
 */
deprecated predicate bitwise_expression_node(
  BinaryExprNode bit, ControlFlowNode left, ControlFlowNode right
) {
  left = bit.getLeft() and
  right = bit.getRight() and
  exists(Operator bitOp | bitOp = bit.getNode().getOp() |
    bitOp instanceof BitXor
    or
    bitOp instanceof BitOr
    or
    bitOp instanceof BitAnd
  )
}
/** Gets a module named `_abcoll` or `_collections_abc`. */
deprecated private Module theCollectionsAbcModule() {
  result.getName() in ["_abcoll", "_collections_abc"]
}
/**
 * Gets the class object for the class called `name` whose scope is one of the
 * modules given by `theCollectionsAbcModule()`.
 */
deprecated ClassObject collectionsAbcClass(string name) {
  exists(Class definition |
    definition.getScope() = theCollectionsAbcModule() and
    definition.getName() = name and
    definition = result.getPyClass()
  )
}

View File

@@ -12,12 +12,6 @@ predicate hasattr(CallNode c, ControlFlowNode obj, string attr) {
c.getArg(1).getNode().(StrConst).getText() = attr
}
/** Holds if `c` is a call of the form `callable(obj)`, where the callee is the plain name `callable`. */
deprecated predicate is_callable(CallNode c, ControlFlowNode obj) {
  obj = c.getArg(0) and
  c.getFunction().(NameNode).getId() = "callable"
}
/** Holds if `fc` is a call to `isinstance(use, cls)`. */
predicate isinstance(CallNode fc, ControlFlowNode cls, ControlFlowNode use) {
fc.getFunction().(NameNode).getId() = "isinstance" and
@@ -25,13 +19,6 @@ predicate isinstance(CallNode fc, ControlFlowNode cls, ControlFlowNode use) {
fc.getArg(0) = use
}
/** Holds if `fc` is a call to `issubclass(use, cls)`, where the callee is the plain name `issubclass`. */
deprecated predicate issubclass(CallNode fc, ControlFlowNode cls, ControlFlowNode use) {
  use = fc.getArg(0) and
  cls = fc.getArg(1) and
  fc.getFunction().(NameNode).getId() = "issubclass"
}
/** Holds if `c` is a test comparing `x` and `y`. `is` is true if the operator is `is` or `==`, it is false if the operator is `is not` or `!=`. */
predicate equality_test(CompareNode c, ControlFlowNode x, boolean is, ControlFlowNode y) {
exists(Cmpop op |

View File

@@ -122,10 +122,6 @@ private newtype TPointsToContext =
} or
TObjectContext(SelfInstanceInternal object)
/** Provides a helper for obtaining the points-to context that belongs to an object. */
deprecated module Context {
/**
* Gets the `TObjectContext` for `object`. That context is only constructed from a
* `SelfInstanceInternal`, so there is no result for other objects.
*/
PointsToContext forObject(ObjectInternal object) { result = TObjectContext(object) }
}
/**
* A points-to context. Context can be one of:
* * "main": Used for scripts.

View File

@@ -1,19 +0,0 @@
import python
/** Retained for backwards compatibility; use `ClassObject.isIterator()` instead. */
deprecated predicate is_iterator(ClassObject c) { c.isIterator() }
/** Retained for backwards compatibility; use `ClassObject.isIterable()` instead. */
deprecated predicate is_iterable(ClassObject c) { c.isIterable() }
/** Retained for backwards compatibility; use `ClassObject.isCollection()` instead. */
deprecated predicate is_collection(ClassObject c) { c.isCollection() }
/** Retained for backwards compatibility; use `ClassObject.isMapping()` instead. */
deprecated predicate is_mapping(ClassObject c) { c.isMapping() }
/** Retained for backwards compatibility; use `ClassObject.isSequence()` instead. */
deprecated predicate is_sequence(ClassObject c) { c.isSequence() }
/** Retained for backwards compatibility; use `ClassObject.isContextManager()` instead. */
deprecated predicate is_context_manager(ClassObject c) { c.isContextManager() }

View File

@@ -1,8 +0,0 @@
/**
* Provides predicates for reasoning about bad tag filter vulnerabilities.
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// BadTagFilterQuery should be used directly from the shared pack, and not from this file.
deprecated import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView> as Dep
import Dep

View File

@@ -1,8 +0,0 @@
/**
* Classes and predicates for working with suspicious character ranges.
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// OverlyLargeRangeQuery should be used directly from the shared pack, and not from this file.
deprecated import codeql.regex.OverlyLargeRangeQuery::Make<TreeView> as Dep
import Dep

View File

@@ -28,8 +28,8 @@ private class TracebackFunctionCall extends ExceptionInfo, DataFlow::CallCfgNode
/** A caught exception. */
private class CaughtException extends ExceptionInfo {
CaughtException() {
this.asVar().getDefinition().(EssaNodeDefinition).getDefiningNode().getNode() =
any(ExceptStmt s).getName()
this.asExpr() = any(ExceptStmt s).getName() and
this.asCfgNode() = any(EssaNodeDefinition def).getDefiningNode()
}
}

View File

@@ -71,11 +71,11 @@ module PathInjection {
// ```
//
// The same approach is used in the command injection query.
not exists(Module pathlib |
pathlib.getName() = "pathlib" and
this.getScope().getEnclosingModule() = pathlib and
// do allow this call if we're analyzing pathlib.py as part of CPython though
not exists(pathlib.getFile().getRelativePath())
not exists(Module inStdlib |
inStdlib.getName() in ["pathlib", "os"] and
this.getScope().getEnclosingModule() = inStdlib and
// do allow this call if we're analyzing, say, pathlib.py as part of CPython though
not exists(inStdlib.getFile().getRelativePath())
)
}
}

View File

@@ -63,13 +63,18 @@ deprecated class Configuration extends TaintTracking::Configuration {
}
}
/**
* A flow state used by the path-injection configuration.
*
* The characteristic predicate itself places no restriction on the string
* (`bindingset[this]` with `any()`); because the class is abstract, its values
* are exactly those of its concrete subclasses.
*/
abstract private class NormalizationState extends string {
bindingset[this]
NormalizationState() { any() }
}
/** A state signifying that the file path has not been normalized. */
class NotNormalized extends DataFlow::FlowState {
class NotNormalized extends NormalizationState {
NotNormalized() { this = "NotNormalized" }
}
/** A state signifying that the file path has been normalized, but not checked. */
class NormalizedUnchecked extends DataFlow::FlowState {
class NormalizedUnchecked extends NormalizationState {
NormalizedUnchecked() { this = "NormalizedUnchecked" }
}
@@ -85,7 +90,7 @@ class NormalizedUnchecked extends DataFlow::FlowState {
* Such checks are ineffective in the `NotNormalized` state.
*/
module PathInjectionConfig implements DataFlow::StateConfigSig {
class FlowState = DataFlow::FlowState;
class FlowState = NormalizationState;
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof Source and state instanceof NotNormalized

View File

@@ -84,26 +84,19 @@ module TarSlip {
* A sink capturing method calls to `extractall`.
*
* For a call to `file.extractall`, `file` is considered a sink if
*
* - there are no other arguments, or
* - there are other arguments (except `members`), and the extraction filter is unsafe.
* there is no `members` argument and the extraction filter is unsafe.
*/
class ExtractAllSink extends Sink {
ExtractAllSink() {
exists(DataFlow::CallCfgNode call |
exists(API::CallNode call |
call =
API::moduleImport("tarfile")
.getMember("open")
.getReturn()
.getMember("extractall")
.getACall() and
(
not exists(call.getArg(_)) and
not exists(call.getArgByName(_))
or
hasUnsafeFilter(call)
) and
not exists(call.getArgByName("members")) and
hasUnsafeFilter(call) and
not exists(call.getParameter(2, "members")) and
this = call.(DataFlow::MethodCallNode).getObject()
)
}

View File

@@ -50,7 +50,7 @@ module UnsafeShellCommandConstruction {
source = backtrackShellExec(TypeTracker::TypeBackTracker::end(), shellExec)
}
import semmle.python.dataflow.new.TypeTracker as TypeTracker
import semmle.python.dataflow.new.TypeTracking as TypeTracker
private DataFlow::LocalSourceNode backtrackShellExec(
TypeTracker::TypeBackTracker t, Concepts::SystemCommandExecution shellExec

View File

@@ -16,6 +16,29 @@ private import semmle.python.dataflow.new.BarrierGuards
* vulnerabilities, as well as extension points for adding your own.
*/
module UrlRedirect {
/**
* A state value to track whether the untrusted data may contain backslashes.
*
* The characteristic predicate itself places no restriction on the string
* (`bindingset[this]` with `any()`); because the class is abstract, its values
* are exactly those of its concrete subclasses.
*/
abstract class FlowState extends string {
bindingset[this]
FlowState() { any() }
}
/**
* A state value signifying that the untrusted data may contain backslashes.
*/
class MayContainBackslashes extends UrlRedirect::FlowState {
MayContainBackslashes() { this = "MayContainBackslashes" }
}
/**
* A state value signifying that any backslashes in the untrusted data have
* been eliminated, but no other sanitization has happened.
*/
class NoBackslashes extends UrlRedirect::FlowState {
NoBackslashes() { this = "NoBackslashes" }
}
/**
* A data flow source for "URL redirection" vulnerabilities.
*/
@@ -29,7 +52,28 @@ module UrlRedirect {
/**
* A sanitizer for "URL redirection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
abstract class Sanitizer extends DataFlow::Node {
/**
* Holds if this sanitizer sanitizes flow in the given state.
*/
abstract predicate sanitizes(FlowState state);
}
/**
* An additional flow step for "URL redirection" vulnerabilities.
*/
abstract class AdditionalFlowStep extends DataFlow::Node {
/**
* Holds if there should be an additional flow step from `nodeFrom` in `stateFrom`
* to `nodeTo` in `stateTo`.
*
* For example, a call to `replace` that replaces backslashes with forward slashes
* takes flow from `MayContainBackslashes` to `NoBackslashes`.
*/
abstract predicate step(
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
);
}
/**
* A source of remote user input, considered as a flow source.
@@ -57,10 +101,46 @@ module UrlRedirect {
string_concat.getRight() = this.asCfgNode()
)
}
override predicate sanitizes(FlowState state) {
// sanitize all flow states
any()
}
}
/**
 * A `replace` call that turns backslashes into forward slashes, or removes them
 * entirely. Such a call is both a partial sanitizer (it blocks flow in the
 * `MayContainBackslashes` state) and an additional flow step (flow continues
 * from the receiver to the call result in the `NoBackslashes` state).
 */
class ReplaceBackslashesSanitizer extends Sanitizer, AdditionalFlowStep, DataFlow::MethodCallNode {
  DataFlow::Node receiver;

  ReplaceBackslashesSanitizer() {
    // The string literal "\\" denotes a single backslash character.
    this.getArg(0).asExpr().(StrConst).getText() = "\\" and
    this.getArg(1).asExpr().(StrConst).getText() = ["", "/"] and
    this.calls(receiver, "replace")
  }

  override predicate step(
    DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
  ) {
    stateTo instanceof NoBackslashes and
    nodeTo = this and
    stateFrom instanceof MayContainBackslashes and
    nodeFrom = receiver
  }

  override predicate sanitizes(FlowState state) { state instanceof MayContainBackslashes }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends Sanitizer, StringConstCompareBarrier { }
class StringConstCompareAsSanitizerGuard extends Sanitizer, StringConstCompareBarrier {
override predicate sanitizes(FlowState state) {
// sanitize all flow states
any()
}
}
}

View File

@@ -9,7 +9,7 @@
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import UrlRedirectCustomizations::UrlRedirect
import UrlRedirectCustomizations::UrlRedirect as UrlRedirect
/**
* DEPRECATED: Use `UrlRedirectFlow` module instead.
@@ -19,20 +19,48 @@ import UrlRedirectCustomizations::UrlRedirect
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "UrlRedirect" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
source instanceof UrlRedirect::Source and state instanceof UrlRedirect::MayContainBackslashes
}
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
sink instanceof UrlRedirect::Sink and state instanceof UrlRedirect::FlowState
}
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) {
node.(UrlRedirect::Sanitizer).sanitizes(state)
}
override predicate isAdditionalTaintStep(
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
DataFlow::FlowState stateTo
) {
any(UrlRedirect::AdditionalFlowStep a).step(nodeFrom, stateFrom, nodeTo, stateTo)
}
}
private module UrlRedirectConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
private module UrlRedirectConfig implements DataFlow::StateConfigSig {
class FlowState = UrlRedirect::FlowState;
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof UrlRedirect::Source and state instanceof UrlRedirect::MayContainBackslashes
}
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof UrlRedirect::Sink and
exists(state)
}
predicate isBarrier(DataFlow::Node node, FlowState state) {
node.(UrlRedirect::Sanitizer).sanitizes(state)
}
predicate isAdditionalFlowStep(
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
) {
any(UrlRedirect::AdditionalFlowStep a).step(nodeFrom, stateFrom, nodeTo, stateTo)
}
}
/** Global taint-tracking for detecting "URL redirection" vulnerabilities. */
module UrlRedirectFlow = TaintTracking::Global<UrlRedirectConfig>;
module UrlRedirectFlow = TaintTracking::GlobalWithState<UrlRedirectConfig>;

View File

@@ -1,68 +0,0 @@
/**
* This library implements the analysis described in the following two papers:
*
* James Kirrage, Asiri Rathnayake, Hayo Thielecke: Static Analysis for
* Regular Expression Denial-of-Service Attacks. NSS 2013.
* (https://arxiv.org/abs/1301.0849)
* Asiri Rathnayake, Hayo Thielecke: Static Analysis for Regular Expression
* Exponential Runtime via Substructural Logics. 2014.
* (https://www.cs.bham.ac.uk/~hxt/research/redos_full.pdf)
*
* The basic idea is to search for overlapping cycles in the NFA, that is,
* states `q` such that there are two distinct paths from `q` to itself
* that consume the same word `w`.
*
* For any such state `q`, an attack string can be constructed as follows:
* concatenate a prefix `v` that takes the NFA to `q` with `n` copies of
* the word `w` that leads back to `q` along two different paths, followed
* by a suffix `x` that is _not_ accepted in state `q`. A backtracking
* implementation will need to explore at least 2^n different ways of going
* from `q` back to itself while trying to match the `n` copies of `w`
* before finally giving up.
*
* Now in order to identify overlapping cycles, all we have to do is find
* pumpable forks, that is, states `q` that can transition to two different
* states `r1` and `r2` on the same input symbol `c`, such that there are
* paths from both `r1` and `r2` to `q` that consume the same word. The latter
* condition is equivalent to saying that `(q, q)` is reachable from `(r1, r2)`
* in the product NFA.
*
* This is what the library does. It makes a simple attempt to construct a
* prefix `v` leading into `q`, but only to improve the alert message.
* And the library tries to prove the existence of a suffix that ensures
* rejection. This check might fail, which can cause false positives.
*
* Finally, sometimes it depends on the translation whether the NFA generated
* for a regular expression has a pumpable fork or not. We implement one
* particular translation, which may result in false positives or negatives
* relative to some particular JavaScript engine.
*
* More precisely, the library constructs an NFA from a regular expression `r`
* as follows:
*
* * Every sub-term `t` gives rise to an NFA state `Match(t,i)`, representing
* the state of the automaton before attempting to match the `i`th character in `t`.
* * There is one accepting state `Accept(r)`.
* * There is a special `AcceptAnySuffix(r)` state, which accepts any suffix string
* by using an epsilon transition to `Accept(r)` and an any transition to itself.
* * Transitions between states may be labelled with epsilon, or an abstract
* input symbol.
* * Each abstract input symbol represents a set of concrete input characters:
* either a single character, a set of characters represented by a
* character class, or the set of all characters.
* * The product automaton is constructed lazily, starting with pair states
* `(q, q)` where `q` is a fork, and proceeding along an over-approximate
* step relation.
* * The over-approximate step relation allows transitions along pairs of
* abstract input symbols where the symbols have overlap in the characters they accept.
* * Once a trace of pairs of abstract input symbols that leads from a fork
* back to itself has been identified, we attempt to construct a concrete
* string corresponding to it, which may fail.
* * Lastly we ensure that any state reached by repeating `n` copies of `w` has
* a suffix `x` (possibly empty) that is most likely __not__ accepted.
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// ExponentialBackTracking should be used directly from the shared pack, and not from this file.
deprecated private import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView> as Dep
import Dep

View File

@@ -1,13 +0,0 @@
/**
* Provides classes and predicates for constructing an NFA from
* a regular expression, and various utilities for reasoning about
* the resulting NFA.
*
* These utilities are used both by the ReDoS queries and by
* other queries that benefit from reasoning about NFAs.
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// NfaUtils should be used directly from the shared pack, and not from this file.
deprecated private import codeql.regex.nfa.NfaUtils::Make<TreeView> as Dep
import Dep

View File

@@ -1,9 +0,0 @@
/**
* Provides predicates for reasoning about which strings are matched by a regular expression,
* and for testing which capture groups are filled when a particular regexp matches a string.
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// RegexpMatching should be used directly from the shared pack, and not from this file.
deprecated import codeql.regex.nfa.RegexpMatching::Make<TreeView> as Dep
import Dep

View File

@@ -1,41 +0,0 @@
/**
* This module implements the analysis described in the paper:
* Valentin Wustholz, Oswaldo Olivo, Marijn J. H. Heule, and Isil Dillig:
* Static Detection of DoS Vulnerabilities in
* Programs that use Regular Expressions
* (Extended Version).
* (https://arxiv.org/pdf/1701.04045.pdf)
*
* Theorem 3 from the paper describes the basic idea.
*
* The following explains the idea using variables and predicate names that are used in the implementation:
* We consider a pair of repetitions, which we will call `pivot` and `succ`.
*
* We create a product automaton of 3-tuples of states (see `StateTuple`).
* There exists a transition `(a,b,c) -> (d,e,f)` in the product automaton
* iff there exist three transitions in the NFA `a->d, b->e, c->f` where those three
* transitions all match a shared character `char`. (see `getAThreewayIntersect`)
*
* We start a search in the product automaton at `(pivot, pivot, succ)`,
* and search for a series of transitions (a `Trace`), such that we end
* at `(pivot, succ, succ)` (see `isReachableFromStartTuple`).
*
* For example, consider the regular expression `/^\d*5\w*$/`.
* The search will start at the tuple `(\d*, \d*, \w*)` and search
* for a path to `(\d*, \w*, \w*)`.
* This path exists, and consists of a single transition in the product automaton,
* where the three corresponding NFA edges all match the character `"5"`.
*
* The start-state in the NFA has an any-transition to itself, this allows us to
* flag regular expressions such as `/a*$/` - which does not have a start anchor -
* and can thus start matching anywhere.
*
* The implementation is not perfect.
* It has the same suffix detection issue as the `js/redos` query, which can cause false positives.
* It also doesn't find all transitions in the product automaton, which can cause false negatives.
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// SuperlinearBackTracking should be used directly from the shared pack, and not from this file.
deprecated private import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView> as Dep
import Dep

View File

@@ -32,9 +32,6 @@ class XmlLocatable extends @xmllocatable, TXmlLocatable {
string toString() { none() } // overridden in subclasses
}
/** DEPRECATED: Alias for XmlLocatable */
deprecated class XMLLocatable = XmlLocatable;
/**
* An `XmlParent` is either an `XmlElement` or an `XmlFile`,
* both of which can contain other elements.
@@ -95,9 +92,6 @@ class XmlParent extends @xmlparent {
string toString() { result = this.getName() }
}
/** DEPRECATED: Alias for XmlParent */
deprecated class XMLParent = XmlParent;
/** An XML file. */
class XmlFile extends XmlParent, File {
XmlFile() { xmlEncoding(this, _) }
@@ -119,14 +113,8 @@ class XmlFile extends XmlParent, File {
/** Gets a DTD associated with this XML file. */
XmlDtd getADtd() { xmlDTDs(result, _, _, _, this) }
/** DEPRECATED: Alias for getADtd */
deprecated XmlDtd getADTD() { result = this.getADtd() }
}
/** DEPRECATED: Alias for XmlFile */
deprecated class XMLFile = XmlFile;
/**
* An XML document type definition (DTD).
*
@@ -163,9 +151,6 @@ class XmlDtd extends XmlLocatable, @xmldtd {
}
}
/** DEPRECATED: Alias for XmlDtd */
deprecated class XMLDTD = XmlDtd;
/**
* An XML element in an XML file.
*
@@ -221,9 +206,6 @@ class XmlElement extends @xmlelement, XmlParent, XmlLocatable {
override string toString() { result = this.getName() }
}
/** DEPRECATED: Alias for XmlElement */
deprecated class XMLElement = XmlElement;
/**
* An attribute that occurs inside an XML element.
*
@@ -254,9 +236,6 @@ class XmlAttribute extends @xmlattribute, XmlLocatable {
override string toString() { result = this.getName() + "=" + this.getValue() }
}
/** DEPRECATED: Alias for XmlAttribute */
deprecated class XMLAttribute = XmlAttribute;
/**
* A namespace used in an XML file.
*
@@ -273,9 +252,6 @@ class XmlNamespace extends XmlLocatable, @xmlnamespace {
/** Gets the URI of this namespace. */
string getUri() { xmlNs(this, _, result, _) }
/** DEPRECATED: Alias for getUri */
deprecated string getURI() { result = this.getUri() }
/** Holds if this namespace has no prefix. */
predicate isDefault() { this.getPrefix() = "" }
@@ -286,9 +262,6 @@ class XmlNamespace extends XmlLocatable, @xmlnamespace {
}
}
/** DEPRECATED: Alias for XmlNamespace */
deprecated class XMLNamespace = XmlNamespace;
/**
* A comment in an XML file.
*
@@ -309,9 +282,6 @@ class XmlComment extends @xmlcomment, XmlLocatable {
override string toString() { result = this.getText() }
}
/** DEPRECATED: Alias for XmlComment */
deprecated class XMLComment = XmlComment;
/**
* A sequence of characters that occurs between opening and
* closing tags of an XML element, excluding other elements.
@@ -335,6 +305,3 @@ class XmlCharacters extends @xmlcharacters, XmlLocatable {
/** Gets a printable representation of this XML character sequence. */
override string toString() { result = this.getCharacters() }
}
/** DEPRECATED: Alias for XmlCharacters */
deprecated class XMLCharacters = XmlCharacters;

View File

@@ -1,3 +1,13 @@
## 0.9.7
### Minor Analysis Improvements
- Added modeling of YARL's `is_absolute` method and checks of the `netloc` of a parsed URL as sanitizers for the `py/url-redirection` query, leading to fewer false positives.
## 0.9.6
No user-facing changes.
## 0.9.5
No user-facing changes.

View File

@@ -1,7 +1,6 @@
/**
* @name Successfully extracted Python files
* @description Lists all Python files in the source code directory that were extracted
* without encountering an error.
* @name Extracted Python files
* @description Lists all Python files in the source code directory that were extracted.
* @kind diagnostic
* @id py/diagnostics/successfully-extracted-files
* @tags successfully-extracted-files
@@ -10,7 +9,5 @@
import python
from File file
where
not exists(SyntaxError e | e.getFile() = file) and
exists(file.getRelativePath())
where exists(file.getRelativePath())
select file, ""

View File

@@ -16,6 +16,10 @@ To guard against untrusted URL redirection, it is advisable to avoid putting use
directly into a redirect URL. Instead, maintain a list of authorized
redirects on the server; then choose from that list based on the user input provided.
</p>
<p>
If this is not possible, then the user input should be validated in some other way,
for example, by verifying that the target URL does not include an explicit host name.
</p>
</recommendation>
<example>
@@ -27,16 +31,47 @@ without validating the input, which facilitates phishing attacks:
<sample src="examples/redirect_bad.py"/>
<p>
One way to remedy the problem is to validate the user input against a known fixed string
before doing the redirection:
If you know the set of valid redirect targets, you can maintain a list of them on the server
and check that the user input is in that list:
</p>
<sample src="examples/redirect_good.py"/>
<p>
Often this is not possible, so an alternative is to check that the target URL does not
specify an explicit host name. For example, you can use the <code>urlparse</code> function
from the Python standard library to parse the URL and check that the <code>netloc</code>
attribute is empty.
</p>
<p>
Note, however, that many browsers accept backslash characters (<code>\</code>) as equivalent
to forward slash characters (<code>/</code>) in URLs, but the <code>urlparse</code> function
does not. To account for this, you can first replace all backslashes with forward slashes,
as shown in the following example:
</p>
<sample src="examples/redirect_good2.py"/>
<p>
For Django applications, you can use the function <code>url_has_allowed_host_and_scheme</code>
to check that a URL is safe to redirect to, as shown in the following example:
</p>
<sample src="examples/redirect_good3.py"/>
<p>
Note that <code>url_has_allowed_host_and_scheme</code> handles backslashes correctly, so no
additional processing is required.
</p>
</example>
<references>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">
Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
<li>Python standard library: <a href="https://docs.python.org/3/library/urllib.parse.html">
urllib.parse</a>.</li>
</references>
</qhelp>

View File

@@ -10,4 +10,5 @@ def hello():
if target == VALID_REDIRECT:
return redirect(target, code=302)
else:
... # Error
# ignore the target and redirect to the home page
return redirect('/', code=302)

View File

@@ -0,0 +1,14 @@
from flask import Flask, request, redirect
from urllib.parse import urlparse
app = Flask(__name__)
@app.route('/')
def hello():
    """Redirect to the user-supplied ``target`` only if it names no explicit host.

    Any target whose parsed ``netloc`` is non-empty is ignored and the client
    is sent to the home page instead.
    """
    target = request.args.get('target', '')
    # Many browsers treat '\' as equivalent to '/', but urlparse does not.
    # Normalize backslashes to forward slashes so that urlparse sees the URL
    # the way the browser will (e.g. '/\example.com' becomes '//example.com',
    # which has a non-empty netloc and is rejected below).
    target = target.replace('\\', '/')
    if not urlparse(target).netloc:
        # relative path, safe to redirect
        return redirect(target, code=302)
    # ignore the target and redirect to the home page
    return redirect('/', code=302)

View File

@@ -0,0 +1,13 @@
from django.http import HttpResponseRedirect
from django.shortcuts import redirect
from django.utils.http import url_has_allowed_host_and_scheme
from django.views import View
# Example view: redirects to the user-supplied `target` query parameter only when
# Django's `url_has_allowed_host_and_scheme` check accepts it.
class RedirectView(View):
def get(self, request, *args, **kwargs):
target = request.GET.get('target', '')
# NOTE(review): with allowed_hosts=None no external host is allow-listed, so
# presumably only URLs without an explicit host pass - confirm against the Django docs.
if url_has_allowed_host_and_scheme(target, allowed_hosts=None):
return HttpResponseRedirect(target)
else:
# ignore the target and redirect to the home page
return redirect('/')

View File

@@ -0,0 +1,3 @@
## 0.9.6
No user-facing changes.

View File

@@ -0,0 +1,5 @@
## 0.9.7
### Minor Analysis Improvements
- Added modeling of YARL's `is_absolute` method and checks of the `netloc` of a parsed URL as sanitizers for the `py/url-redirection` query, leading to fewer false positives.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.9.5
lastReleaseVersion: 0.9.7

View File

@@ -12,13 +12,18 @@ import semmle.python.dataflow.new.internal.TaintTrackingPrivate
import semmle.python.dataflow.new.RemoteFlowSources
import UnicodeBypassValidationCustomizations::UnicodeBypassValidation
/**
 * A flow state for the Unicode bypass validation configuration.
 *
 * The concrete states are the subclasses `PreValidation` and `PostValidation`.
 * The characteristic predicate needs `bindingset[this]` because the class
 * extends the infinite type `string` and does not itself restrict `this`.
 */
abstract private class ValidationState extends string {
bindingset[this]
ValidationState() { any() }
}
/** A state signifying that a logical validation has not been performed. */
class PreValidation extends DataFlow::FlowState {
class PreValidation extends ValidationState {
PreValidation() { this = "PreValidation" }
}
/** A state signifying that a logical validation has been performed. */
class PostValidation extends DataFlow::FlowState {
class PostValidation extends ValidationState {
PostValidation() { this = "PostValidation" }
}
@@ -29,7 +34,7 @@ class PostValidation extends DataFlow::FlowState {
* to track the requirement that a logical validation has been performed before the Unicode Transformation.
*/
private module UnicodeBypassValidationConfig implements DataFlow::StateConfigSig {
class FlowState = DataFlow::FlowState;
class FlowState = ValidationState;
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof RemoteFlowSource and state instanceof PreValidation

Some files were not shown because too many files have changed in this diff Show More