Merge branch 'main' into py-shell

This commit is contained in:
erik-krogh
2023-03-13 14:56:04 +01:00
3731 changed files with 279060 additions and 217903 deletions

View File

@@ -1,3 +1,27 @@
## 0.8.1
### Major Analysis Improvements
* We use a new analysis for the call-graph (determining which function is called). This can lead to changed results. In most cases this is much more accurate than the old call-graph that was based on points-to, but we do lose a few valid edges in the call-graph, especially around methods that are not defined inside its class.
### Minor Analysis Improvements
* Fixed module resolution so we properly recognize definitions made within if-then-else statements.
* Added modeling of cryptographic operations in the `hmac` library.
## 0.8.0
### Breaking Changes
- Python 2 is no longer supported for extracting databases using the CodeQL CLI. As a consequence,
the previously deprecated support for `pyxl` and `spitfire` templates has also been removed. When
extracting Python 2 code, having Python 2 installed is still recommended, as this ensures the
correct version of the Python standard library is extracted.
### Minor Analysis Improvements
* Fixed module resolution so we properly recognize that in `from <pkg> import *`, where `<pkg>` is a package, the actual imports are made from the `<pkg>/__init__.py` file.
## 0.7.2
No user-facing changes.

View File

@@ -0,0 +1,9 @@
---
category: majorAnalysis
---
* The main data flow and taint tracking APIs have been changed. The old APIs
remain in place for now and translate to the new through a
backwards-compatible wrapper. If multiple configurations are in scope
simultaneously, then this may affect results slightly. The new API is quite
similar to the old, but makes use of a configuration module instead of a
configuration class.

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Deleted the deprecated `getPath` and `getFolder` predicates from the `XmlFile` class.

View File

@@ -0,0 +1,4 @@
---
category: feature
---
* Added support for merging two `PathGraph`s via disjoint union to allow results from multiple data flow computations in a single `path-problem` query.

View File

@@ -0,0 +1,12 @@
## 0.8.0
### Breaking Changes
- Python 2 is no longer supported for extracting databases using the CodeQL CLI. As a consequence,
the previously deprecated support for `pyxl` and `spitfire` templates has also been removed. When
extracting Python 2 code, having Python 2 installed is still recommended, as this ensures the
correct version of the Python standard library is extracted.
### Minor Analysis Improvements
* Fixed module resolution so we properly recognize that in `from <pkg> import *`, where `<pkg>` is a package, the actual imports are made from the `<pkg>/__init__.py` file.

View File

@@ -0,0 +1,10 @@
## 0.8.1
### Major Analysis Improvements
* We use a new analysis for the call-graph (determining which function is called). This can lead to changed results. In most cases this is much more accurate than the old call-graph that was based on points-to, but we do lose a few valid edges in the call-graph, especially around methods that are not defined inside its class.
### Minor Analysis Improvements
* Fixed module resolution so we properly recognize definitions made within if-then-else statements.
* Added modeling of cryptographic operations in the `hmac` library.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.7.2
lastReleaseVersion: 0.8.1

View File

@@ -1,5 +1,5 @@
name: codeql/python-all
version: 0.7.3-dev
version: 0.8.2-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python

View File

@@ -1025,7 +1025,8 @@ module Http {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CsrfLocalProtectionSetting::Range` instead.
*/
class CsrfLocalProtectionSetting extends DataFlow::Node instanceof CsrfLocalProtectionSetting::Range {
class CsrfLocalProtectionSetting extends DataFlow::Node instanceof CsrfLocalProtectionSetting::Range
{
/**
* Gets a request handler whose CSRF protection is changed.
*/

View File

@@ -3,32 +3,34 @@
import python
/** the Python major version number */
int major_version() {
explicit_major_version(result)
or
not explicit_major_version(_) and
/* If there is more than one version, prefer 2 for backwards compatibility */
(if py_flags_versioned("version.major", "2", "2") then result = 2 else result = 3)
}
int major_version() { full_python_analysis_version(result, _, _) }
/** the Python minor version number */
int minor_version() {
exists(string v | py_flags_versioned("version.minor", v, major_version().toString()) |
result = v.toInt()
)
}
int minor_version() { full_python_analysis_version(_, result, _) }
/** the Python micro version number */
int micro_version() {
exists(string v | py_flags_versioned("version.micro", v, major_version().toString()) |
result = v.toInt()
)
int micro_version() { full_python_analysis_version(_, _, result) }
/** Gets the latest supported minor version for the given major version. */
private int latest_supported_minor_version(int major) {
major = 2 and result = 7
or
major = 3 and result = 11
}
private predicate explicit_major_version(int v) {
exists(string version | py_flags_versioned("language.version", version, _) |
version.charAt(0) = "2" and v = 2
or
version.charAt(0) = "3" and v = 3
private predicate full_python_analysis_version(int major, int minor, int micro) {
exists(string version_string | py_flags_versioned("language.version", version_string, _) |
major = version_string.regexpFind("\\d+", 0, _).toInt() and
(
minor = version_string.regexpFind("\\d+", 1, _).toInt()
or
not exists(version_string.regexpFind("\\d+", 1, _)) and
minor = latest_supported_minor_version(major)
) and
(
micro = version_string.regexpFind("\\d+", 2, _).toInt()
or
not exists(version_string.regexpFind("\\d+", 2, _)) and micro = 0
)
)
}

View File

@@ -125,7 +125,7 @@ class ControlFlowNode extends @py_flow_node {
/** Gets a textual representation of this element. */
cached
string toString() {
Stages::DataFlow::ref() and
Stages::AST::ref() and
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
or
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
@@ -411,6 +411,12 @@ class CallNode extends ControlFlowNode {
result.getNode() = this.getNode().getStarArg() and
result.getBasicBlock().dominates(this.getBasicBlock())
}
/** Gets a dictionary (**) argument of this call, if any. */
ControlFlowNode getKwargs() {
result.getNode() = this.getNode().getKwargs() and
result.getBasicBlock().dominates(this.getBasicBlock())
}
}
/** A control flow corresponding to an attribute expression, such as `value.attr` */

View File

@@ -26,6 +26,26 @@ private newtype TCryptographicAlgorithm =
isWeakPasswordHashingAlgorithm(name) and isWeak = true
}
/**
* Gets the most specific `CryptographicAlgorithm` that matches the given `name`.
* A matching algorithm is one where the name of the algorithm matches the start of name, with allowances made for different name formats.
* In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match.
*/
bindingset[name]
private CryptographicAlgorithm getBestAlgorithmForName(string name) {
result =
max(CryptographicAlgorithm algorithm |
algorithm.getName() =
[
name.toUpperCase(), // the full name
name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces
name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores
].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces
|
algorithm order by algorithm.getName().length()
)
}
/**
* A cryptographic algorithm.
*/
@@ -39,15 +59,11 @@ abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
abstract string getName();
/**
* Holds if the name of this algorithm matches `name` modulo case,
* white space, dashes, underscores, and anything after a dash in the name
* (to ignore modes of operation, such as CBC or ECB).
* Holds if the name of this algorithm is the most specific match for `name`.
* This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc.
*/
bindingset[name]
predicate matchesName(string name) {
[name.toUpperCase(), name.toUpperCase().regexpCapture("^(\\w+)(?:-.*)?$", 1)]
.regexpReplaceAll("[-_ ]", "") = getName()
}
predicate matchesName(string name) { this = getBestAlgorithmForName(name) }
/**
* Holds if this algorithm is weak.

View File

@@ -14,8 +14,20 @@
predicate isStrongHashingAlgorithm(string name) {
name =
[
// see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#blake2
// and https://www.blake2.net/
"BLAKE2", "BLAKE2B", "BLAKE2S",
// see https://github.com/BLAKE3-team/BLAKE3
"BLAKE3",
//
"DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2",
"SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512"
"SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512",
// see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#cryptography.hazmat.primitives.hashes.SHAKE128
"SHAKE128", "SHAKE256",
// see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#sm3
"SM3",
// see https://security.stackexchange.com/a/216297
"WHIRLPOOL",
]
}

View File

@@ -22,5 +22,6 @@ private import python
* global (inter-procedural) data flow analyses.
*/
module DataFlow {
import internal.DataFlowImpl
import internal.DataFlow
import internal.DataFlowImpl1
}

View File

@@ -15,5 +15,6 @@ private import python
* global (inter-procedural) taint-tracking analyses.
*/
module TaintTracking {
import internal.tainttracking1.TaintTracking
import internal.tainttracking1.TaintTrackingImpl
}

View File

@@ -0,0 +1,353 @@
/**
* Provides an implementation of global (interprocedural) data flow. This file
* re-exports the local (intraprocedural) data flow analysis from
* `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
* through the `Make` and `MakeWithState` modules.
*/
private import DataFlowImplCommon
private import DataFlowImplSpecific::Private
import DataFlowImplSpecific::Public
import DataFlowImplCommonPublic
private import DataFlowImpl
/** An input configuration for data flow. */
signature module ConfigSig {
/**
* Holds if `source` is a relevant data flow source.
*/
predicate isSource(Node source);
/**
* Holds if `sink` is a relevant data flow sink.
*/
predicate isSink(Node sink);
/**
* Holds if data flow through `node` is prohibited. This completely removes
* `node` from the data flow graph.
*/
default predicate isBarrier(Node node) { none() }
/** Holds if data flow into `node` is prohibited. */
default predicate isBarrierIn(Node node) { none() }
/** Holds if data flow out of `node` is prohibited. */
default predicate isBarrierOut(Node node) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
*/
default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
/**
* Holds if an arbitrary number of implicit read steps of content `c` may be
* taken at `node`.
*/
default predicate allowImplicitRead(Node node, ContentSet c) { none() }
/**
* Gets the virtual dispatch branching limit when calculating field flow.
* This can be overridden to a smaller value to improve performance (a
* value of 0 disables field flow), or a larger value to get more results.
*/
default int fieldFlowBranchLimit() { result = 2 }
/**
* Gets a data flow configuration feature to add restrictions to the set of
* valid flow paths.
*
* - `FeatureHasSourceCallContext`:
* Assume that sources have some existing call context to disallow
* conflicting return-flow directly following the source.
* - `FeatureHasSinkCallContext`:
* Assume that sinks have some existing call context to disallow
* conflicting argument-to-parameter flow directly preceding the sink.
* - `FeatureEqualSourceSinkCallContext`:
* Implies both of the above and additionally ensures that the entire flow
* path preserves the call context.
*
* These features are generally not relevant for typical end-to-end data flow
* queries, but should only be used for constructing paths that need to
* somehow be pluggable in another path context.
*/
default FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
default predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if hidden nodes should be included in the data flow graph.
*
* This feature should only be used for debugging or when the data flow graph
* is not visualized (as it is in a `path-problem` query).
*/
default predicate includeHiddenNodes() { none() }
}
/** An input configuration for data flow using flow state. */
signature module StateConfigSig {
bindingset[this]
class FlowState;
/**
* Holds if `source` is a relevant data flow source with the given initial
* `state`.
*/
predicate isSource(Node source, FlowState state);
/**
* Holds if `sink` is a relevant data flow sink accepting `state`.
*/
predicate isSink(Node sink, FlowState state);
/**
* Holds if data flow through `node` is prohibited. This completely removes
* `node` from the data flow graph.
*/
default predicate isBarrier(Node node) { none() }
/**
* Holds if data flow through `node` is prohibited when the flow state is
* `state`.
*/
predicate isBarrier(Node node, FlowState state);
/** Holds if data flow into `node` is prohibited. */
default predicate isBarrierIn(Node node) { none() }
/** Holds if data flow out of `node` is prohibited. */
default predicate isBarrierOut(Node node) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
*/
default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2);
/**
* Holds if an arbitrary number of implicit read steps of content `c` may be
* taken at `node`.
*/
default predicate allowImplicitRead(Node node, ContentSet c) { none() }
/**
* Gets the virtual dispatch branching limit when calculating field flow.
* This can be overridden to a smaller value to improve performance (a
* value of 0 disables field flow), or a larger value to get more results.
*/
default int fieldFlowBranchLimit() { result = 2 }
/**
* Gets a data flow configuration feature to add restrictions to the set of
* valid flow paths.
*
* - `FeatureHasSourceCallContext`:
* Assume that sources have some existing call context to disallow
* conflicting return-flow directly following the source.
* - `FeatureHasSinkCallContext`:
* Assume that sinks have some existing call context to disallow
* conflicting argument-to-parameter flow directly preceding the sink.
* - `FeatureEqualSourceSinkCallContext`:
* Implies both of the above and additionally ensures that the entire flow
* path preserves the call context.
*
* These features are generally not relevant for typical end-to-end data flow
* queries, but should only be used for constructing paths that need to
* somehow be pluggable in another path context.
*/
default FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
default predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if hidden nodes should be included in the data flow graph.
*
* This feature should only be used for debugging or when the data flow graph
* is not visualized (as it is in a `path-problem` query).
*/
default predicate includeHiddenNodes() { none() }
}
/**
* Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
* measured in approximate number of interprocedural steps.
*/
signature int explorationLimitSig();
/**
* The output of a data flow computation.
*/
signature module DataFlowSig {
/**
* A `Node` augmented with a call context (except for sinks) and an access path.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode;
/**
* Holds if data can flow from `source` to `sink`.
*
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink);
/**
* Holds if data can flow from `source` to `sink`.
*/
predicate hasFlow(Node source, Node sink);
/**
* Holds if data can flow from some source to `sink`.
*/
predicate hasFlowTo(Node sink);
/**
* Holds if data can flow from some source to `sink`.
*/
predicate hasFlowToExpr(DataFlowExpr sink);
}
/**
* Constructs a standard data flow computation.
*/
module Make<ConfigSig Config> implements DataFlowSig {
private module C implements FullStateConfigSig {
import DefaultState<Config>
import Config
}
import Impl<C>
}
/**
* Constructs a data flow computation using flow state.
*/
module MakeWithState<StateConfigSig Config> implements DataFlowSig {
private module C implements FullStateConfigSig {
import Config
}
import Impl<C>
}
signature class PathNodeSig {
/** Gets a textual representation of this element. */
string toString();
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
);
/** Gets the underlying `Node`. */
Node getNode();
}
signature module PathGraphSig<PathNodeSig PathNode> {
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
predicate edges(PathNode a, PathNode b);
/** Holds if `n` is a node in the graph of data flow path explanations. */
predicate nodes(PathNode n, string key, string val);
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out);
}
/**
* Constructs a `PathGraph` from two `PathGraph`s by disjoint union.
*/
module MergePathGraph<
PathNodeSig PathNode1, PathNodeSig PathNode2, PathGraphSig<PathNode1> Graph1,
PathGraphSig<PathNode2> Graph2>
{
private newtype TPathNode =
TPathNode1(PathNode1 p) or
TPathNode2(PathNode2 p)
/** A node in a graph of path explanations that is formed by disjoint union of the two given graphs. */
class PathNode extends TPathNode {
/** Gets this as a projection on the first given `PathGraph`. */
PathNode1 asPathNode1() { this = TPathNode1(result) }
/** Gets this as a projection on the second given `PathGraph`. */
PathNode2 asPathNode2() { this = TPathNode2(result) }
/** Gets a textual representation of this element. */
string toString() {
result = this.asPathNode1().toString() or
result = this.asPathNode2().toString()
}
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.asPathNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) or
this.asPathNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
Node getNode() {
result = this.asPathNode1().getNode() or
result = this.asPathNode2().getNode()
}
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
*/
module PathGraph implements PathGraphSig<PathNode> {
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
query predicate edges(PathNode a, PathNode b) {
Graph1::edges(a.asPathNode1(), b.asPathNode1()) or
Graph2::edges(a.asPathNode2(), b.asPathNode2())
}
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
Graph1::nodes(n.asPathNode1(), key, val) or
Graph2::nodes(n.asPathNode2(), key, val)
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
Graph1::subpaths(arg.asPathNode1(), par.asPathNode1(), ret.asPathNode1(), out.asPathNode1()) or
Graph2::subpaths(arg.asPathNode2(), par.asPathNode2(), ret.asPathNode2(), out.asPathNode2())
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,838 +0,0 @@
/**
* INTERNAL: Do not use.
*
* Points-to based call-graph.
*/
private import python
private import DataFlowPublic
private import semmle.python.SpecialMethods
private import FlowSummaryImpl as FlowSummaryImpl
/** A parameter position represented by an integer. */
class ParameterPosition extends int {
ParameterPosition() { exists(any(DataFlowCallable c).getParameter(this)) }
/** Holds if this position represents a positional parameter at position `pos`. */
predicate isPositional(int pos) { this = pos } // with the current representation, all parameters are positional
}
/** An argument position represented by an integer. */
class ArgumentPosition extends int {
ArgumentPosition() { this in [-2, -1] or exists(any(Call c).getArg(this)) }
/** Holds if this position represents a positional argument at position `pos`. */
predicate isPositional(int pos) { this = pos } // with the current representation, all arguments are positional
}
/** Holds if arguments at position `apos` match parameters at position `ppos`. */
pragma[inline]
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos }
/**
* Computes routing of arguments to parameters
*
* When a call contains more positional arguments than there are positional parameters,
* the extra positional arguments are passed as a tuple to a starred parameter. This is
* achieved by synthesizing a node `TPosOverflowNode(call, callable)`
* that represents the tuple of extra positional arguments. There is a store step from each
* extra positional argument to this node.
*
* CURRENTLY NOT SUPPORTED:
* When a call contains an iterable unpacking argument, such as `func(*args)`, it is expanded into positional arguments.
*
* CURRENTLY NOT SUPPORTED:
* If a call contains an iterable unpacking argument, such as `func(*args)`, and the callee contains a starred argument, any extra
* positional arguments are passed to the starred argument.
*
* When a call contains keyword arguments that do not correspond to keyword parameters, these
* extra keyword arguments are passed as a dictionary to a doubly starred parameter. This is
* achieved by synthesizing a node `TKwOverflowNode(call, callable)`
* that represents the dictionary of extra keyword arguments. There is a store step from each
* extra keyword argument to this node.
*
* When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, with entries corresponding to a keyword parameter,
* the value at such a key is unpacked and passed to the parameter. This is achieved
* by synthesizing an argument node `TKwUnpacked(call, callable, name)` representing the unpacked
* value. This node is used as the argument passed to the matching keyword parameter. There is a read
* step from the dictionary argument to the synthesized argument node.
*
* When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, and the callee contains a doubly starred parameter,
* entries which are not unpacked are passed to the doubly starred parameter. This is achieved by
* adding a dataflow step from the dictionary argument to `TKwOverflowNode(call, callable)` and a
* step to clear content of that node at any unpacked keys.
*
* ## Examples:
* Assume that we have the callable
* ```python
* def f(x, y, *t, **d):
* pass
* ```
* Then the call
* ```python
* f(0, 1, 2, a=3)
* ```
* will be modeled as
* ```python
* f(0, 1, [*t], [**d])
* ```
* where `[` and `]` denotes synthesized nodes, so `[*t]` is the synthesized tuple argument
* `TPosOverflowNode` and `[**d]` is the synthesized dictionary argument `TKwOverflowNode`.
* There will be a store step from `2` to `[*t]` at pos `0` and one from `3` to `[**d]` at key
* `a`.
*
* For the call
* ```python
* f(0, **{"y": 1, "a": 3})
* ```
* no tuple argument is synthesized. It is modeled as
* ```python
* f(0, [y=1], [**d])
* ```
* where `[y=1]` is the synthesized unpacked argument `TKwUnpacked` (with `name` = `y`). There is
* a read step from `**{"y": 1, "a": 3}` to `[y=1]` at key `y` to get the value passed to the parameter
* `y`. There is a dataflow step from `**{"y": 1, "a": 3}` to `[**d]` to transfer the content and
* a clearing of content at key `y` for node `[**d]`, since that value has been unpacked.
*/
module ArgumentPassing {
/**
* Holds if `call` represents a `DataFlowCall` to a `DataFlowCallable` represented by `callable`.
*
* It _may not_ be the case that `call = callable.getACall()`, i.e. if `call` represents a `ClassCall`.
*
* Used to limit the size of predicates.
*/
predicate connects(CallNode call, CallableValue callable) {
exists(NormalCall c |
call = c.getNode() and
callable = c.getCallable().getCallableValue()
)
}
/**
* Gets the `n`th parameter of `callable`.
* If the callable has a starred parameter, say `*tuple`, that is matched with `n=-1`.
* If the callable has a doubly starred parameter, say `**dict`, that is matched with `n=-2`.
* Note that, unlike other languages, we do _not_ use -1 for the position of `self` in Python,
* as it is an explicit parameter at position 0.
*/
NameNode getParameter(CallableValue callable, int n) {
// positional parameter
result = callable.getParameter(n)
or
// starred parameter, `*tuple`
exists(Function f |
f = callable.getScope() and
n = -1 and
result = f.getVararg().getAFlowNode()
)
or
// doubly starred parameter, `**dict`
exists(Function f |
f = callable.getScope() and
n = -2 and
result = f.getKwarg().getAFlowNode()
)
}
/**
* A type representing a mapping from argument indices to parameter indices.
* We currently use two mappings: NoShift, the identity, used for ordinary
* function calls, and ShiftOneUp which is used for calls where an extra argument
* is inserted. These include method calls, constructor calls and class calls.
* In these calls, the argument at index `n` is mapped to the parameter at position `n+1`.
*/
newtype TArgParamMapping =
TNoShift() or
TShiftOneUp()
/** A mapping used for parameter passing. */
abstract class ArgParamMapping extends TArgParamMapping {
/** Gets the index of the parameter that corresponds to the argument at index `argN`. */
bindingset[argN]
abstract int getParamN(int argN);
/** Gets a textual representation of this element. */
abstract string toString();
}
/** A mapping that passes argument `n` to parameter `n`. */
class NoShift extends ArgParamMapping, TNoShift {
NoShift() { this = TNoShift() }
override string toString() { result = "NoShift [n -> n]" }
bindingset[argN]
override int getParamN(int argN) { result = argN }
}
/** A mapping that passes argument `n` to parameter `n+1`. */
class ShiftOneUp extends ArgParamMapping, TShiftOneUp {
ShiftOneUp() { this = TShiftOneUp() }
override string toString() { result = "ShiftOneUp [n -> n+1]" }
bindingset[argN]
override int getParamN(int argN) { result = argN + 1 }
}
/**
* Gets the node representing the argument to `call` that is passed to the parameter at
* (zero-based) index `paramN` in `callable`. If this is a positional argument, it must appear
* at an index, `argN`, in `call` which satisfies `paramN = mapping.getParamN(argN)`.
*
* `mapping` will be the identity for function calls, but not for method- or constructor calls,
* where the first parameter is `self` and the first positional argument is passed to the second positional parameter.
* Similarly for classmethod calls, where the first parameter is `cls`.
*
* NOT SUPPORTED: Keyword-only parameters.
*/
Node getArg(CallNode call, ArgParamMapping mapping, CallableValue callable, int paramN) {
connects(call, callable) and
(
// positional argument
exists(int argN |
paramN = mapping.getParamN(argN) and
result = TCfgNode(call.getArg(argN))
)
or
// keyword argument
// TODO: Since `getArgName` have no results for keyword-only parameters,
// these are currently not supported.
exists(Function f, string argName |
f = callable.getScope() and
f.getArgName(paramN) = argName and
result = TCfgNode(call.getArgByName(unbind_string(argName)))
)
or
// a synthesized argument passed to the starred parameter (at position -1)
callable.getScope().hasVarArg() and
paramN = -1 and
result = TPosOverflowNode(call, callable)
or
// a synthesized argument passed to the doubly starred parameter (at position -2)
callable.getScope().hasKwArg() and
paramN = -2 and
result = TKwOverflowNode(call, callable)
or
// argument unpacked from dict
exists(string name |
call_unpacks(call, mapping, callable, name, paramN) and
result = TKwUnpackedNode(call, callable, name)
)
)
}
/** Currently required in `getArg` in order to prevent a bad join. */
bindingset[result, s]
private string unbind_string(string s) { result <= s and s <= result }
/** Gets the control flow node that is passed as the `n`th overflow positional argument. */
ControlFlowNode getPositionalOverflowArg(CallNode call, CallableValue callable, int n) {
connects(call, callable) and
exists(Function f, int posCount, int argNr |
f = callable.getScope() and
f.hasVarArg() and
posCount = f.getPositionalParameterCount() and
result = call.getArg(argNr) and
argNr >= posCount and
argNr = posCount + n
)
}
/** Gets the control flow node that is passed as the overflow keyword argument with key `key`. */
ControlFlowNode getKeywordOverflowArg(CallNode call, CallableValue callable, string key) {
connects(call, callable) and
exists(Function f |
f = callable.getScope() and
f.hasKwArg() and
not exists(f.getArgByName(key)) and
result = call.getArgByName(key)
)
}
/**
* Holds if `call` unpacks a dictionary argument in order to pass it via `name`.
* It will then be passed to the parameter of `callable` at index `paramN`.
*/
predicate call_unpacks(
CallNode call, ArgParamMapping mapping, CallableValue callable, string name, int paramN
) {
connects(call, callable) and
exists(Function f |
f = callable.getScope() and
not exists(int argN | paramN = mapping.getParamN(argN) | exists(call.getArg(argN))) and // no positional argument available
name = f.getArgName(paramN) and
// not exists(call.getArgByName(name)) and // only matches keyword arguments not preceded by **
// TODO: make the below logic respect control flow splitting (by not going to the AST).
not call.getNode().getANamedArg().(Keyword).getArg() = name and // no keyword argument available
paramN >= 0 and
paramN < f.getPositionalParameterCount() + f.getKeywordOnlyParameterCount() and
exists(call.getNode().getKwargs()) // dict argument available
)
}
}
import ArgumentPassing
/** A callable defined in library code, identified by a unique string. */
abstract class LibraryCallable extends string {
bindingset[this]
LibraryCallable() { any() }
/** Gets a call to this library callable. */
abstract CallCfgNode getACall();
/** Gets a data-flow node, where this library callable is used as a call-back. */
abstract ArgumentNode getACallback();
}
/**
* IPA type for DataFlowCallable.
*
* A callable is either a function value, a class value, or a module (for enclosing `ModuleVariableNode`s).
* A module has no calls.
*/
newtype TDataFlowCallable =
TCallableValue(CallableValue callable) {
callable instanceof FunctionValue and
not callable.(FunctionValue).isLambda()
or
callable instanceof ClassValue
} or
TLambda(Function lambda) { lambda.isLambda() } or
TModule(Module m) or
TLibraryCallable(LibraryCallable callable)
/** A callable. */
class DataFlowCallable extends TDataFlowCallable {
/** Gets a textual representation of this element. */
string toString() { result = "DataFlowCallable" }
/** Gets a call to this callable. */
CallNode getACall() { none() }
/** Gets the scope of this callable */
Scope getScope() { none() }
/** Gets the specified parameter of this callable */
NameNode getParameter(int n) { none() }
/** Gets the name of this callable. */
string getName() { none() }
/** Gets a callable value for this callable, if any. */
CallableValue getCallableValue() { none() }
/** Gets the underlying library callable, if any. */
LibraryCallable asLibraryCallable() { this = TLibraryCallable(result) }
Location getLocation() { none() }
}
/** A class representing a callable value. */
class DataFlowCallableValue extends DataFlowCallable, TCallableValue {
CallableValue callable;
DataFlowCallableValue() { this = TCallableValue(callable) }
override string toString() { result = callable.toString() }
override CallNode getACall() { result = callable.getACall() }
override Scope getScope() { result = callable.getScope() }
override NameNode getParameter(int n) { result = getParameter(callable, n) }
override string getName() { result = callable.getName() }
override CallableValue getCallableValue() { result = callable }
}
/** A class representing a callable lambda. */
class DataFlowLambda extends DataFlowCallable, TLambda {
Function lambda;
DataFlowLambda() { this = TLambda(lambda) }
override string toString() { result = lambda.toString() }
override CallNode getACall() { result = this.getCallableValue().getACall() }
override Scope getScope() { result = lambda.getEvaluatingScope() }
override NameNode getParameter(int n) { result = getParameter(this.getCallableValue(), n) }
override string getName() { result = "Lambda callable" }
override FunctionValue getCallableValue() {
result.getOrigin().getNode() = lambda.getDefinition()
}
Expr getDefinition() { result = lambda.getDefinition() }
}
/** A class representing the scope in which a `ModuleVariableNode` appears. */
class DataFlowModuleScope extends DataFlowCallable, TModule {
Module mod;
DataFlowModuleScope() { this = TModule(mod) }
override string toString() { result = mod.toString() }
override CallNode getACall() { none() }
override Scope getScope() { result = mod }
override NameNode getParameter(int n) { none() }
override string getName() { result = mod.getName() }
override CallableValue getCallableValue() { none() }
}
class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
LibraryCallable callable;
LibraryCallableValue() { this = TLibraryCallable(callable) }
override string toString() { result = callable.toString() }
override CallNode getACall() { result = callable.getACall().getNode() }
/** Gets a data-flow node, where this library callable is used as a call-back. */
ArgumentNode getACallback() { result = callable.getACallback() }
override Scope getScope() { none() }
override NameNode getParameter(int n) { none() }
override string getName() { result = callable }
override LibraryCallable asLibraryCallable() { result = callable }
}
/**
* IPA type for DataFlowCall.
*
* Calls corresponding to `CallNode`s are either to callable values or to classes.
* The latter is directed to the callable corresponding to the `__init__` method of the class.
*
* An `__init__` method can also be called directly, so that the callable can be targeted by
* different types of calls. In that case, the parameter mappings will be different,
* as the class call will synthesize an argument node to be mapped to the `self` parameter.
*
* A call corresponding to a special method call is handled by the corresponding `SpecialMethodCallNode`.
*
* TODO: Add `TClassMethodCall` mapping `cls` appropriately.
*/
newtype TDataFlowCall =
/**
* Includes function calls, method calls, class calls and library calls.
* All these will be associated with a `CallNode`.
*/
TNormalCall(CallNode call) or
/**
* Includes calls to special methods.
* These will be associated with a `SpecialMethodCallNode`.
*/
TSpecialCall(SpecialMethodCallNode special) or
/** A synthesized call inside a summarized callable */
TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
}
/** A call found in the program source (as opposed to a synthesised summary call). */
class TExtractedDataFlowCall = TSpecialCall or TNormalCall;
/** A call that is taken into account by the global data flow computation. */
abstract class DataFlowCall extends TDataFlowCall {
/** Gets a textual representation of this element. */
abstract string toString();
/** Get the callable to which this call goes, if such exists. */
abstract DataFlowCallable getCallable();
/**
* Gets the argument to this call that will be sent
* to the `n`th parameter of the callable, if any.
*/
abstract Node getArg(int n);
/** Get the control flow node representing this call, if any. */
abstract ControlFlowNode getNode();
/** Gets the enclosing callable of this call. */
abstract DataFlowCallable getEnclosingCallable();
/** Gets the location of this dataflow call. */
abstract Location getLocation();
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
/** A call found in the program source (as opposed to a synthesised call). */
abstract class ExtractedDataFlowCall extends DataFlowCall, TExtractedDataFlowCall {
final override Location getLocation() { result = this.getNode().getLocation() }
abstract override DataFlowCallable getCallable();
abstract override Node getArg(int n);
abstract override ControlFlowNode getNode();
}
/** A call associated with a `CallNode`. */
class NormalCall extends ExtractedDataFlowCall, TNormalCall {
CallNode call;
NormalCall() { this = TNormalCall(call) }
override string toString() { result = call.toString() }
abstract override Node getArg(int n);
override CallNode getNode() { result = call }
abstract override DataFlowCallable getCallable();
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getNode().getScope() }
}
/**
* A call to a function.
* This excludes calls to bound methods, classes, and special methods.
* Bound method calls and class calls insert an argument for the explicit
* `self` parameter, and special method calls have special argument passing.
*/
class FunctionCall extends NormalCall {
DataFlowCallableValue callable;
FunctionCall() {
call = any(FunctionValue f).getAFunctionCall() and
call = callable.getACall()
}
override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
override DataFlowCallable getCallable() { result = callable }
}
/** A call to a lambda. */
class LambdaCall extends NormalCall {
DataFlowLambda callable;
LambdaCall() {
call = callable.getACall() and
callable = TLambda(any(Function f))
}
override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
override DataFlowCallable getCallable() { result = callable }
}
/**
* Represents a call to a bound method call.
* The node representing the instance is inserted as argument to the `self` parameter.
*/
class MethodCall extends NormalCall {
FunctionValue bm;
MethodCall() { call = bm.getAMethodCall() }
private CallableValue getCallableValue() { result = bm }
override Node getArg(int n) {
n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
or
n = 0 and result = TCfgNode(call.getFunction().(AttrNode).getObject())
}
override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
}
/**
* Represents a call to a class.
* The pre-update node for the call is inserted as argument to the `self` parameter.
* That makes the call node be the post-update node holding the value of the object
* after the constructor has run.
*/
class ClassCall extends NormalCall {
ClassValue c;
ClassCall() {
not c.isAbsent() and
call = c.getACall()
}
private CallableValue getCallableValue() { c.getScope().getInitMethod() = result.getScope() }
override Node getArg(int n) {
n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
or
n = 0 and result = TSyntheticPreUpdateNode(TCfgNode(call))
}
override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
}
/** A call to a special method. */
class SpecialCall extends ExtractedDataFlowCall, TSpecialCall {
SpecialMethodCallNode special;
SpecialCall() { this = TSpecialCall(special) }
override string toString() { result = special.toString() }
override Node getArg(int n) { result = TCfgNode(special.(SpecialMethod::Potential).getArg(n)) }
override ControlFlowNode getNode() { result = special }
override DataFlowCallable getCallable() {
result = TCallableValue(special.getResolvedSpecialMethod())
}
override DataFlowCallable getEnclosingCallable() {
result.getScope() = special.getNode().getScope()
}
}
/**
* A call to a summarized callable, a `LibraryCallable`.
*
* We currently exclude all resolved calls. This means that a call to, say, `map`, which
* is a `ClassCall`, cannot currently be given a summary.
* We hope to lift this restriction in the future and include all potential calls to summaries
* in this class.
*/
class LibraryCall extends NormalCall {
LibraryCall() {
// TODO: share this with `resolvedCall`
not (
call = any(DataFlowCallableValue cv).getACall()
or
call = any(DataFlowLambda l).getACall()
or
// TODO: this should be covered by `DataFlowCallableValue`, but a `ClassValue` is not a `CallableValue`.
call = any(ClassValue c).getACall()
)
}
// TODO: Implement Python calling convention?
override Node getArg(int n) { result = TCfgNode(call.getArg(n)) }
// We cannot refer to a `LibraryCallable` here,
// as that could in turn refer to type tracking.
// This call will be tied to a `LibraryCallable` via
// `getViableCallabe` when the global data flow is assembled.
override DataFlowCallable getCallable() { none() }
}
/**
* A synthesized call inside a callable with a flow summary.
*
* For example, in
* ```python
* map(lambda x: x + 1, [1, 2, 3])
* ```
*
* there is a synthesized call to the lambda argument inside `map`.
*/
class SummaryCall extends DataFlowCall, TSummaryCall {
private FlowSummaryImpl::Public::SummarizedCallable c;
private Node receiver;
SummaryCall() { this = TSummaryCall(c, receiver) }
/** Gets the data flow node that this call targets. */
Node getReceiver() { result = receiver }
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
override DataFlowCallable getCallable() { none() }
override Node getArg(int n) { none() }
override ControlFlowNode getNode() { none() }
override string toString() { result = "[summary] call to " + receiver + " in " + c }
override Location getLocation() { none() }
}
/**
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*/
abstract class ParameterNodeImpl extends Node {
abstract Parameter getParameter();
/**
* Holds if this node is the parameter of callable `c` at the
* (zero-based) index `i`.
*/
abstract predicate isParameterOf(DataFlowCallable c, int i);
}
/** A parameter for a library callable with a flow summary. */
class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
private FlowSummaryImpl::Public::SummarizedCallable sc;
private int pos;
SummaryParameterNode() { this = TSummaryParameterNode(sc, pos) }
override Parameter getParameter() { none() }
override predicate isParameterOf(DataFlowCallable c, int i) {
sc = c.asLibraryCallable() and i = pos
}
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = sc }
override string toString() { result = "parameter " + pos + " of " + sc }
// Hack to return "empty location"
override predicate hasLocationInfo(
string file, int startline, int startcolumn, int endline, int endcolumn
) {
file = "" and
startline = 0 and
startcolumn = 0 and
endline = 0 and
endcolumn = 0
}
}
/** A data-flow node used to model flow summaries. */
class SummaryNode extends Node, TSummaryNode {
private FlowSummaryImpl::Public::SummarizedCallable c;
private FlowSummaryImpl::Private::SummaryNodeState state;
SummaryNode() { this = TSummaryNode(c, state) }
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
override string toString() { result = "[summary] " + state + " in " + c }
// Hack to return "empty location"
override predicate hasLocationInfo(
string file, int startline, int startcolumn, int endline, int endcolumn
) {
file = "" and
startline = 0 and
startcolumn = 0 and
endline = 0 and
endcolumn = 0
}
}
private class SummaryReturnNode extends SummaryNode, ReturnNode {
private ReturnKind rk;
SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this, rk) }
override ReturnKind getKind() { result = rk }
}
private class SummaryArgumentNode extends SummaryNode, ArgumentNode {
SummaryArgumentNode() { FlowSummaryImpl::Private::summaryArgumentNode(_, this, _) }
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
FlowSummaryImpl::Private::summaryArgumentNode(call, this, pos)
}
}
private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode {
private Node pre;
SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, pre) }
override Node getPreUpdateNode() { result = pre }
}
/** Gets a viable run-time target for the call `call`. */
DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
result = call.getCallable()
or
// A call to a library callable with a flow summary
// In this situation we can not resolve the callable from the call,
// as that would make data flow depend on type tracking.
// Instead we resolve the call from the summary.
exists(LibraryCallable callable |
result = TLibraryCallable(callable) and
call.getNode() = callable.getACall().getNode()
)
}
private newtype TReturnKind = TNormalReturnKind()
/**
* A return kind. A return kind describes how a value can be returned
* from a callable. For Python, this is simply a method return.
*/
class ReturnKind extends TReturnKind {
/** Gets a textual representation of this element. */
string toString() { result = "return" }
}
/** A data flow node that represents a value returned by a callable. */
abstract class ReturnNode extends Node {
/** Gets the kind of this return node. */
ReturnKind getKind() { any() }
}
/** A data flow node that represents a value returned by a callable. */
class ExtractedReturnNode extends ReturnNode, CfgNode {
// See `TaintTrackingImplementation::returnFlowStep`
ExtractedReturnNode() { node = any(Return ret).getValue().getAFlowNode() }
override ReturnKind getKind() { any() }
}
/** A data-flow node that represents the output of a call. */
abstract class OutNode extends Node {
/** Gets the underlying call, where this node is a corresponding output of kind `kind`. */
abstract DataFlowCall getCall(ReturnKind kind);
}
private module OutNodes {
/**
* A data-flow node that reads a value returned directly by a callable.
*/
class ExprOutNode extends OutNode, ExprNode {
private DataFlowCall call;
ExprOutNode() { call.(ExtractedDataFlowCall).getNode() = this.getNode() }
override DataFlowCall getCall(ReturnKind kind) {
result = call and
kind = kind
}
}
private class SummaryOutNode extends SummaryNode, OutNode {
SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this, _) }
override DataFlowCall getCall(ReturnKind kind) {
FlowSummaryImpl::Private::summaryOutNode(result, this, kind)
}
}
}
/**
* Gets a node that can read the value returned from `call` with return kind
* `kind`.
*/
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) }

View File

@@ -0,0 +1,396 @@
/**
* DEPRECATED: Use `Make` and `MakeWithState` instead.
*
* Provides a `Configuration` class backwards-compatible interface to the data
* flow library.
*/
private import DataFlowImplCommon
private import DataFlowImplSpecific::Private
import DataFlowImplSpecific::Public
private import DataFlowImpl
import DataFlowImplCommonPublic
import FlowStateString
/**
* A configuration of interprocedural data flow analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the global data flow library must define its own unique extension
* of this abstract class. To create a configuration, extend this class with
* a subclass whose characteristic predicate is a unique singleton string.
* For example, write
*
* ```ql
* class MyAnalysisConfiguration extends DataFlow::Configuration {
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
* // Override `isSource` and `isSink`.
* // Optionally override `isBarrier`.
* // Optionally override `isAdditionalFlowStep`.
* }
* ```
* Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and
* the edges are those data-flow steps that preserve the value of the node
* along with any additional edges defined by `isAdditionalFlowStep`.
* Specifying nodes in `isBarrier` will remove those nodes from the graph, and
* specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going
* and/or out-going edges from those nodes, respectively.
*
* Then, to query whether there is flow between some `source` and `sink`,
* write
*
* ```ql
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
* ```
*
* Multiple configurations can coexist, but two classes extending
* `DataFlow::Configuration` should never depend on each other. One of them
* should instead depend on a `DataFlow2::Configuration`, a
* `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
*/
abstract class Configuration extends string {
bindingset[this]
Configuration() { any() }
/**
* Holds if `source` is a relevant data flow source.
*/
predicate isSource(Node source) { none() }
/**
* Holds if `source` is a relevant data flow source with the given initial
* `state`.
*/
predicate isSource(Node source, FlowState state) { none() }
/**
* Holds if `sink` is a relevant data flow sink.
*/
predicate isSink(Node sink) { none() }
/**
* Holds if `sink` is a relevant data flow sink accepting `state`.
*/
predicate isSink(Node sink, FlowState state) { none() }
/**
* Holds if data flow through `node` is prohibited. This completely removes
* `node` from the data flow graph.
*/
predicate isBarrier(Node node) { none() }
/**
* Holds if data flow through `node` is prohibited when the flow state is
* `state`.
*/
predicate isBarrier(Node node, FlowState state) { none() }
/** Holds if data flow into `node` is prohibited. */
predicate isBarrierIn(Node node) { none() }
/** Holds if data flow out of `node` is prohibited. */
predicate isBarrierOut(Node node) { none() }
/**
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
*
* Holds if data flow through nodes guarded by `guard` is prohibited.
*/
deprecated predicate isBarrierGuard(BarrierGuard guard) { none() }
/**
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
*
* Holds if data flow through nodes guarded by `guard` is prohibited when
* the flow state is `state`
*/
deprecated predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
*/
predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
none()
}
/**
* Holds if an arbitrary number of implicit read steps of content `c` may be
* taken at `node`.
*/
predicate allowImplicitRead(Node node, ContentSet c) { none() }
/**
* Gets the virtual dispatch branching limit when calculating field flow.
* This can be overridden to a smaller value to improve performance (a
* value of 0 disables field flow), or a larger value to get more results.
*/
int fieldFlowBranchLimit() { result = 2 }
/**
* Gets a data flow configuration feature to add restrictions to the set of
* valid flow paths.
*
* - `FeatureHasSourceCallContext`:
* Assume that sources have some existing call context to disallow
* conflicting return-flow directly following the source.
* - `FeatureHasSinkCallContext`:
* Assume that sinks have some existing call context to disallow
* conflicting argument-to-parameter flow directly preceding the sink.
* - `FeatureEqualSourceSinkCallContext`:
* Implies both of the above and additionally ensures that the entire flow
* path preserves the call context.
*
* These features are generally not relevant for typical end-to-end data flow
* queries, but should only be used for constructing paths that need to
* somehow be pluggable in another path context.
*/
FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
predicate hasFlow(Node source, Node sink) { hasFlow(source, sink, this) }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink) { hasFlowPath(source, sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
*/
predicate hasFlowTo(Node sink) { hasFlowTo(sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
*/
predicate hasFlowToExpr(DataFlowExpr sink) { this.hasFlowTo(exprNode(sink)) }
/**
* DEPRECATED: Use `FlowExploration<explorationLimit>` instead.
*
* Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
* measured in approximate number of interprocedural steps.
*/
deprecated int explorationLimit() { none() }
/**
* Holds if hidden nodes should be included in the data flow graph.
*
* This feature should only be used for debugging or when the data flow graph
* is not visualized (for example in a `path-problem` query).
*/
predicate includeHiddenNodes() { none() }
}
/**
* This class exists to prevent mutual recursion between the user-overridden
* member predicates of `Configuration` and the rest of the data-flow library.
* Good performance cannot be guaranteed in the presence of such recursion, so
* it should be replaced by using more than one copy of the data flow library.
*/
abstract private class ConfigurationRecursionPrevention extends Configuration {
bindingset[this]
ConfigurationRecursionPrevention() { any() }
override predicate hasFlow(Node source, Node sink) {
strictcount(Node n | this.isSource(n)) < 0
or
strictcount(Node n | this.isSource(n, _)) < 0
or
strictcount(Node n | this.isSink(n)) < 0
or
strictcount(Node n | this.isSink(n, _)) < 0
or
strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0
or
strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, _, n2, _)) < 0
or
super.hasFlow(source, sink)
}
}
/** A bridge class to access the deprecated `isBarrierGuard`. */
private class BarrierGuardGuardedNodeBridge extends Unit {
abstract predicate guardedNode(Node n, Configuration config);
abstract predicate guardedNode(Node n, FlowState state, Configuration config);
}
private class BarrierGuardGuardedNode extends BarrierGuardGuardedNodeBridge {
deprecated override predicate guardedNode(Node n, Configuration config) {
exists(BarrierGuard g |
config.isBarrierGuard(g) and
n = g.getAGuardedNode()
)
}
deprecated override predicate guardedNode(Node n, FlowState state, Configuration config) {
exists(BarrierGuard g |
config.isBarrierGuard(g, state) and
n = g.getAGuardedNode()
)
}
}
private FlowState relevantState(Configuration config) {
config.isSource(_, result) or
config.isSink(_, result) or
config.isBarrier(_, result) or
config.isAdditionalFlowStep(_, result, _, _) or
config.isAdditionalFlowStep(_, _, _, result)
}
private newtype TConfigState =
TMkConfigState(Configuration config, FlowState state) {
state = relevantState(config) or state instanceof FlowStateEmpty
}
private Configuration getConfig(TConfigState state) { state = TMkConfigState(result, _) }
private FlowState getState(TConfigState state) { state = TMkConfigState(_, result) }
private predicate singleConfiguration() { 1 = strictcount(Configuration c) }
private module Config implements FullStateConfigSig {
class FlowState = TConfigState;
predicate isSource(Node source, FlowState state) {
getConfig(state).isSource(source, getState(state))
or
getConfig(state).isSource(source) and getState(state) instanceof FlowStateEmpty
}
predicate isSink(Node sink, FlowState state) {
getConfig(state).isSink(sink, getState(state))
or
getConfig(state).isSink(sink) and getState(state) instanceof FlowStateEmpty
}
predicate isBarrier(Node node) { none() }
predicate isBarrier(Node node, FlowState state) {
getConfig(state).isBarrier(node, getState(state)) or
getConfig(state).isBarrier(node) or
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getState(state), getConfig(state)) or
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getConfig(state))
}
predicate isBarrierIn(Node node) { any(Configuration config).isBarrierIn(node) }
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
predicate isAdditionalFlowStep(Node node1, Node node2) {
singleConfiguration() and
any(Configuration config).isAdditionalFlowStep(node1, node2)
}
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
getConfig(state1).isAdditionalFlowStep(node1, getState(state1), node2, getState(state2)) and
getConfig(state2) = getConfig(state1)
or
not singleConfiguration() and
getConfig(state1).isAdditionalFlowStep(node1, node2) and
state2 = state1
}
predicate allowImplicitRead(Node node, ContentSet c) {
any(Configuration config).allowImplicitRead(node, c)
}
int fieldFlowBranchLimit() { result = min(any(Configuration config).fieldFlowBranchLimit()) }
FlowFeature getAFeature() { result = any(Configuration config).getAFeature() }
predicate sourceGrouping(Node source, string sourceGroup) {
any(Configuration config).sourceGrouping(source, sourceGroup)
}
predicate sinkGrouping(Node sink, string sinkGroup) {
any(Configuration config).sinkGrouping(sink, sinkGroup)
}
predicate includeHiddenNodes() { any(Configuration config).includeHiddenNodes() }
}
private import Impl<Config> as I
import I
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof I::PathNode {
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
final string toStringWithContext() { result = super.toStringWithContext() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
final predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
final Node getNode() { result = super.getNode() }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = getState(super.getState()) }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = getConfig(super.getState()) }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getASuccessor() }
/** Holds if this node is a source. */
final predicate isSource() { super.isSource() }
/** Holds if this node is a grouping of source nodes. */
final predicate isSourceGroup(string group) { super.isSourceGroup(group) }
/** Holds if this node is a grouping of sink nodes. */
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
}
private predicate hasFlow(Node source, Node sink, Configuration config) {
exists(PathNode source0, PathNode sink0 |
hasFlowPath(source0, sink0, config) and
source0.getNode() = source and
sink0.getNode() = sink
)
}
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
hasFlowPath(source, sink) and source.getConfiguration() = config
}
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }
predicate flowsTo = hasFlow/3;

View File

@@ -3,15 +3,18 @@ private import DataFlowImplSpecific::Public
import Cached
module DataFlowImplCommonPublic {
/** A state value to track during data flow. */
class FlowState = string;
/** Provides `FlowState = string`. */
module FlowStateString {
/** A state value to track during data flow. */
class FlowState = string;
/**
* The default state, which is used when the state is unspecified for a source
* or a sink.
*/
class FlowStateEmpty extends FlowState {
FlowStateEmpty() { this = "" }
/**
* The default state, which is used when the state is unspecified for a source
* or a sink.
*/
class FlowStateEmpty extends FlowState {
FlowStateEmpty() { this = "" }
}
}
private newtype TFlowFeature =
@@ -707,8 +710,8 @@ private module Cached {
* Gets a viable dispatch target of `call` in the context `ctx`. This is
* restricted to those `call`s for which a context might make a difference.
*/
pragma[nomagic]
private DataFlowCallable viableImplInCallContextExt(DataFlowCall call, DataFlowCall ctx) {
cached
DataFlowCallable viableImplInCallContextExt(DataFlowCall call, DataFlowCall ctx) {
result = viableImplInCallContext(call, ctx) and
result = viableCallable(call)
or
@@ -1391,6 +1394,9 @@ class TypedContentApprox extends MkTypedContentApprox {
/** Gets a typed content approximated by this value. */
TypedContent getATypedContent() { result = getATypedContent(this) }
/** Gets the content. */
ContentApprox getContent() { result = c }
/** Gets the container type. */
DataFlowType getContainerType() { result = t }
@@ -1408,6 +1414,8 @@ abstract class ApproxAccessPathFront extends TApproxAccessPathFront {
abstract boolean toBoolNonEmpty();
TypedContentApprox getHead() { this = TApproxFrontHead(result) }
pragma[nomagic]
TypedContent getAHead() {
exists(TypedContentApprox cont |

View File

@@ -16,7 +16,7 @@ private import semmle.python.Frameworks
// make it more digestible.
import MatchUnpacking
import IterableUnpacking
import DataFlowDispatchPointsTo
import DataFlowDispatch
/** Gets the callable in which this node occurs. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
@@ -39,162 +39,268 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos)
//--------
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
/** DEPRECATED: Alias for `SyntheticPreUpdateNode` */
deprecated module syntheticPreUpdateNode = SyntheticPreUpdateNode;
// =============================================================================
// SyntheticPreUpdateNode
// =============================================================================
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
CallNode node;
/** A module collecting the different reasons for synthesising a pre-update node. */
module SyntheticPreUpdateNode {
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
NeedsSyntheticPreUpdateNode post;
SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(node) }
SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(post) }
/** Gets the node for which this is a synthetic pre-update node. */
CfgNode getPostUpdateNode() { result.getNode() = node }
/** Gets the node for which this is a synthetic pre-update node. */
Node getPostUpdateNode() { result = post }
override string toString() { result = "[pre] " + node.toString() }
override string toString() { result = "[pre " + post.label() + "] " + post.toString() }
override Scope getScope() { result = node.getScope() }
override Scope getScope() { result = post.getScope() }
override Location getLocation() { result = post.getLocation() }
}
/** A data flow node for which we should synthesise an associated pre-update node. */
class NeedsSyntheticPreUpdateNode extends PostUpdateNode {
NeedsSyntheticPreUpdateNode() { this = objectCreationNode() }
override Node getPreUpdateNode() { result.(SyntheticPreUpdateNode).getPostUpdateNode() = this }
/**
* Gets the label for this kind of node. This will figure in the textual representation of the synthesized pre-update node.
*
* There is currently only one reason for needing a pre-update node, so we always use that as the label.
*/
string label() { result = "objCreate" }
}
/**
* Calls to constructors are treated as post-update nodes for the synthesized argument
* that is mapped to the `self` parameter. That way, constructor calls represent the value of the
* object after the constructor (currently only `__init__`) has run.
*/
CfgNode objectCreationNode() { result.getNode() = any(ClassCall c).getNode() }
override Location getLocation() { result = node.getLocation() }
}
import SyntheticPreUpdateNode
// =============================================================================
// *args (StarArgs) related
// =============================================================================
/**
* A (synthetic) data-flow parameter node to capture all positional arguments that
* should be passed to the `*args` parameter.
*
* To handle
* ```py
* def func(*args):
* for arg in args:
* sink(arg)
*
* func(source1, source2, ...)
* ```
*
* we add a synthetic parameter to `func` that accepts any positional argument at (or
* after) the index for the `*args` parameter. We add a store step (at any list index) to the real
* `*args` parameter. This means we can handle the code above, but if the code had done `sink(args[0])`
* we would (wrongly) add flow for `source2` as well.
*
* To solve this more precisely, we could add a synthetic argument with position `*args`
* that had store steps with the correct index (like we do for mapping keyword arguments to a
* `**kwargs` parameter). However, if a single call could go to 2 different
* targets with `*args` parameters at different positions, as in the example below, it's unclear what
* index to store `2` at. For the `foo` callable it should be 1, for the `bar` callable it should be 0.
* So this information would need to be encoded in the arguments of a `ArgumentPosition` branch, and
* one of the arguments would be which callable is the target. However, we cannot build `ArgumentPosition`
* branches based on the call-graph, so this strategy doesn't work.
*
* Another approach to solving it precisely is to add multiple synthetic parameters that have store steps
* to the real `*args` parameter. So for the example below, `foo` would need to have synthetic parameter
* nodes for indexes 1 and 2 (which would have store step for index 0 and 1 of the `*args` parameter),
* and `bar` would need it for indexes 1, 2, and 3. The question becomes how many synthetic parameters to
* create, which _must_ be `max(Call call, int i | exists(call.getArg(i)))`, since (again) we can't base
* this on the call-graph. And each function with a `*args` parameter would need this many extra synthetic
* nodes. My gut feeling at that this simple approach will be good enough, but if we need to get it more
* precise, it should be possible to do it like this.
*
* In PR review, @yoff suggested an alternative approach for more precise handling:
*
* - At the call site, all positional arguments are stored into a synthetic starArgs argument, always tarting at index 0
* - This is sent to a synthetic star parameter
* - At the receiving end, we know the offset of a potential real star parameter, so we can define read steps accordingly: In foo, we read from the synthetic star parameter at index 1 and store to the real star parameter at index 0.
*
* ```py
* def foo(one, *args): ...
* def bar(*args): ...
*
* func = foo if <cond> else bar
* func(1, 2, 3)
*/
class SynthStarArgsElementParameterNode extends ParameterNodeImpl,
TSynthStarArgsElementParameterNode
{
DataFlowCallable callable;
/** DEPRECATED: Alias for `SyntheticPostUpdateNode` */
deprecated module syntheticPostUpdateNode = SyntheticPostUpdateNode;
SynthStarArgsElementParameterNode() { this = TSynthStarArgsElementParameterNode(callable) }
/** A module collecting the different reasons for synthesising a post-update node. */
module SyntheticPostUpdateNode {
private import semmle.python.SpecialMethods
override string toString() { result = "SynthStarArgsElementParameterNode" }
/** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
NeedsSyntheticPostUpdateNode pre;
override Scope getScope() { result = callable.getScope() }
SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(pre) }
override Location getLocation() { result = callable.getLocation() }
override Node getPreUpdateNode() { result = pre }
override string toString() { result = "[post " + pre.label() + "] " + pre.toString() }
override Scope getScope() { result = pre.getScope() }
override Location getLocation() { result = pre.getLocation() }
}
/** A data flow node for which we should synthesise an associated post-update node. */
class NeedsSyntheticPostUpdateNode extends Node {
NeedsSyntheticPostUpdateNode() {
this = argumentPreUpdateNode()
or
this = storePreUpdateNode()
or
this = readPreUpdateNode()
}
/**
* Gets the label for this kind of node. This will figure in the textual representation of the synthesized post-update node.
* We favour being an arguments as the reason for the post-update node in case multiple reasons apply.
*/
string label() {
if this = argumentPreUpdateNode()
then result = "arg"
else
if this = storePreUpdateNode()
then result = "store"
else result = "read"
}
}
/**
* Gets the pre-update node for this node.
*
* An argument might have its value changed as a result of a call.
* Certain arguments, such as implicit self arguments are already post-update nodes
* and should not have an extra node synthesised.
*/
Node argumentPreUpdateNode() {
result = any(FunctionCall c).getArg(_)
or
result = any(LambdaCall c).getArg(_)
or
// Avoid argument 0 of method calls as those have read post-update nodes.
exists(MethodCall c, int n | n > 0 | result = c.getArg(n))
or
result = any(SpecialCall c).getArg(_)
or
// Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
exists(ClassCall c, int n | n > 0 | result = c.getArg(n))
or
// any argument of any call that we have not been able to resolve
exists(CallNode call | not resolvedCall(call) |
result.(CfgNode).getNode() in [call.getArg(_), call.getArgByName(_)]
)
}
/** Holds if `call` can be resolved as a normal call */
private predicate resolvedCall(CallNode call) {
call = any(DataFlowCallableValue cv).getACall()
or
call = any(DataFlowLambda l).getACall()
}
/** Gets the pre-update node associated with a store. This is used for when an object might have its value changed after a store. */
CfgNode storePreUpdateNode() {
exists(Attribute a |
result.getNode() = a.getObject().getAFlowNode() and
a.getCtx() instanceof Store
)
}
/**
* Gets a node marking the state change of an object after a read.
*
* A reverse read happens when the result of a read is modified, e.g. in
* ```python
* l = [ mutable ]
* l[0].mutate()
* ```
* we may now have changed the content of `l`. To track this, there must be
* a postupdate node for `l`.
*/
CfgNode readPreUpdateNode() {
exists(Attribute a |
result.getNode() = a.getObject().getAFlowNode() and
a.getCtx() instanceof Load
)
or
result.getNode() = any(SubscriptNode s).getObject()
or
// The dictionary argument is read from if the callable has parameters matching the keys.
result.getNode().getNode() = any(Call call).getKwargs()
}
override Parameter getParameter() { none() }
}
import SyntheticPostUpdateNode
predicate synthStarArgsElementParameterNodeStoreStep(
SynthStarArgsElementParameterNode nodeFrom, ListElementContent c, ParameterNode nodeTo
) {
c = c and // suppress warning about unused parameter
exists(DataFlowCallable callable, ParameterPosition ppos |
nodeFrom = TSynthStarArgsElementParameterNode(callable) and
nodeTo = callable.getParameter(ppos) and
ppos.isStarArgs(_)
)
}
// =============================================================================
// **kwargs (DictSplat) related
// =============================================================================
/**
* A (synthetic) data-flow node that represents all keyword arguments, as if they had
* been passed in a `**kwargs` argument.
*/
class SynthDictSplatArgumentNode extends Node, TSynthDictSplatArgumentNode {
CallNode node;
SynthDictSplatArgumentNode() { this = TSynthDictSplatArgumentNode(node) }
override string toString() { result = "SynthDictSplatArgumentNode" }
override Scope getScope() { result = node.getScope() }
override Location getLocation() { result = node.getLocation() }
}
private predicate synthDictSplatArgumentNodeStoreStep(
ArgumentNode nodeFrom, DictionaryElementContent c, SynthDictSplatArgumentNode nodeTo
) {
exists(string name, CallNode call, ArgumentPosition keywordPos |
nodeTo = TSynthDictSplatArgumentNode(call) and
getCallArg(call, _, _, nodeFrom, keywordPos) and
keywordPos.isKeyword(name) and
c.getKey() = name
)
}
/**
* Ensures that the a `**kwargs` parameter will not contain elements with names of
* keyword parameters.
*
* For example, for the function below, it's not possible that the `kwargs` dictionary
* can contain an element with the name `a`, since that parameter can be given as a
* keyword argument.
*
* ```py
* def func(a, **kwargs):
* ...
* ```
*/
private predicate dictSplatParameterNodeClearStep(ParameterNode n, DictionaryElementContent c) {
exists(DataFlowCallable callable, ParameterPosition dictSplatPos, ParameterPosition keywordPos |
dictSplatPos.isDictSplat() and
(
n.getParameter() = callable.(DataFlowFunction).getScope().getKwarg()
or
n = TSummaryParameterNode(callable.asLibraryCallable(), dictSplatPos)
) and
exists(callable.getParameter(keywordPos)) and
keywordPos.isKeyword(c.getKey())
)
}
/**
* A synthetic data-flow node to allow flow to keyword parameters from a `**kwargs` argument.
*
* Take the code snippet below as an example. Since the call only has a `**kwargs` argument,
* with a `**` argument position, we add this synthetic parameter node with `**` parameter position,
* and a read step to the `p1` parameter.
*
* ```py
* def foo(p1, p2): ...
*
* kwargs = {"p1": 42, "p2": 43}
* foo(**kwargs)
* ```
*
*
* Note that this will introduce a bit of redundancy in cases like
*
* ```py
* foo(p1=taint(1), p2=taint(2))
* ```
*
* where direct keyword matching is possible, since we construct a synthesized dict
* splat argument (`SynthDictSplatArgumentNode`) at the call site, which means that
* `taint(1)` will flow into `p1` both via normal keyword matching and via the synthesized
* nodes (and similarly for `p2`). However, this redundancy is OK since
* (a) it means that type-tracking through keyword arguments also works in most cases,
* (b) read/store steps can be avoided when direct keyword matching is possible, and
* hence access path limits are not a concern, and
* (c) since the synthesized nodes are hidden, the reported data-flow paths will be
* collapsed anyway.
*/
class SynthDictSplatParameterNode extends ParameterNodeImpl, TSynthDictSplatParameterNode {
DataFlowCallable callable;
SynthDictSplatParameterNode() { this = TSynthDictSplatParameterNode(callable) }
override string toString() { result = "SynthDictSplatParameterNode" }
override Scope getScope() { result = callable.getScope() }
override Location getLocation() { result = callable.getLocation() }
override Parameter getParameter() { none() }
}
/**
* Flow step from the synthetic `**kwargs` parameter to the real `**kwargs` parameter.
* Due to restriction in dataflow library, we can only give one of them as result for
* `DataFlowCallable.getParameter`, so this is a workaround to ensure there is flow to
* _both_ of them.
*/
private predicate dictSplatParameterNodeFlowStep(
ParameterNodeImpl nodeFrom, ParameterNodeImpl nodeTo
) {
exists(DataFlowCallable callable |
nodeFrom = TSynthDictSplatParameterNode(callable) and
(
nodeTo.getParameter() = callable.(DataFlowFunction).getScope().getKwarg()
or
exists(ParameterPosition pos |
nodeTo = TSummaryParameterNode(callable.asLibraryCallable(), pos) and
pos.isDictSplat()
)
)
)
}
/**
* Reads from the synthetic **kwargs parameter to each keyword parameter.
*/
predicate synthDictSplatParameterNodeReadStep(
SynthDictSplatParameterNode nodeFrom, DictionaryElementContent c, ParameterNode nodeTo
) {
exists(DataFlowCallable callable, ParameterPosition ppos |
nodeFrom = TSynthDictSplatParameterNode(callable) and
nodeTo = callable.getParameter(ppos) and
ppos.isKeyword(c.getKey())
)
}
// =============================================================================
// PostUpdateNode
// =============================================================================
abstract class PostUpdateNodeImpl extends Node {
/** Gets the node before the state update. */
abstract Node getPreUpdateNode();
}
class SyntheticPostUpdateNode extends PostUpdateNodeImpl, TSyntheticPostUpdateNode {
ControlFlowNode node;
SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(node) }
override Node getPreUpdateNode() { result.(CfgNode).getNode() = node }
override string toString() { result = "[post] " + node.toString() }
override Scope getScope() { result = node.getScope() }
override Location getLocation() { result = node.getLocation() }
}
class NonSyntheticPostUpdateNode extends PostUpdateNodeImpl, CfgNode {
SyntheticPreUpdateNode pre;
NonSyntheticPostUpdateNode() { this = pre.getPostUpdateNode() }
override Node getPreUpdateNode() { result = pre }
}
class DataFlowExpr = Expr;
@@ -274,13 +380,6 @@ module EssaFlow {
iterableUnpackingFlowStep(nodeFrom, nodeTo)
or
matchFlowStep(nodeFrom, nodeTo)
or
// Overflow keyword argument
exists(CallNode call, CallableValue callable |
call = callable.getACall() and
nodeTo = TKwOverflowNode(call, callable) and
nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode()
)
}
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
@@ -305,6 +404,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo)
or
summaryFlowSteps(nodeFrom, nodeTo)
or
dictSplatParameterNodeFlowStep(nodeFrom, nodeTo)
}
/**
@@ -521,15 +622,15 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
or
attributeStoreStep(nodeFrom, c, nodeTo)
or
posOverflowStoreStep(nodeFrom, c, nodeTo)
or
kwOverflowStoreStep(nodeFrom, c, nodeTo)
or
matchStoreStep(nodeFrom, c, nodeTo)
or
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom, c, nodeTo)
or
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
or
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
}
/**
@@ -669,30 +770,6 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, PostUpdateNode n
)
}
/**
* Holds if `nodeFrom` flows into the synthesized positional overflow argument (`nodeTo`)
* at the position indicated by `c`.
*/
predicate posOverflowStoreStep(CfgNode nodeFrom, TupleElementContent c, Node nodeTo) {
exists(CallNode call, CallableValue callable, int n |
nodeFrom.asCfgNode() = getPositionalOverflowArg(call, callable, n) and
nodeTo = TPosOverflowNode(call, callable) and
c.getIndex() = n
)
}
/**
* Holds if `nodeFrom` flows into the synthesized keyword overflow argument (`nodeTo`)
* at the key indicated by `c`.
*/
predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
exists(CallNode call, CallableValue callable, string key |
nodeFrom.asCfgNode() = getKeywordOverflowArg(call, callable, key) and
nodeTo = TKwOverflowNode(call, callable) and
c.getKey() = key
)
}
predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
exists(Function f, Parameter p, ParameterDefinition def |
// `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
@@ -722,9 +799,9 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
or
attributeReadStep(nodeFrom, c, nodeTo)
or
kwUnpackReadStep(nodeFrom, c, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom, c, nodeTo)
or
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
}
/** Data flows from a sequence to a subscript of the sequence. */
@@ -814,43 +891,19 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
nodeTo.accesses(nodeFrom, c.getAttribute())
}
/**
* Holds if `nodeFrom` is a dictionary argument being unpacked and `nodeTo` is the
* synthesized unpacked argument with the name indicated by `c`.
*/
predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
exists(CallNode call, string name |
nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode() and
nodeTo = TKwUnpackedNode(call, _, name) and
name = c.getKey()
)
}
/**
* Clear content at key `name` of the synthesized dictionary `TKwOverflowNode(call, callable)`,
* whenever `call` unpacks `name`.
*/
predicate kwOverflowClearStep(Node n, Content c) {
exists(CallNode call, CallableValue callable, string name |
call_unpacks(call, _, callable, name, _) and
n = TKwOverflowNode(call, callable) and
c.(DictionaryElementContent).getKey() = name
)
}
/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
*/
predicate clearsContent(Node n, Content c) {
kwOverflowClearStep(n, c)
or
matchClearStep(n, c)
or
attributeClearStep(n, c)
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
or
dictSplatParameterNodeClearStep(n, c)
}
/**
@@ -906,23 +959,24 @@ predicate nodeIsHidden(Node n) {
n instanceof SummaryNode
or
n instanceof SummaryParameterNode
or
n instanceof SynthStarArgsElementParameterNode
or
n instanceof SynthDictSplatArgumentNode
or
n instanceof SynthDictSplatParameterNode
}
class LambdaCallKind = Unit;
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
// lambda
// lambda and plain functions
kind = kind and
creation.asExpr() = c.(DataFlowLambda).getDefinition()
or
// normal function
exists(FunctionDef def |
def.defines(creation.asVar().getSourceVariable()) and
def.getDefinedFunction() = c.(DataFlowCallableValue).getCallableValue().getScope()
)
creation.asExpr() = c.(DataFlowPlainFunction).getScope().getDefinition()
or
// summarized function
exists(kind) and // avoid warning on unused 'kind'
exists(Call call |
creation.asExpr() = call.getAnArg() and
creation = c.(LibraryCallableValue).getACallback()
@@ -955,3 +1009,12 @@ class ContentApprox = Unit;
/** Gets an approximated value for content `c`. */
pragma[inline]
ContentApprox getContentApprox(Content c) { any() }
/**
* Gets an additional term that is added to the `join` and `branch` computations to reflect
* an additional forward or backwards branching factor that is not taken into account
* when calculating the (virtual) dispatch cost.
*
* Argument `arg` is part of a path from a source to a sink, and `p` is the target parameter.
*/
int getAdditionalFlowIntoCallNodeTerm(ArgumentNode arg, ParameterNode p) { none() }

View File

@@ -31,10 +31,44 @@ newtype TNode =
or
node.getNode() instanceof Pattern
} or
/** A synthetic node representing the value of an object before a state change */
TSyntheticPreUpdateNode(NeedsSyntheticPreUpdateNode post) or
/** A synthetic node representing the value of an object after a state change. */
TSyntheticPostUpdateNode(NeedsSyntheticPostUpdateNode pre) or
/**
* A synthetic node representing the value of an object before a state change.
*
* For class calls we pass a synthetic self argument, so attribute writes in
* `__init__` is reflected on the resulting object (we need special logic for this
* since there is no `return` in `__init__`)
*/
// NOTE: since we can't rely on the call graph, but we want to have synthetic
// pre-update nodes for class calls, we end up getting synthetic pre-update nodes for
// ALL calls :|
TSyntheticPreUpdateNode(CallNode call) or
/**
* A synthetic node representing the value of an object after a state change.
* See QLDoc for `PostUpdateNode`.
*/
TSyntheticPostUpdateNode(ControlFlowNode node) {
exists(CallNode call |
node = call.getArg(_)
or
node = call.getArgByName(_)
or
// `self` argument when handling class instance calls (`__call__` special method))
node = call.getFunction()
)
or
node = any(AttrNode a).getObject()
or
node = any(SubscriptNode s).getObject()
or
// self parameter when used implicitly in `super()`
exists(Class cls, Function func, ParameterDefinition def |
func = cls.getAMethod() and
not isStaticmethod(func) and
// this matches what we do in ExtractedParameterNode
def.getDefiningNode() = node and
def.getParameter() = func.getArg(0)
)
} or
/** A node representing a global (module-level) variable in a specific module. */
TModuleVariableNode(Module m, GlobalVariable v) {
v.getScope() = m and
@@ -45,37 +79,6 @@ newtype TNode =
ImportStar::globalNameDefinedInModule(v.getId(), m)
)
} or
/**
* A node representing the overflow positional arguments to a call.
* That is, `call` contains more positional arguments than there are
* positional parameters in `callable`. The extra ones are passed as
* a tuple to a starred parameter; this synthetic node represents that tuple.
*/
TPosOverflowNode(CallNode call, CallableValue callable) {
exists(getPositionalOverflowArg(call, callable, _))
} or
/**
* A node representing the overflow keyword arguments to a call.
* That is, `call` contains keyword arguments for keys that do not have
* keyword parameters in `callable`. These extra ones are passed as
* a dictionary to a doubly starred parameter; this synthetic node
* represents that dictionary.
*/
TKwOverflowNode(CallNode call, CallableValue callable) {
exists(getKeywordOverflowArg(call, callable, _))
or
ArgumentPassing::connects(call, callable) and
exists(call.getNode().getKwargs()) and
callable.getScope().hasKwArg()
} or
/**
* A node representing an unpacked element of a dictionary argument.
* That is, `call` contains argument `**{"foo": bar}` which is passed
* to parameter `foo` of `callable`.
*/
TKwUnpackedNode(CallNode call, CallableValue callable, string name) {
call_unpacks(call, _, callable, name, _)
} or
/**
* A synthetic node representing that an iterable sequence flows to consumer.
*/
@@ -109,10 +112,18 @@ newtype TNode =
} or
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
} or
/** A synthetic node to capture positional arguments that are passed to a `*args` parameter. */
TSynthStarArgsElementParameterNode(DataFlowCallable callable) {
exists(ParameterPosition ppos | ppos.isStarArgs(_) | exists(callable.getParameter(ppos)))
} or
/** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
/** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
TSynthDictSplatParameterNode(DataFlowCallable callable) {
exists(ParameterPosition ppos | ppos.isKeyword(_) | exists(callable.getParameter(ppos)))
}
class TParameterNode = TCfgNode or TSummaryParameterNode;
/** Helper for `Node::getEnclosingCallable`. */
private DataFlowCallable getCallableScope(Scope s) {
result.getScope() = s
@@ -288,7 +299,7 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*/
class ParameterNode extends Node, TParameterNode instanceof ParameterNodeImpl {
class ParameterNode extends Node instanceof ParameterNodeImpl {
/** Gets the parameter corresponding to this node, if any. */
final Parameter getParameter() { result = super.getParameter() }
}
@@ -298,18 +309,8 @@ class ExtractedParameterNode extends ParameterNodeImpl, CfgNode {
//, LocalSourceNode {
ParameterDefinition def;
ExtractedParameterNode() {
node = def.getDefiningNode() and
// Disregard parameters that we cannot resolve
// TODO: Make this unnecessary
exists(DataFlowCallable c | node = c.getParameter(_))
}
ExtractedParameterNode() { node = def.getDefiningNode() }
override predicate isParameterOf(DataFlowCallable c, int i) { node = c.getParameter(i) }
override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
/** Gets the `Parameter` this `ParameterNode` represents. */
override Parameter getParameter() { result = def.getParameter() }
}
@@ -327,16 +328,24 @@ abstract class ArgumentNode extends Node {
final ExtractedDataFlowCall getCall() { this.argumentOf(result, _) }
}
/** A data flow node that represents a call argument found in the source code. */
/**
* A data flow node that represents a call argument found in the source code.
*/
class ExtractedArgumentNode extends ArgumentNode {
ExtractedArgumentNode() { this = any(ExtractedDataFlowCall c).getArg(_) }
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
this.extractedArgumentOf(call, pos)
ExtractedArgumentNode() {
// for resolved calls, we need to allow all argument nodes
getCallArg(_, _, _, this, _)
or
// for potential summaries we allow all normal call arguments
normalCallArg(_, this, _)
or
// and self arguments
this.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject()
}
predicate extractedArgumentOf(ExtractedDataFlowCall call, ArgumentPosition pos) {
this = call.getArg(pos)
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
this = call.getArgument(pos) and
call instanceof ExtractedDataFlowCall
}
}
@@ -345,16 +354,17 @@ class ExtractedArgumentNode extends ArgumentNode {
* changed its state.
*
* This can be either the argument to a callable after the callable returns
* (which might have mutated the argument), or the qualifier of a field after
* an update to the field.
* (which might have mutated the argument), the qualifier of a field after
* an update to the field, or a container such as a list/dictionary after an element
* update.
*
* Nodes corresponding to AST elements, for example `ExprNode`s, usually refer
* to the value before the update with the exception of `ObjectCreationNode`s,
* to the value before the update with the exception of class calls,
* which represents the value _after_ the constructor has run.
*/
abstract class PostUpdateNode extends Node {
class PostUpdateNode extends Node instanceof PostUpdateNodeImpl {
/** Gets the node before the state update. */
abstract Node getPreUpdateNode();
Node getPreUpdateNode() { result = super.getPreUpdateNode() }
}
/**
@@ -448,70 +458,6 @@ private predicate resolved_import_star_module(Module m, string name, Node n) {
)
}
/**
* The node holding the extra positional arguments to a call. This node is passed as a tuple
* to the starred parameter of the callable.
*/
class PosOverflowNode extends Node, TPosOverflowNode {
CallNode call;
PosOverflowNode() { this = TPosOverflowNode(call, _) }
override string toString() { result = "PosOverflowNode for " + call.getNode().toString() }
override DataFlowCallable getEnclosingCallable() {
exists(Node node |
node = TCfgNode(call) and
result = node.getEnclosingCallable()
)
}
override Location getLocation() { result = call.getLocation() }
}
/**
* The node holding the extra keyword arguments to a call. This node is passed as a dictionary
* to the doubly starred parameter of the callable.
*/
class KwOverflowNode extends Node, TKwOverflowNode {
CallNode call;
KwOverflowNode() { this = TKwOverflowNode(call, _) }
override string toString() { result = "KwOverflowNode for " + call.getNode().toString() }
override DataFlowCallable getEnclosingCallable() {
exists(Node node |
node = TCfgNode(call) and
result = node.getEnclosingCallable()
)
}
override Location getLocation() { result = call.getLocation() }
}
/**
* The node representing the synthetic argument of a call that is unpacked from a dictionary
* argument.
*/
class KwUnpackedNode extends Node, TKwUnpackedNode {
CallNode call;
string name;
KwUnpackedNode() { this = TKwUnpackedNode(call, _, name) }
override string toString() { result = "KwUnpacked " + name }
override DataFlowCallable getEnclosingCallable() {
exists(Node node |
node = TCfgNode(call) and
result = node.getEnclosingCallable()
)
}
override Location getLocation() { result = call.getLocation() }
}
/**
* A synthetic node representing an iterable sequence. Used for changing content type
* for instance from a `ListElement` to a `TupleElement`, especially if the content is

View File

@@ -248,7 +248,9 @@ module Public {
/**
* Holds if all the summaries that apply to `this` are auto generated and not manually created.
*/
final predicate isAutoGenerated() { this.hasProvenance("generated") and not this.isManual() }
final predicate isAutoGenerated() {
this.hasProvenance(["generated", "ai-generated"]) and not this.isManual()
}
/**
* Holds if there exists a manual summary that applies to `this`.
@@ -268,7 +270,7 @@ module Public {
/**
* Holds if the neutral is auto generated.
*/
predicate isAutoGenerated() { neutralElement(this, "generated") }
predicate isAutoGenerated() { neutralElement(this, ["generated", "ai-generated"]) }
/**
* Holds if there exists a manual neutral that applies to `this`.
@@ -299,8 +301,8 @@ module Private {
TWithoutContentSummaryComponent(ContentSet c) or
TWithContentSummaryComponent(ContentSet c)
private TParameterSummaryComponent thisParam() {
result = TParameterSummaryComponent(instanceParameterPosition())
private TParameterSummaryComponent callbackSelfParam() {
result = TParameterSummaryComponent(callbackSelfParameterPosition())
}
newtype TSummaryComponentStack =
@@ -309,7 +311,7 @@ module Private {
any(RequiredSummaryComponentStack x).required(head, tail)
or
any(RequiredSummaryComponentStack x).required(TParameterSummaryComponent(_), tail) and
head = thisParam()
head = callbackSelfParam()
or
derivedFluentFlowPush(_, _, _, head, tail, _)
}
@@ -334,7 +336,7 @@ module Private {
callbackRef = s.drop(_) and
(isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and
input = callbackRef.tail() and
output = TConsSummaryComponentStack(thisParam(), input) and
output = TConsSummaryComponentStack(callbackSelfParam(), input) and
preservesValue = true
)
or
@@ -437,6 +439,9 @@ module Private {
out.head() = TParameterSummaryComponent(_) and
s = out.tail()
)
or
// Add the post-update node corresponding to the requested argument node
outputState(c, s) and isCallbackParameter(s)
}
private newtype TSummaryNodeState =
@@ -1010,7 +1015,7 @@ module Private {
private predicate relevantSummaryElementGenerated(
AccessPath inSpec, AccessPath outSpec, string kind
) {
summaryElement(this, inSpec, outSpec, kind, "generated") and
summaryElement(this, inSpec, outSpec, kind, ["generated", "ai-generated"]) and
not summaryElement(this, _, _, _, "manual")
}
@@ -1202,11 +1207,11 @@ module Private {
}
private string renderProvenance(SummarizedCallable c) {
if c.isAutoGenerated() then result = "generated" else result = "manual"
if c.isManual() then result = "manual" else c.hasProvenance(result)
}
private string renderProvenanceNeutral(NeutralCallable c) {
if c.isAutoGenerated() then result = "generated" else result = "manual"
if c.isManual() then result = "manual" else c.hasProvenance(result)
}
/**

View File

@@ -45,7 +45,7 @@ class SummarizedCallableBase = string;
DataFlowCallable inject(SummarizedCallable c) { result.asLibraryCallable() = c }
/** Gets the parameter position of the instance parameter. */
ArgumentPosition instanceParameterPosition() { none() } // disables implicit summary flow to `this` for callbacks
ArgumentPosition callbackSelfParameterPosition() { none() } // disables implicit summary flow to `this` for callbacks
/** Gets the synthesized summary data-flow node for the given values. */
Node summaryNode(SummarizedCallable c, SummaryNodeState state) { result = TSummaryNode(c, state) }
@@ -61,11 +61,11 @@ bindingset[c, rk]
DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
/**
* Gets the type of the `i`th parameter in a synthesized call that targets a
* callback of type `t`.
* Gets the type of the parameter matching arguments at position `pos` in a
* synthesized call that targets a callback of type `t`.
*/
bindingset[t, i]
DataFlowType getCallbackParameterType(DataFlowType t, int i) { any() }
bindingset[t, pos]
DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() }
/**
* Gets the return type of kind `rk` in a synthesized call that targets a
@@ -114,10 +114,34 @@ string getComponentSpecific(SummaryComponent sc) {
}
/** Gets the textual representation of a parameter position in the format used for flow summaries. */
string getParameterPosition(ParameterPosition pos) { result = pos.toString() }
string getParameterPosition(ParameterPosition pos) {
pos.isSelf() and result = "self"
or
exists(int i |
pos.isPositional(i) and
result = i.toString()
)
or
exists(string name |
pos.isKeyword(name) and
result = name + ":"
)
}
/** Gets the textual representation of an argument position in the format used for flow summaries. */
string getArgumentPosition(ArgumentPosition pos) { result = pos.toString() }
string getArgumentPosition(ArgumentPosition pos) {
pos.isSelf() and result = "self"
or
exists(int i |
pos.isPositional(i) and
result = i.toString()
)
or
exists(string name |
pos.isKeyword(name) and
result = name + ":"
)
}
/** Holds if input specification component `c` needs a reference. */
predicate inputNeedsReferenceSpecific(string c) { none() }
@@ -197,29 +221,55 @@ module ParsePositions {
)
}
predicate isParsedParameterPosition(string c, int i) {
predicate isParsedPositionalParameterPosition(string c, int i) {
isParamBody(c) and
i = AccessPath::parseInt(c)
}
predicate isParsedArgumentPosition(string c, int i) {
predicate isParsedKeywordParameterPosition(string c, string paramName) {
isParamBody(c) and
c = paramName + ":"
}
predicate isParsedPositionalArgumentPosition(string c, int i) {
isArgBody(c) and
i = AccessPath::parseInt(c)
}
predicate isParsedKeywordArgumentPosition(string c, string argName) {
isArgBody(c) and
c = argName + ":"
}
}
/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */
ArgumentPosition parseParamBody(string s) {
exists(int i |
ParsePositions::isParsedParameterPosition(s, i) and
ParsePositions::isParsedPositionalParameterPosition(s, i) and
result.isPositional(i)
)
or
exists(string name |
ParsePositions::isParsedKeywordParameterPosition(s, name) and
result.isKeyword(name)
)
or
s = "self" and
result.isSelf()
}
/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */
ParameterPosition parseArgBody(string s) {
exists(int i |
ParsePositions::isParsedArgumentPosition(s, i) and
ParsePositions::isParsedPositionalArgumentPosition(s, i) and
result.isPositional(i)
)
or
exists(string name |
ParsePositions::isParsedKeywordArgumentPosition(s, name) and
result.isKeyword(name)
)
or
s = "self" and
result.isSelf()
}

View File

@@ -71,13 +71,19 @@ module ImportResolution {
*/
pragma[nomagic]
predicate module_export(Module m, string name, DataFlow::CfgNode defn) {
exists(EssaVariable v |
exists(EssaVariable v, EssaDefinition essaDef |
v.getName() = name and
v.getAUse() = ImportStar::getStarImported*(m).getANormalExit()
v.getAUse() = ImportStar::getStarImported*(m).getANormalExit() and
(
essaDef = v.getDefinition()
or
// to handle definitions guarded by if-then-else
essaDef = v.getDefinition().(PhiFunction).getAnInput()
)
|
defn.getNode() = v.getDefinition().(AssignmentDefinition).getValue()
defn.getNode() = essaDef.(AssignmentDefinition).getValue()
or
defn.getNode() = v.getDefinition().(ArgumentRefinement).getArgument()
defn.getNode() = essaDef.(ArgumentRefinement).getArgument()
)
or
exists(Alias a |
@@ -167,8 +173,22 @@ module ImportResolution {
)
}
/**
* Gets the (most likely) module for the name `name`, if any.
*
* Handles the fact that for the name `<pkg>` representing a package the actual module
* is `<pkg>.__init__`.
*
* See `isPreferredModuleForName` for more details on what "most likely" module means.
*/
pragma[inline]
private Module getModuleFromName(string name) {
isPreferredModuleForName(result.getFile(), name + ["", ".__init__"])
}
/** Gets the module from which attributes are imported by `i`. */
Module getModuleImportedByImportStar(ImportStar i) {
isPreferredModuleForName(result.getFile(), i.getImportedModuleName())
result = getModuleFromName(i.getImportedModuleName())
}
/**
@@ -223,7 +243,7 @@ module ImportResolution {
exists(string module_name | result = getReferenceToModuleName(module_name) |
// Depending on whether the referenced module is a package or not, we may need to add a
// trailing `.__init__` to the module name.
isPreferredModuleForName(m.getFile(), module_name + ["", ".__init__"])
m = getModuleFromName(module_name)
or
// Module defined via `sys.modules`
m = sys_modules_module_with_name(module_name)
@@ -234,7 +254,7 @@ module ImportResolution {
ar.accesses(getModuleReference(p), attr_name) and
result = ar
|
isPreferredModuleForName(m.getFile(), p.getPackageName() + "." + attr_name + ["", ".__init__"])
m = getModuleFromName(p.getPackageName() + "." + attr_name)
)
or
// This is also true for attributes that come from reexports.
@@ -248,8 +268,7 @@ module ImportResolution {
exists(string submodule, Module package |
SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
package.getEntryNode()) and
isPreferredModuleForName(m.getFile(),
package.getPackageName() + "." + submodule + ["", ".__init__"])
m = getModuleFromName(package.getPackageName() + "." + submodule)
)
}

View File

@@ -60,22 +60,6 @@ string getPossibleContentName() {
result = any(DataFlowPublic::AttrRef a).getAttributeName()
}
/**
* Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
*
* Helper predicate to avoid bad join order experienced in `callStep`.
* This happened when `isParameterOf` was joined _before_ `getCallable`.
*/
pragma[nomagic]
private DataFlowPrivate::DataFlowCallable getCallableForArgument(
DataFlowPublic::ExtractedArgumentNode nodeFrom, int i
) {
exists(DataFlowPrivate::ExtractedDataFlowCall call |
nodeFrom.extractedArgumentOf(call, i) and
result = call.getCallable()
)
}
/**
* Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
*
@@ -83,11 +67,15 @@ private DataFlowPrivate::DataFlowCallable getCallableForArgument(
* recursion (or, at best, terrible performance), since identifying calls to library
* methods is done using API graphs (which uses type tracking).
*/
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPrivate::ParameterNodeImpl nodeTo) {
// TODO: Support special methods?
exists(DataFlowPrivate::DataFlowCallable callable, int i |
callable = getCallableForArgument(nodeFrom, i) and
nodeTo.isParameterOf(callable, i)
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
exists(
DataFlowPrivate::DataFlowCall call, DataFlowPrivate::DataFlowCallable callable,
DataFlowPrivate::ArgumentPosition apos, DataFlowPrivate::ParameterPosition ppos
|
nodeFrom = call.getArgument(apos) and
nodeTo = callable.getParameter(ppos) and
DataFlowPrivate::parameterMatch(ppos, apos) and
callable = call.getCallable()
)
}

View File

@@ -0,0 +1,64 @@
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) taint-tracking analyses.
*/
import TaintTrackingParameter::Public
private import TaintTrackingParameter::Private
private module AddTaintDefaults<DataFlowInternal::FullStateConfigSig Config> implements
DataFlowInternal::FullStateConfigSig
{
import Config
predicate isBarrier(DataFlow::Node node) {
Config::isBarrier(node) or defaultTaintSanitizer(node)
}
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
Config::isAdditionalFlowStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}
predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
Config::allowImplicitRead(node, c)
or
(
Config::isSink(node, _) or
Config::isAdditionalFlowStep(node, _) or
Config::isAdditionalFlowStep(node, _, _, _)
) and
defaultImplicitTaintRead(node, c)
}
}
/**
* Constructs a standard taint tracking computation.
*/
module Make<DataFlow::ConfigSig Config> implements DataFlow::DataFlowSig {
private module Config0 implements DataFlowInternal::FullStateConfigSig {
import DataFlowInternal::DefaultState<Config>
import Config
}
private module C implements DataFlowInternal::FullStateConfigSig {
import AddTaintDefaults<Config0>
}
import DataFlowInternal::Impl<C>
}
/**
* Constructs a taint tracking computation using flow state.
*/
module MakeWithState<DataFlow::StateConfigSig Config> implements DataFlow::DataFlowSig {
private module Config0 implements DataFlowInternal::FullStateConfigSig {
import Config
}
private module C implements DataFlowInternal::FullStateConfigSig {
import AddTaintDefaults<Config0>
}
import DataFlowInternal::Impl<C>
}

View File

@@ -2,5 +2,6 @@ import semmle.python.dataflow.new.internal.TaintTrackingPublic as Public
module Private {
import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow
import semmle.python.dataflow.new.internal.DataFlowImpl as DataFlowInternal
import semmle.python.dataflow.new.internal.TaintTrackingPrivate
}

View File

@@ -59,7 +59,8 @@ module AiohttpWebModel {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `AiohttpRouteSetup::Range` instead.
*/
class AiohttpRouteSetup extends Http::Server::RouteSetup::Range instanceof AiohttpRouteSetup::Range {
class AiohttpRouteSetup extends Http::Server::RouteSetup::Range instanceof AiohttpRouteSetup::Range
{
override Parameter getARoutedParameter() { none() }
override string getFramework() { result = "aiohttp.web" }
@@ -252,7 +253,8 @@ module AiohttpWebModel {
}
/** A request handler defined in an `aiohttp.web` view class, that has no known route. */
private class AiohttpViewClassRequestHandlerWithoutKnownRoute extends Http::Server::RequestHandler::Range {
private class AiohttpViewClassRequestHandlerWithoutKnownRoute extends Http::Server::RequestHandler::Range
{
AiohttpViewClassRequestHandlerWithoutKnownRoute() {
exists(AiohttpViewClass vc | vc.getARequestHandler() = this) and
not exists(AiohttpRouteSetup setup | setup.getARequestHandler() = this)
@@ -440,7 +442,8 @@ module AiohttpWebModel {
* handler is invoked.
*/
class AiohttpRequestHandlerRequestParam extends Request::InstanceSource, RemoteFlowSource::Range,
DataFlow::ParameterNode {
DataFlow::ParameterNode
{
AiohttpRequestHandlerRequestParam() {
exists(Function requestHandler |
requestHandler = any(AiohttpCoroutineRouteSetup setup).getARequestHandler() and
@@ -470,7 +473,8 @@ module AiohttpWebModel {
* which is the request being processed currently.
*/
class AiohttpViewClassRequestAttributeRead extends Request::InstanceSource,
RemoteFlowSource::Range, DataFlow::Node {
RemoteFlowSource::Range, DataFlow::Node
{
AiohttpViewClassRequestAttributeRead() {
this.(DataFlow::AttrRead).getObject() = any(AiohttpViewClass vc).getASelfRef() and
this.(DataFlow::AttrRead).getAttributeName() = "request"
@@ -494,7 +498,8 @@ module AiohttpWebModel {
* - https://docs.aiohttp.org/en/stable/web_quickstart.html#aiohttp-web-exceptions
*/
class AiohttpWebResponseInstantiation extends Http::Server::HttpResponse::Range,
Response::InstanceSource, DataFlow::CallCfgNode {
Response::InstanceSource, DataFlow::CallCfgNode
{
API::Node apiNode;
AiohttpWebResponseInstantiation() {
@@ -562,7 +567,8 @@ module AiohttpWebModel {
* See the part about redirects at https://docs.aiohttp.org/en/stable/web_quickstart.html#aiohttp-web-exceptions
*/
class AiohttpRedirectExceptionInstantiation extends AiohttpWebResponseInstantiation,
Http::Server::HttpRedirectResponse::Range {
Http::Server::HttpRedirectResponse::Range
{
AiohttpRedirectExceptionInstantiation() {
exists(string httpRedirectExceptionClassName |
httpRedirectExceptionClassName in [
@@ -585,7 +591,8 @@ module AiohttpWebModel {
/**
* A call to `set_cookie` on a HTTP Response.
*/
class AiohttpResponseSetCookieCall extends Http::Server::CookieWrite::Range, DataFlow::CallCfgNode {
class AiohttpResponseSetCookieCall extends Http::Server::CookieWrite::Range, DataFlow::CallCfgNode
{
AiohttpResponseSetCookieCall() {
this = aiohttpResponseInstance().getMember("set_cookie").getACall()
}
@@ -600,7 +607,8 @@ module AiohttpWebModel {
/**
* A call to `del_cookie` on a HTTP Response.
*/
class AiohttpResponseDelCookieCall extends Http::Server::CookieWrite::Range, DataFlow::CallCfgNode {
class AiohttpResponseDelCookieCall extends Http::Server::CookieWrite::Range, DataFlow::CallCfgNode
{
AiohttpResponseDelCookieCall() {
this = aiohttpResponseInstance().getMember("del_cookie").getACall()
}

View File

@@ -23,7 +23,8 @@ private module CryptodomeModel {
* See https://pycryptodome.readthedocs.io/en/latest/src/public_key/rsa.html#Crypto.PublicKey.RSA.generate
*/
class CryptodomePublicKeyRsaGenerateCall extends Cryptography::PublicKey::KeyGeneration::RsaRange,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
CryptodomePublicKeyRsaGenerateCall() {
this =
API::moduleImport(["Crypto", "Cryptodome"])
@@ -44,7 +45,8 @@ private module CryptodomeModel {
* See https://pycryptodome.readthedocs.io/en/latest/src/public_key/dsa.html#Crypto.PublicKey.DSA.generate
*/
class CryptodomePublicKeyDsaGenerateCall extends Cryptography::PublicKey::KeyGeneration::DsaRange,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
CryptodomePublicKeyDsaGenerateCall() {
this =
API::moduleImport(["Crypto", "Cryptodome"])
@@ -65,7 +67,8 @@ private module CryptodomeModel {
* See https://pycryptodome.readthedocs.io/en/latest/src/public_key/ecc.html#Crypto.PublicKey.ECC.generate
*/
class CryptodomePublicKeyEccGenerateCall extends Cryptography::PublicKey::KeyGeneration::EccRange,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
CryptodomePublicKeyEccGenerateCall() {
this =
API::moduleImport(["Crypto", "Cryptodome"])
@@ -105,7 +108,8 @@ private module CryptodomeModel {
* A cryptographic operation on an instance from the `Cipher` subpackage of `Cryptodome`/`Crypto`.
*/
class CryptodomeGenericCipherOperation extends Cryptography::CryptographicOperation::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
string methodName;
string cipherName;
API::CallNode newCall;
@@ -175,7 +179,8 @@ private module CryptodomeModel {
* A cryptographic operation on an instance from the `Signature` subpackage of `Cryptodome`/`Crypto`.
*/
class CryptodomeGenericSignatureOperation extends Cryptography::CryptographicOperation::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
string methodName;
string signatureName;
@@ -214,7 +219,8 @@ private module CryptodomeModel {
* A cryptographic operation on an instance from the `Hash` subpackage of `Cryptodome`/`Crypto`.
*/
class CryptodomeGenericHashOperation extends Cryptography::CryptographicOperation::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
string hashName;
CryptodomeGenericHashOperation() {

View File

@@ -82,7 +82,8 @@ private module CryptographyModel {
* See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/rsa.html#cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key
*/
class CryptographyRsaGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::RsaRange,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
CryptographyRsaGeneratePrivateKeyCall() {
this =
API::moduleImport("cryptography")
@@ -105,7 +106,8 @@ private module CryptographyModel {
* See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/dsa.html#cryptography.hazmat.primitives.asymmetric.dsa.generate_private_key
*/
class CryptographyDsaGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::DsaRange,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
CryptographyDsaGeneratePrivateKeyCall() {
this =
API::moduleImport("cryptography")
@@ -128,7 +130,8 @@ private module CryptographyModel {
* See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/ec.html#cryptography.hazmat.primitives.asymmetric.ec.generate_private_key
*/
class CryptographyEcGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::EccRange,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
CryptographyEcGeneratePrivateKeyCall() {
this =
API::moduleImport("cryptography")
@@ -204,7 +207,8 @@ private module CryptographyModel {
* An encrypt or decrypt operation from `cryptography.hazmat.primitives.ciphers`.
*/
class CryptographyGenericCipherOperation extends Cryptography::CryptographicOperation::Range,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
string algorithmName;
string modeName;
@@ -262,7 +266,8 @@ private module CryptographyModel {
* An hashing operation from `cryptography.hazmat.primitives.hashes`.
*/
class CryptographyGenericHashOperation extends Cryptography::CryptographicOperation::Range,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
string algorithmName;
CryptographyGenericHashOperation() {

View File

@@ -1271,7 +1271,8 @@ module PrivateDjango {
}
/** An attribute read on an django request that is a `MultiValueDict` instance. */
private class DjangoHttpRequestMultiValueDictInstances extends Django::MultiValueDict::InstanceSource {
private class DjangoHttpRequestMultiValueDictInstances extends Django::MultiValueDict::InstanceSource
{
DjangoHttpRequestMultiValueDictInstances() {
this.(DataFlow::AttrRead).getObject() = instance() and
this.(DataFlow::AttrRead).getAttributeName() in ["GET", "POST", "FILES"]
@@ -1279,7 +1280,8 @@ module PrivateDjango {
}
/** An attribute read on an django request that is a `ResolverMatch` instance. */
private class DjangoHttpRequestResolverMatchInstances extends Django::ResolverMatch::InstanceSource {
private class DjangoHttpRequestResolverMatchInstances extends Django::ResolverMatch::InstanceSource
{
DjangoHttpRequestResolverMatchInstances() {
this.(DataFlow::AttrRead).getObject() = instance() and
this.(DataFlow::AttrRead).getAttributeName() = "resolver_match"
@@ -1287,7 +1289,8 @@ module PrivateDjango {
}
/** An `UploadedFile` instance that originates from a django request. */
private class DjangoHttpRequestUploadedFileInstances extends Django::UploadedFile::InstanceSource {
private class DjangoHttpRequestUploadedFileInstances extends Django::UploadedFile::InstanceSource
{
DjangoHttpRequestUploadedFileInstances() {
// TODO: this currently only works in local-scope, since writing type-trackers for
// this is a little too much effort. Once API-graphs are available for more
@@ -1421,7 +1424,8 @@ module PrivateDjango {
* Use the predicate `HttpResponseRedirect::instance()` to get references to instances of `django.http.response.HttpResponseRedirect`.
*/
abstract class InstanceSource extends HttpResponse::InstanceSource,
Http::Server::HttpRedirectResponse::Range, DataFlow::Node { }
Http::Server::HttpRedirectResponse::Range, DataFlow::Node
{ }
/** A direct instantiation of `django.http.response.HttpResponseRedirect`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
@@ -1483,7 +1487,8 @@ module PrivateDjango {
* Use the predicate `HttpResponsePermanentRedirect::instance()` to get references to instances of `django.http.response.HttpResponsePermanentRedirect`.
*/
abstract class InstanceSource extends HttpResponse::InstanceSource,
Http::Server::HttpRedirectResponse::Range, DataFlow::Node { }
Http::Server::HttpRedirectResponse::Range, DataFlow::Node
{ }
/** A direct instantiation of `django.http.response.HttpResponsePermanentRedirect`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
@@ -2086,7 +2091,8 @@ module PrivateDjango {
*
* See https://docs.djangoproject.com/en/3.1/ref/request-response/#django.http.HttpResponse.write
*/
class HttpResponseWriteCall extends Http::Server::HttpResponse::Range, DataFlow::CallCfgNode {
class HttpResponseWriteCall extends Http::Server::HttpResponse::Range, DataFlow::CallCfgNode
{
DjangoImpl::DjangoHttp::Response::HttpResponse::InstanceSource instance;
HttpResponseWriteCall() { this.getFunction() = write(instance) }
@@ -2106,7 +2112,8 @@ module PrivateDjango {
* A call to `set_cookie` on a HTTP Response.
*/
class DjangoResponseSetCookieCall extends Http::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
DjangoResponseSetCookieCall() {
this.calls(DjangoImpl::DjangoHttp::Response::HttpResponse::instance(), "set_cookie")
}
@@ -2126,7 +2133,8 @@ module PrivateDjango {
* A call to `delete_cookie` on a HTTP Response.
*/
class DjangoResponseDeleteCookieCall extends Http::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
DjangoResponseDeleteCookieCall() {
this.calls(DjangoImpl::DjangoHttp::Response::HttpResponse::instance(), "delete_cookie")
}
@@ -2429,7 +2437,8 @@ module PrivateDjango {
/** A request handler defined in a django view class, that has no known route. */
private class DjangoViewClassHandlerWithoutKnownRoute extends Http::Server::RequestHandler::Range,
DjangoRouteHandler {
DjangoRouteHandler
{
DjangoViewClassHandlerWithoutKnownRoute() {
exists(DjangoViewClass vc | vc.getARequestHandler() = this) and
not exists(DjangoRouteSetup setup | setup.getARequestHandler() = this)
@@ -2587,7 +2596,8 @@ module PrivateDjango {
// ---------------------------------------------------------------------------
/** A parameter that will receive the django `HttpRequest` instance when a request handler is invoked. */
private class DjangoRequestHandlerRequestParam extends DjangoImpl::DjangoHttp::Request::HttpRequest::InstanceSource,
RemoteFlowSource::Range, DataFlow::ParameterNode {
RemoteFlowSource::Range, DataFlow::ParameterNode
{
DjangoRequestHandlerRequestParam() {
this.getParameter() = any(DjangoRouteSetup setup).getARequestHandler().getRequestParam()
or
@@ -2604,7 +2614,8 @@ module PrivateDjango {
* See https://docs.djangoproject.com/en/3.1/topics/class-based-views/generic-display/#dynamic-filtering
*/
private class DjangoViewClassRequestAttributeRead extends DjangoImpl::DjangoHttp::Request::HttpRequest::InstanceSource,
RemoteFlowSource::Range, DataFlow::Node {
RemoteFlowSource::Range, DataFlow::Node
{
DjangoViewClassRequestAttributeRead() {
exists(DataFlow::AttrRead read | this = read |
read.getObject() = any(DjangoViewClass vc).getASelfRef() and
@@ -2624,7 +2635,8 @@ module PrivateDjango {
* See https://docs.djangoproject.com/en/3.1/topics/class-based-views/generic-display/#dynamic-filtering
*/
private class DjangoViewClassRoutedParamsAttributeRead extends RemoteFlowSource::Range,
DataFlow::Node {
DataFlow::Node
{
DjangoViewClassRoutedParamsAttributeRead() {
exists(DataFlow::AttrRead read | this = read |
read.getObject() = any(DjangoViewClass vc).getASelfRef() and
@@ -2652,7 +2664,8 @@ module PrivateDjango {
* - https://docs.djangoproject.com/en/3.1/topics/http/file-uploads/#handling-uploaded-files-with-a-model
*/
private class DjangoFileFieldUploadToFunctionFilenameParam extends RemoteFlowSource::Range,
DataFlow::ParameterNode {
DataFlow::ParameterNode
{
DjangoFileFieldUploadToFunctionFilenameParam() {
exists(DataFlow::CallCfgNode call, DataFlow::Node uploadToArg, Function func |
this.getParameter() = func.getArg(1) and
@@ -2679,7 +2692,8 @@ module PrivateDjango {
* See https://docs.djangoproject.com/en/3.1/topics/http/shortcuts/#redirect
*/
private class DjangoShortcutsRedirectCall extends Http::Server::HttpRedirectResponse::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
DjangoShortcutsRedirectCall() { this = DjangoImpl::Shortcuts::redirect().getACall() }
/**
@@ -2713,7 +2727,8 @@ module PrivateDjango {
* See https://docs.djangoproject.com/en/3.1/ref/class-based-views/base/#redirectview
*/
private class DjangoRedirectViewGetRedirectUrlReturn extends Http::Server::HttpRedirectResponse::Range,
DataFlow::CfgNode {
DataFlow::CfgNode
{
DjangoRedirectViewGetRedirectUrlReturn() {
node = any(GetRedirectUrlFunction f).getAReturnValueFlowNode()
}

View File

@@ -162,7 +162,8 @@ private module FabricV2 {
* - https://docs.fabfile.org/en/2.5/api/connection.html#fabric.connection.Connection.local
*/
private class FabricConnectionRunSudoLocalCall extends SystemCommandExecution::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
FabricConnectionRunSudoLocalCall() {
this.getFunction() = Fabric::Connection::ConnectionClass::instanceRunMethods()
}
@@ -187,7 +188,8 @@ private module FabricV2 {
}
class FabricTaskFirstParamConnectionInstance extends Fabric::Connection::ConnectionClass::InstanceSource,
DataFlow::ParameterNode {
DataFlow::ParameterNode
{
FabricTaskFirstParamConnectionInstance() {
exists(Function func |
func.getADecorator() = Fabric::Tasks::task().getAValueReachableFromSource().asExpr() and

View File

@@ -88,7 +88,8 @@ private module FastApi {
* Pydantic model.
*/
private class PydanticModelRequestHandlerParam extends Pydantic::BaseModel::InstanceSource,
DataFlow::ParameterNode {
DataFlow::ParameterNode
{
PydanticModelRequestHandlerParam() {
this.getParameter().getAnnotation() =
Pydantic::BaseModel::subclassRef().getAValueReachableFromSource().asExpr() and
@@ -103,7 +104,8 @@ private module FastApi {
* A parameter to a request handler that has a WebSocket type-annotation.
*/
private class WebSocketRequestHandlerParam extends Starlette::WebSocket::InstanceSource,
DataFlow::ParameterNode {
DataFlow::ParameterNode
{
WebSocketRequestHandlerParam() {
this.getParameter().getAnnotation() =
Starlette::WebSocket::classRef().getAValueReachableFromSource().asExpr() and
@@ -196,7 +198,8 @@ private module FastApi {
/** A direct instantiation of a response class. */
private class ResponseInstantiation extends InstanceSource, Http::Server::HttpResponse::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
API::Node baseApiNode;
API::Node responseClass;
@@ -223,7 +226,8 @@ private module FastApi {
* A direct instantiation of a redirect response.
*/
private class RedirectResponseInstantiation extends ResponseInstantiation,
Http::Server::HttpRedirectResponse::Range {
Http::Server::HttpRedirectResponse::Range
{
RedirectResponseInstantiation() { baseApiNode = getModeledResponseClass("RedirectResponse") }
override DataFlow::Node getRedirectLocation() {
@@ -246,7 +250,8 @@ private module FastApi {
* An implicit response from a return of FastAPI request handler.
*/
private class FastApiRequestHandlerReturn extends Http::Server::HttpResponse::Range,
DataFlow::CfgNode {
DataFlow::CfgNode
{
FastApiRouteSetup routeSetup;
FastApiRequestHandlerReturn() {
@@ -273,7 +278,8 @@ private module FastApi {
* `response_class` set to a `FileResponse`.
*/
private class FastApiRequestHandlerFileResponseReturn extends FastApiRequestHandlerReturn,
FileSystemAccess::Range {
FileSystemAccess::Range
{
FastApiRequestHandlerFileResponseReturn() {
exists(API::Node responseClass |
responseClass.getAValueReachableFromSource() = routeSetup.getResponseClassArg() and
@@ -291,7 +297,8 @@ private module FastApi {
* `response_class` set to a `RedirectResponse`.
*/
private class FastApiRequestHandlerRedirectReturn extends FastApiRequestHandlerReturn,
Http::Server::HttpRedirectResponse::Range {
Http::Server::HttpRedirectResponse::Range
{
FastApiRequestHandlerRedirectReturn() {
exists(API::Node responseClass |
responseClass.getAValueReachableFromSource() = routeSetup.getResponseClassArg() and
@@ -349,7 +356,8 @@ private module FastApi {
* header-key.
*/
private class HeadersAppendCookie extends Http::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
HeadersAppendCookie() {
exists(DataFlow::AttrRead headers, DataFlow::Node keyArg |
headers.accesses(instance(), "headers") and

View File

@@ -447,7 +447,8 @@ module Flask {
// ---------------------------------------------------------------------------
// Implicit response from returns of flask request handlers
// ---------------------------------------------------------------------------
private class FlaskRouteHandlerReturn extends Http::Server::HttpResponse::Range, DataFlow::CfgNode {
private class FlaskRouteHandlerReturn extends Http::Server::HttpResponse::Range, DataFlow::CfgNode
{
FlaskRouteHandlerReturn() {
exists(Function routeHandler |
routeHandler = any(FlaskRouteSetup rs).getARequestHandler() and
@@ -471,7 +472,8 @@ module Flask {
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.redirect
*/
private class FlaskRedirectCall extends Http::Server::HttpRedirectResponse::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
FlaskRedirectCall() { this = API::moduleImport("flask").getMember("redirect").getACall() }
override DataFlow::Node getRedirectLocation() {
@@ -499,7 +501,8 @@ module Flask {
* See https://flask.palletsprojects.com/en/2.0.x/api/#flask.Response.set_cookie
*/
class FlaskResponseSetCookieCall extends Http::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
FlaskResponseSetCookieCall() { this.calls(Flask::Response::instance(), "set_cookie") }
override DataFlow::Node getHeaderArg() { none() }
@@ -515,7 +518,8 @@ module Flask {
* See https://flask.palletsprojects.com/en/2.0.x/api/#flask.Response.delete_cookie
*/
class FlaskResponseDeleteCookieCall extends Http::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
FlaskResponseDeleteCookieCall() { this.calls(Flask::Response::instance(), "delete_cookie") }
override DataFlow::Node getHeaderArg() { none() }

View File

@@ -307,7 +307,8 @@ private module Lxml {
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.iterparse
*/
private class LxmlIterparseCall extends API::CallNode, XML::XmlParsing::Range,
FileSystemAccess::Range {
FileSystemAccess::Range
{
LxmlIterparseCall() {
this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall()
}

View File

@@ -101,7 +101,8 @@ private module MarkupSafeModel {
/** A call to any of the escaping functions in `markupsafe` */
private class MarkupSafeEscapeCall extends Markup::InstanceSource, MarkupSafeEscape,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
MarkupSafeEscapeCall() {
this = API::moduleImport("markupsafe").getMember(["escape", "escape_silent"]).getACall()
or
@@ -141,7 +142,8 @@ private module MarkupSafeModel {
/** A escape from %-style string format with `markupsafe.Markup` as the format string. */
private class MarkupEscapeFromPercentStringFormat extends MarkupSafeEscape,
Markup::PercentStringFormat {
Markup::PercentStringFormat
{
override DataFlow::Node getAnInput() {
result.asCfgNode() = node.getRight() and
not result = Markup::instance()

View File

@@ -164,7 +164,8 @@ private module Peewee {
* https://docs.peewee-orm.com/en/latest/peewee/api.html#Database.connection.
*/
class PeeweeDatabaseConnectionCall extends PEP249::Connection::InstanceSource,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
PeeweeDatabaseConnectionCall() {
this = Database::instance().getMember("connection").getACall()
}

View File

@@ -159,7 +159,8 @@ private module RestFramework {
* known route setup.
*/
class RestFrameworkFunctionBasedViewWithoutKnownRoute extends Http::Server::RequestHandler::Range,
PrivateDjango::DjangoRouteHandler instanceof RestFrameworkFunctionBasedView {
PrivateDjango::DjangoRouteHandler instanceof RestFrameworkFunctionBasedView
{
RestFrameworkFunctionBasedViewWithoutKnownRoute() {
not exists(PrivateDjango::DjangoRouteSetup setup | setup.getARequestHandler() = this)
}
@@ -183,7 +184,8 @@ private module RestFramework {
* request handler is invoked.
*/
private class RestFrameworkRequestHandlerRequestParam extends Request::InstanceSource,
RemoteFlowSource::Range, DataFlow::ParameterNode {
RemoteFlowSource::Range, DataFlow::ParameterNode
{
RestFrameworkRequestHandlerRequestParam() {
// rest_framework.views.APIView subclass
exists(RestFrameworkApiViewClass vc |
@@ -220,8 +222,8 @@ private module RestFramework {
*
* Use the predicate `Request::instance()` to get references to instances of `rest_framework.request.Request`.
*/
abstract class InstanceSource extends PrivateDjango::DjangoImpl::DjangoHttp::Request::HttpRequest::InstanceSource {
}
abstract class InstanceSource extends PrivateDjango::DjangoImpl::DjangoHttp::Request::HttpRequest::InstanceSource
{ }
/** A direct instantiation of `rest_framework.request.Request`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
@@ -297,7 +299,8 @@ private module RestFramework {
/** A direct instantiation of `rest_framework.response.Response`. */
private class ClassInstantiation extends PrivateDjango::DjangoImpl::DjangoHttp::Response::HttpResponse::InstanceSource,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
ClassInstantiation() { this = classRef().getACall() }
override DataFlow::Node getBody() { result in [this.getArg(0), this.getArgByName("data")] }
@@ -321,7 +324,8 @@ private module RestFramework {
module ApiException {
/** A direct instantiation of `rest_framework.exceptions.ApiException` or subclass. */
private class ClassInstantiation extends Http::Server::HttpResponse::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
string className;
ClassInstantiation() {

View File

@@ -20,7 +20,8 @@ private module Rsa {
* See https://stuvel.eu/python-rsa-doc/reference.html#rsa.newkeys
*/
class RsaNewkeysCall extends Cryptography::PublicKey::KeyGeneration::RsaRange,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
RsaNewkeysCall() { this = API::moduleImport("rsa").getMember("newkeys").getACall() }
override DataFlow::Node getKeySizeArg() {
@@ -116,7 +117,8 @@ private module Rsa {
* See https://stuvel.eu/python-rsa-doc/reference.html#rsa.compute_hash
*/
class RsaComputeHashCall extends Cryptography::CryptographicOperation::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
RsaComputeHashCall() { this = API::moduleImport("rsa").getMember("compute_hash").getACall() }
override Cryptography::CryptographicAlgorithm getAlgorithm() {

View File

@@ -152,7 +152,8 @@ module Starlette {
}
/** An attribute read on a `starlette.requests.URL` instance that is a `urllib.parse.SplitResult` instance. */
private class UrlSplitInstances extends Stdlib::SplitResult::InstanceSource instanceof DataFlow::AttrRead {
private class UrlSplitInstances extends Stdlib::SplitResult::InstanceSource instanceof DataFlow::AttrRead
{
UrlSplitInstances() {
super.getObject() = instance() and
super.getAttributeName() = "components"

View File

@@ -1098,7 +1098,8 @@ private module StdlibPrivate {
* See https://docs.python.org/3.8/library/os.html#os.execl
*/
private class OsExecCall extends SystemCommandExecution::Range, FileSystemAccess::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
OsExecCall() {
exists(string name |
name in ["execl", "execle", "execlp", "execlpe", "execv", "execve", "execvp", "execvpe"] and
@@ -1120,7 +1121,8 @@ private module StdlibPrivate {
* See https://docs.python.org/3.8/library/os.html#os.spawnl
*/
private class OsSpawnCall extends SystemCommandExecution::Range, FileSystemAccess::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
OsSpawnCall() {
exists(string name |
name in [
@@ -1150,7 +1152,8 @@ private module StdlibPrivate {
* See https://docs.python.org/3.8/library/os.html#os.posix_spawn
*/
private class OsPosixSpawnCall extends SystemCommandExecution::Range, FileSystemAccess::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
OsPosixSpawnCall() { this = os().getMember(["posix_spawn", "posix_spawnp"]).getACall() }
override DataFlow::Node getCommand() { result in [this.getArg(0), this.getArgByName("path")] }
@@ -1361,7 +1364,8 @@ private module StdlibPrivate {
* argument as being deserialized...
*/
private class ShelveOpenCall extends Decoding::Range, FileSystemAccess::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
ShelveOpenCall() { this = API::moduleImport("shelve").getMember("open").getACall() }
override predicate mayExecuteInput() { any() }
@@ -1469,7 +1473,8 @@ private module StdlibPrivate {
* See https://docs.python.org/3/library/functions.html#open
*/
private class OpenCall extends FileSystemAccess::Range, Stdlib::FileLikeObject::InstanceSource,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
OpenCall() { this = getOpenFunctionRef().getACall() }
override DataFlow::Node getAPathArgument() {
@@ -1482,7 +1487,19 @@ private module StdlibPrivate {
t.start() and
result = openCall and
(
openCall instanceof OpenCall
openCall instanceof OpenCall and
// don't include the open call inside of Path.open in pathlib.py since
// the call to `path_obj.open` is covered by `PathLibOpenCall`.
not exists(Module mod, Class cls, Function func |
openCall.(OpenCall).asCfgNode().getScope() = func and
func.getName() = "open" and
func.getScope() = cls and
cls.getName() = "Path" and
cls.getScope() = mod and
mod.getName() = "pathlib" and
// do allow this call if we're analyzing pathlib.py as part of CPython though
not exists(mod.getFile().getRelativePath())
)
or
openCall instanceof PathLibOpenCall
)
@@ -1717,7 +1734,8 @@ private module StdlibPrivate {
* if it turns out to be a problem, we'll have to refine.
*/
private class ClassInstantiation extends InstanceSource, RemoteFlowSource::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
ClassInstantiation() { this = classRef().getACall() }
override string getSourceType() { result = "cgi.FieldStorage" }
@@ -1975,7 +1993,8 @@ private module StdlibPrivate {
abstract class InstanceSource extends DataFlow::Node { }
/** The `self` parameter in a method on the `BaseHttpRequestHandler` class or any subclass. */
private class SelfParam extends InstanceSource, RemoteFlowSource::Range, DataFlow::ParameterNode {
private class SelfParam extends InstanceSource, RemoteFlowSource::Range, DataFlow::ParameterNode
{
SelfParam() {
exists(HttpRequestHandlerClassDef cls | cls.getAMethod().getArg(0) = this.getParameter())
}
@@ -2013,14 +2032,16 @@ private module StdlibPrivate {
}
/** An `HttpMessage` instance that originates from a `BaseHttpRequestHandler` instance. */
private class BaseHttpRequestHandlerHeadersInstances extends Stdlib::HttpMessage::InstanceSource {
private class BaseHttpRequestHandlerHeadersInstances extends Stdlib::HttpMessage::InstanceSource
{
BaseHttpRequestHandlerHeadersInstances() {
this.(DataFlow::AttrRead).accesses(instance(), "headers")
}
}
/** A file-like object that originates from a `BaseHttpRequestHandler` instance. */
private class BaseHttpRequestHandlerFileLikeObjectInstances extends Stdlib::FileLikeObject::InstanceSource {
private class BaseHttpRequestHandlerFileLikeObjectInstances extends Stdlib::FileLikeObject::InstanceSource
{
BaseHttpRequestHandlerFileLikeObjectInstances() {
this.(DataFlow::AttrRead).accesses(instance(), "rfile")
}
@@ -2172,7 +2193,8 @@ private module StdlibPrivate {
* See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L276
*/
class WsgirefSimpleServerApplicationWriteCall extends Http::Server::HttpResponse::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
WsgirefSimpleServerApplicationWriteCall() { this.getFunction() = writeFunction() }
override DataFlow::Node getBody() { result in [this.getArg(0), this.getArgByName("data")] }
@@ -2186,7 +2208,8 @@ private module StdlibPrivate {
* A return from a `WsgirefSimpleServerApplication`, which is included in the response body.
*/
class WsgirefSimpleServerApplicationReturn extends Http::Server::HttpResponse::Range,
DataFlow::CfgNode {
DataFlow::CfgNode
{
WsgirefSimpleServerApplicationReturn() {
exists(WsgirefSimpleServerApplication requestHandler |
node = requestHandler.getAReturnValueFlowNode()
@@ -2297,7 +2320,8 @@ private module StdlibPrivate {
/** A call to the `getresponse` method. */
private class HttpConnectionGetResponseCall extends DataFlow::MethodCallNode,
HttpResponse::InstanceSource {
HttpResponse::InstanceSource
{
HttpConnectionGetResponseCall() { this.calls(instance(_), "getresponse") }
}
@@ -2356,7 +2380,8 @@ private module StdlibPrivate {
* Use the predicate `HTTPResponse::instance()` to get references to instances of `http.client.HTTPResponse`.
*/
abstract class InstanceSource extends Stdlib::FileLikeObject::InstanceSource,
DataFlow::LocalSourceNode { }
DataFlow::LocalSourceNode
{ }
/** A direct instantiation of `http.client.HttpResponse`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
@@ -2686,6 +2711,7 @@ private module StdlibPrivate {
HashlibNewCall() {
this = hashlibNewCall(hashName) and
// we only want to consider it as an cryptographic operation if the input is available
exists(this.getParameter(1, "data"))
}
@@ -2726,7 +2752,8 @@ private module StdlibPrivate {
* `HashlibNewCall` and `HashlibNewUpdateCall`.
*/
abstract class HashlibGenericHashOperation extends Cryptography::CryptographicOperation::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
string hashName;
API::Node hashClass;
@@ -2768,6 +2795,79 @@ private module StdlibPrivate {
}
}
// ---------------------------------------------------------------------------
// hmac
// ---------------------------------------------------------------------------
abstract class HmacCryptographicOperation extends Cryptography::CryptographicOperation::Range,
API::CallNode
{
abstract API::Node getDigestArg();
override Cryptography::CryptographicAlgorithm getAlgorithm() {
exists(string algorithmName | result.matchesName(algorithmName) |
this.getDigestArg().asSink() = hashlibMember(algorithmName).asSource()
or
this.getDigestArg().getAValueReachingSink().asExpr().(StrConst).getText() = algorithmName
)
}
override Cryptography::BlockMode getBlockMode() { none() }
}
API::CallNode getHmacConstructorCall(API::Node digestArg) {
result = API::moduleImport("hmac").getMember(["new", "HMAC"]).getACall() and
digestArg = result.getParameter(2, "digestmod")
}
/**
* A call to `hmac.new`/`hmac.HMAC`.
*
* See https://docs.python.org/3.11/library/hmac.html#hmac.new
*/
class HmacNewCall extends HmacCryptographicOperation {
API::Node digestArg;
HmacNewCall() {
this = getHmacConstructorCall(digestArg) and
// we only want to consider it as an cryptographic operation if the input is available
exists(this.getParameter(1, "msg").asSink())
}
override API::Node getDigestArg() { result = digestArg }
override DataFlow::Node getAnInput() { result = this.getParameter(1, "msg").asSink() }
}
/**
* A call to `.update` on an HMAC object.
*
* See https://docs.python.org/3.11/library/hmac.html#hmac.HMAC.update
*/
class HmacUpdateCall extends HmacCryptographicOperation {
API::Node digestArg;
HmacUpdateCall() {
this = getHmacConstructorCall(digestArg).getReturn().getMember("update").getACall()
}
override API::Node getDigestArg() { result = digestArg }
override DataFlow::Node getAnInput() { result = this.getParameter(0, "msg").asSink() }
}
/**
* A call to `hmac.digest`.
*
* See https://docs.python.org/3.11/library/hmac.html#hmac.digest
*/
class HmacDigestCall extends HmacCryptographicOperation {
HmacDigestCall() { this = API::moduleImport("hmac").getMember("digest").getACall() }
override API::Node getDigestArg() { result = this.getParameter(2, "digest") }
override DataFlow::Node getAnInput() { result = this.getParameter(1, "msg").asSink() }
}
// ---------------------------------------------------------------------------
// logging
// ---------------------------------------------------------------------------
@@ -2928,7 +3028,8 @@ private module StdlibPrivate {
}
/** Extra taint-step such that the result of `urllib.parse.urlsplit(tainted_string)` is tainted. */
private class UrllibParseUrlsplitCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
private class UrllibParseUrlsplitCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep
{
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeTo.(UrllibParseUrlsplitCall).getUrl() = nodeFrom
}
@@ -2959,7 +3060,8 @@ private module StdlibPrivate {
* See https://docs.python.org/3/library/tempfile.html#tempfile.NamedTemporaryFile
*/
private class TempfileNamedTemporaryFileCall extends FileSystemAccess::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
TempfileNamedTemporaryFileCall() {
this = API::moduleImport("tempfile").getMember("NamedTemporaryFile").getACall()
}
@@ -2996,7 +3098,8 @@ private module StdlibPrivate {
* See https://docs.python.org/3/library/tempfile.html#tempfile.SpooledTemporaryFile
*/
private class TempfileSpooledTemporaryFileCall extends FileSystemAccess::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
TempfileSpooledTemporaryFileCall() {
this = API::moduleImport("tempfile").getMember("SpooledTemporaryFile").getACall()
}
@@ -3031,7 +3134,8 @@ private module StdlibPrivate {
* See https://docs.python.org/3/library/tempfile.html#tempfile.TemporaryDirectory
*/
private class TempfileTemporaryDirectoryCall extends FileSystemAccess::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
TempfileTemporaryDirectoryCall() {
this = API::moduleImport("tempfile").getMember("TemporaryDirectory").getACall()
}
@@ -3488,7 +3592,8 @@ private module StdlibPrivate {
* See https://docs.python.org/3/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.parse
*/
private class XmlSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XmlParsing::Range,
FileSystemAccess::Range {
FileSystemAccess::Range
{
XmlSaxInstanceParsing() {
this =
API::moduleImport("xml")

View File

@@ -200,7 +200,8 @@ module Tornado {
override string getAsyncMethodName() { none() }
}
private class RequestAttrAccess extends TornadoModule::HttpUtil::HttpServerRequest::InstanceSource {
private class RequestAttrAccess extends TornadoModule::HttpUtil::HttpServerRequest::InstanceSource
{
RequestAttrAccess() {
this.(DataFlow::AttrRead).getObject() = instance() and
this.(DataFlow::AttrRead).getAttributeName() = "request"
@@ -463,7 +464,8 @@ module Tornado {
* See https://www.tornadoweb.org/en/stable/web.html#tornado.web.RequestHandler.redirect
*/
private class TornadoRequestHandlerRedirectCall extends Http::Server::HttpRedirectResponse::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
TornadoRequestHandlerRedirectCall() {
this.getFunction() = TornadoModule::Web::RequestHandler::redirectMethod()
}
@@ -485,7 +487,8 @@ module Tornado {
* See https://www.tornadoweb.org/en/stable/web.html#tornado.web.RequestHandler.write
*/
private class TornadoRequestHandlerWriteCall extends Http::Server::HttpResponse::Range,
DataFlow::CallCfgNode {
DataFlow::CallCfgNode
{
TornadoRequestHandlerWriteCall() {
this.getFunction() = TornadoModule::Web::RequestHandler::writeMethod()
}
@@ -503,7 +506,8 @@ module Tornado {
* See https://www.tornadoweb.org/en/stable/web.html#tornado.web.RequestHandler.set_cookie
*/
class TornadoRequestHandlerSetCookieCall extends Http::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
TornadoRequestHandlerSetCookieCall() {
this.calls(TornadoModule::Web::RequestHandler::instance(), "set_cookie")
}

View File

@@ -143,7 +143,8 @@ private module Twisted {
* when a twisted request handler is called.
*/
class TwistedResourceRequestHandlerRequestParam extends RemoteFlowSource::Range,
Request::InstanceSource, DataFlow::ParameterNode {
Request::InstanceSource, DataFlow::ParameterNode
{
TwistedResourceRequestHandlerRequestParam() {
this.getParameter() = any(TwistedResourceRequestHandler handler).getRequestParameter()
}
@@ -156,7 +157,8 @@ private module Twisted {
* that is also given remote user input. (a bit like RoutedParameter).
*/
class TwistedResourceRequestHandlerExtraSources extends RemoteFlowSource::Range,
DataFlow::ParameterNode {
DataFlow::ParameterNode
{
TwistedResourceRequestHandlerExtraSources() {
exists(TwistedResourceRequestHandler func, int i |
func.getName() in ["getChild", "getChildWithDefault"] and i = 1
@@ -177,7 +179,8 @@ private module Twisted {
* Implicit response from returns of render methods.
*/
private class TwistedResourceRenderMethodReturn extends Http::Server::HttpResponse::Range,
DataFlow::CfgNode {
DataFlow::CfgNode
{
TwistedResourceRenderMethodReturn() {
this.asCfgNode() = any(TwistedResourceRenderMethod meth).getAReturnValueFlowNode()
}
@@ -212,7 +215,8 @@ private module Twisted {
* See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http.Request.html#redirect
*/
class TwistedRequestRedirectCall extends Http::Server::HttpRedirectResponse::Range,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
TwistedRequestRedirectCall() { this.calls(Request::instance(), "redirect") }
override DataFlow::Node getBody() { none() }
@@ -232,7 +236,8 @@ private module Twisted {
* See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http.Request.html#addCookie
*/
class TwistedRequestAddCookieCall extends Http::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
TwistedRequestAddCookieCall() { this.calls(Twisted::Request::instance(), "addCookie") }
override DataFlow::Node getHeaderArg() { none() }
@@ -248,7 +253,8 @@ private module Twisted {
* See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http.Request.html#cookies
*/
class TwistedRequestCookiesAppendCall extends Http::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
TwistedRequestCookiesAppendCall() {
exists(DataFlow::AttrRead cookiesLookup |
cookiesLookup.getObject() = Twisted::Request::instance() and

View File

@@ -83,7 +83,8 @@ module Werkzeug {
// possible to do storage.read() instead of the long form storage.stream.read(). So
// that's why InstanceSource also extends `Stdlib::FileLikeObject::InstanceSource`
abstract class InstanceSource extends Stdlib::FileLikeObject::InstanceSource,
DataFlow::LocalSourceNode { }
DataFlow::LocalSourceNode
{ }
/** Gets a reference to an instance of `werkzeug.datastructures.FileStorage`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {

View File

@@ -93,6 +93,8 @@ module Stages {
exists(PyFlow::DefinitionNode b)
or
exists(any(PyFlow::SequenceNode n).getElement(_))
or
exists(any(PyFlow::ControlFlowNode c).toString())
}
}
@@ -125,6 +127,45 @@ module Stages {
}
}
/**
* The points-to stage.
*/
cached
module PointsTo {
/**
* Always holds.
* Ensures that a predicate is evaluated as part of the points-to stage.
*/
cached
predicate ref() { 1 = 1 }
private import semmle.python.pointsto.Base as PointsToBase
private import semmle.python.types.Object as TypeObject
private import semmle.python.objects.TObject as TObject
private import semmle.python.objects.ObjectInternal as ObjectInternal
// have to alias since this module is also called PointsTo
private import semmle.python.pointsto.PointsTo as RealPointsTo
/**
* DONT USE!
* Contains references to each predicate that use the above `ref` predicate.
*/
cached
predicate backref() {
1 = 1
or
PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
or
exists(TypeObject::Object a)
or
exists(TObject::TObject f)
or
exists(any(ObjectInternal::ObjectInternal o).toString())
or
RealPointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
}
}
/**
* The `dataflow` stage.
*/
@@ -138,14 +179,9 @@ module Stages {
predicate ref() { 1 = 1 }
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
private import semmle.python.dataflow.new.internal.LocalSources as LocalSources
private import semmle.python.internal.Awaited as Awaited
private import semmle.python.pointsto.Base as PointsToBase
private import semmle.python.types.Object as TypeObject
private import semmle.python.objects.TObject as TObject
private import semmle.python.Flow as Flow
private import semmle.python.objects.ObjectInternal as ObjectInternal
private import semmle.python.pointsto.PointsTo as PointsTo
/**
* DONT USE!
@@ -159,21 +195,13 @@ module Stages {
or
any(DataFlowPublic::Node node).hasLocationInfo(_, _, _, _, _)
or
DataFlowDispatch::resolveCall(_, _, _)
or
DataFlowDispatch::getCallArg(_, _, _, _, _)
or
any(LocalSources::LocalSourceNode n).flowsTo(_)
or
exists(Awaited::awaited(_))
or
PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
or
exists(TypeObject::Object a)
or
exists(TObject::TObject f)
or
exists(any(Flow::ControlFlowNode c).toString())
or
exists(any(ObjectInternal::ObjectInternal o).toString())
or
PointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
}
}
}

View File

@@ -45,8 +45,12 @@ module Cryptography {
/**
* Gets the block mode used to perform this cryptographic operation.
* This may have no result - for example if the `CryptographicAlgorithm` used
* is a stream cipher rather than a block cipher.
*
* This predicate is only expected to have a result if two conditions hold:
* 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and
* 2. The algorithm used is a block cipher (not a stream cipher).
*
* If either of these conditions do not hold, then this predicate should have no result.
*/
BlockMode getBlockMode() { result = super.getBlockMode() }
}
@@ -69,8 +73,12 @@ module Cryptography {
/**
* Gets the block mode used to perform this cryptographic operation.
* This may have no result - for example if the `CryptographicAlgorithm` used
* is a stream cipher rather than a block cipher.
*
* This predicate is only expected to have a result if two conditions hold:
* 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and
* 2. The algorithm used is a block cipher (not a stream cipher).
*
* If either of these conditions do not hold, then this predicate should have no result.
*/
abstract BlockMode getBlockMode();
}
@@ -81,10 +89,21 @@ module Cryptography {
* data of arbitrary length using a block encryption algorithm.
*/
class BlockMode extends string {
BlockMode() { this = ["ECB", "CBC", "GCM", "CCM", "CFB", "OFB", "CTR", "OPENPGP"] }
BlockMode() {
this =
[
"ECB", "CBC", "GCM", "CCM", "CFB", "OFB", "CTR", "OPENPGP",
"XTS", // https://csrc.nist.gov/publications/detail/sp/800-38e/final
"EAX" // https://en.wikipedia.org/wiki/EAX_mode
]
}
/** Holds if this block mode is considered to be insecure. */
predicate isWeak() { this = "ECB" }
/** Holds if the given string appears to match this block mode. */
bindingset[s]
predicate matchesString(string s) { s.toUpperCase().matches("%" + this + "%") }
}
}

View File

@@ -216,7 +216,7 @@ class BuiltinOpaqueObjectInternal extends ObjectInternal, TBuiltinOpaqueObject {
override Builtin getBuiltin() { this = TBuiltinOpaqueObject(result) }
override string toString() {
Stages::DataFlow::ref() and
Stages::PointsTo::ref() and
result = this.getBuiltin().getClass().getName() + " object"
}

View File

@@ -318,7 +318,7 @@ module BaseFlow {
predicate scope_entry_value_transfer_from_earlier(
EssaVariable pred_var, Scope pred_scope, ScopeEntryDefinition succ_def, Scope succ_scope
) {
Stages::DataFlow::ref() and
Stages::PointsTo::ref() and
exists(SsaSourceVariable var |
essa_var_scope(var, pred_scope, pred_var) and
scope_entry_def_scope(var, succ_scope, succ_def)

View File

@@ -2566,7 +2566,7 @@ module AttributePointsTo {
predicate variableAttributePointsTo(
EssaVariable var, Context context, string name, ObjectInternal value, CfgOrigin origin
) {
Stages::DataFlow::ref() and
Stages::PointsTo::ref() and
definitionAttributePointsTo(var.getDefinition(), context, name, value, origin)
or
exists(EssaVariable prev |

View File

@@ -85,6 +85,17 @@ predicate used_as_regex(Expr s, string mode) {
)
}
private import semmle.python.Concepts
private import semmle.python.RegexTreeView
/** Gets a parsed regular expression term that is executed at `exec`. */
RegExpTerm getTermForExecution(RegexExecution exec) {
exists(RegexTracking t, DataFlow::Node source | t.hasFlow(source, exec.getRegex()) |
result.getRegex() = source.asExpr() and
result.isRootTerm()
)
}
/**
* Gets the canonical name for the API graph node corresponding to the `re` flag `flag`. For flags
* that have multiple names, we pick the long-form name as a canonical representative.

View File

@@ -57,16 +57,43 @@ module CleartextLogging {
/** A piece of data printed, considered as a flow sink. */
class PrintedDataAsSink extends Sink {
PrintedDataAsSink() {
this = API::builtin("print").getACall().getArg(_)
or
// special handling of writing to `sys.stdout` and `sys.stderr`, which is
// essentially the same as printing
this =
API::moduleImport("sys")
.getMember(["stdout", "stderr"])
.getMember("write")
.getACall()
.getArg(0)
(
this = API::builtin("print").getACall().getArg(_)
or
// special handling of writing to `sys.stdout` and `sys.stderr`, which is
// essentially the same as printing
this =
API::moduleImport("sys")
.getMember(["stdout", "stderr"])
.getMember("write")
.getACall()
.getArg(0)
) and
// since some of the inner error handling implementation of the logging module is
// ```py
// sys.stderr.write('Message: %r\n'
// 'Arguments: %s\n' % (record.msg,
// record.args))
// ```
// any time we would report flow to such a logging sink, we can ALSO report
// the flow to the `record.msg`/`record.args` sinks -- obviously we
// don't want that.
//
// However, simply removing taint edges out of a sink is not a good enough solution,
// since we would only flag one of the `logging.info` calls in the following example
// due to use-use flow
// ```py
// logging.info(user_controlled)
// logging.info(user_controlled)
// ```
//
// The same approach is used in the command injection query.
not exists(Module loggingInit |
loggingInit.getName() = "logging.__init__" and
this.getScope().getEnclosingModule() = loggingInit and
// do allow this call if we're analyzing logging/__init__.py as part of CPython though
not exists(loggingInit.getFile().getRelativePath())
)
}
}
}

View File

@@ -50,7 +50,34 @@ module CleartextStorage {
/** The data written to a file, considered as a flow sink. */
class FileWriteDataAsSink extends Sink {
FileWriteDataAsSink() { this = any(FileSystemWriteAccess write).getADataNode() }
FileWriteDataAsSink() {
this = any(FileSystemWriteAccess write).getADataNode() and
// since implementation of Path.write_bytes in pathlib.py is like
// ```py
// def write_bytes(self, data):
// with self.open(mode='wb') as f:
// return f.write(data)
// ```
// any time we would report flow to the `Path.write_bytes` sink, we can ALSO report
// the flow from the `data` parameter to the `f.write` sink -- obviously we
// don't want that.
//
// However, simply removing taint edges out of a sink is not a good enough solution,
// since we would only flag one of the `p.write` calls in the following example
// due to use-use flow
// ```py
// p.write(user_controlled)
// p.write(user_controlled)
// ```
//
// The same approach is used in the command injection query.
not exists(Module pathlib |
pathlib.getName() = "pathlib" and
this.getScope().getEnclosingModule() = pathlib and
// do allow this call if we're analyzing pathlib.py as part of CPython though
not exists(pathlib.getFile().getRelativePath())
)
}
}
/** The data written to a cookie on a HTTP response, considered as a flow sink. */

View File

@@ -76,6 +76,9 @@ module CommandInjection {
// `subprocess`. See:
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
//
// The same approach is used in the path-injection, cleartext-storage, and
// cleartext-logging queries.
not this.getScope().getEnclosingModule().getName() in [
"os", "subprocess", "platform", "popen2"
]

View File

@@ -58,7 +58,33 @@ module PathInjection {
* A file system access, considered as a flow sink.
*/
class FileSystemAccessAsSink extends Sink {
FileSystemAccessAsSink() { this = any(FileSystemAccess e).getAPathArgument() }
FileSystemAccessAsSink() {
this = any(FileSystemAccess e).getAPathArgument() and
// since implementation of Path.open in pathlib.py is like
// ```py
// def open(self, ...):
// return io.open(self, ...)
// ```
// any time we would report flow to the `path_obj.open` sink, we can ALSO report
// the flow from the `self` parameter to the `io.open` sink -- obviously we
// don't want that.
//
// However, simply removing taint edges out of a sink is not a good enough solution,
// since we would only flag one of the `p.open` calls in the following example
// due to use-use flow
// ```py
// p.open()
// p.open()
// ```
//
// The same approach is used in the command injection query.
not exists(Module pathlib |
pathlib.getName() = "pathlib" and
this.getScope().getEnclosingModule() = pathlib and
// do allow this call if we're analyzing pathlib.py as part of CPython though
not exists(pathlib.getFile().getRelativePath())
)
}
}
private import semmle.python.frameworks.data.ModelsAsData

View File

@@ -13,6 +13,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.ApiGraphs
private import semmle.python.regex
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -66,7 +67,7 @@ module PolynomialReDoS {
RegexExecutionAsSink() {
exists(RegexExecution re |
re.getRegex().asExpr() = t.getRegex() and
t = getTermForExecution(re) and
this = re.getString()
) and
t.isRootTerm()

View File

@@ -41,7 +41,32 @@ module StackTraceExposure {
/**
* A source of exception info, considered as a flow source.
*/
class ExceptionInfoAsSource extends Source instanceof ExceptionInfo { }
class ExceptionInfoAsSource extends Source instanceof ExceptionInfo {
ExceptionInfoAsSource() {
// since `traceback.format_exc()` in Python 2 is internally implemented as
// ```py
// def format_exc(limit=None):
// """Like print_exc() but return a string."""
// try:
// etype, value, tb = sys.exc_info()
// return ''.join(format_exception(etype, value, tb, limit))
// finally:
// etype = value = tb = None
// ```
// any time we would report flow to such from a call to format_exc, we can ALSO report
// the flow from the `sys.exc_info()` source -- obviously we don't want that.
//
//
// To avoid this, we use the same approach as for sinks in the command injection
// query (and others).
not exists(Module traceback |
traceback.getName() = "traceback" and
this.getScope().getEnclosingModule() = traceback and
// do allow this call if we're analyzing traceback.py as part of CPython though
not exists(traceback.getFile().getRelativePath())
)
}
}
/**
* The body of a HTTP response that will be returned from a server, considered as a flow sink.

View File

@@ -111,11 +111,7 @@ class Builtin extends @py_cobject {
}
module Builtin {
Builtin builtinModule() {
py_special_objects(result, "builtin_module_2") and major_version() = 2
or
py_special_objects(result, "builtin_module_3") and major_version() = 3
}
Builtin builtinModule() { py_special_objects(result, "builtin_module") }
Builtin builtin(string name) { result = builtinModule().getMember(name) }

View File

@@ -5,7 +5,7 @@ private import semmle.python.internal.CachedStages
cached
private predicate is_an_object(@py_object obj) {
Stages::DataFlow::ref() and
Stages::PointsTo::ref() and
/* CFG nodes for numeric literals, all of which have a @py_cobject for the value of that literal */
obj instanceof ControlFlowNode and
not obj.(ControlFlowNode).getNode() instanceof IntegerLiteral and
@@ -78,7 +78,7 @@ class Object extends @py_object {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
Stages::DataFlow::ref() and
Stages::PointsTo::ref() and
this.hasOrigin() and
this.getOrigin()
.getLocation()
@@ -98,7 +98,7 @@ class Object extends @py_object {
/** Gets a textual representation of this element. */
cached
string toString() {
Stages::DataFlow::ref() and
Stages::PointsTo::ref() and
not this = undefinedVariable() and
not this = unknownValue() and
exists(ClassObject type | type.asBuiltin() = this.asBuiltin().getClass() |

View File

@@ -108,20 +108,6 @@ class XmlFile extends XmlParent, File {
/** Gets the name of this XML file. */
override string getName() { result = File.super.getAbsolutePath() }
/**
* DEPRECATED: Use `getAbsolutePath()` instead.
*
* Gets the path of this XML file.
*/
deprecated string getPath() { result = this.getAbsolutePath() }
/**
* DEPRECATED: Use `getParentContainer().getAbsolutePath()` instead.
*
* Gets the path of the folder that contains this XML file.
*/
deprecated string getFolder() { result = this.getParentContainer().getAbsolutePath() }
/** Gets the encoding of this XML file. */
string getEncoding() { xmlEncoding(this, result) }

View File

@@ -1,3 +1,11 @@
## 0.6.4
No user-facing changes.
## 0.6.3
No user-facing changes.
## 0.6.2
No user-facing changes.

View File

@@ -1,48 +1,36 @@
/**
* Definitions for reasoning about untrusted data used in APIs defined outside the
* database.
* user-written code.
*/
import python
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
private import semmle.python.types.Builtins
private import semmle.python.objects.ObjectInternal
// IMPLEMENTATION NOTES:
//
// This query uses *both* the new data-flow library, and points-to. Why? To get this
// finished quickly, so it can provide value for our field team and ourselves.
//
// In the long run, it should not need to use points-to for anything. Possibly this can
// even be helpful in figuring out what we need from TypeTrackers and the new data-flow
// library to be fully operational.
//
// At least it will allow us to provide a baseline comparison against a solution that
// doesn't use points-to at all
//
// There is a few dirty things we do here:
// 1. DataFlowPrivate: since `DataFlowCall` and `DataFlowCallable` are not exposed
// publicly, but we really want access to them.
// 2. points-to: we kinda need to do this since this is what powers `DataFlowCall` and
// `DataFlowCallable`
// 3. ObjectInternal: to provide better names for built-in functions and methods. If we
// really wanted to polish our points-to implementation, we could move this
// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
// probably require some more work: for this query, it's totally ok to use
// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
// figure out if that is desirable in general. I simply skipped a corner here!
// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
/**
* A callable that is considered a "safe" external API from a security perspective.
* An external API that is considered "safe" from a security perspective.
*/
class SafeExternalApi extends Unit {
/** Gets a callable that is considered a "safe" external API from a security perspective. */
abstract DataFlowPrivate::DataFlowCallable getSafeCallable();
/**
* Gets a call that is considered "safe" from a security perspective. You can use API
* graphs to find calls to functions you know are safe.
*
* Which works even when the external library isn't extracted.
*/
abstract DataFlow::CallCfgNode getSafeCall();
/**
* Gets a callable that is considered a "safe" external API from a security
* perspective.
*
* You probably want to define this as `none()` and use `getSafeCall` instead, since
* that can handle the external library not being extracted.
*/
DataFlowPrivate::DataFlowCallable getSafeCallable() { none() }
}
/** DEPRECATED: Alias for SafeExternalApi */
@@ -50,42 +38,127 @@ deprecated class SafeExternalAPI = SafeExternalApi;
/** The default set of "safe" external APIs. */
private class DefaultSafeExternalApi extends SafeExternalApi {
override DataFlowPrivate::DataFlowCallable getSafeCallable() {
exists(CallableValue cv | cv = result.getCallableValue() |
cv = Value::named(["len", "isinstance", "getattr", "hasattr"])
or
exists(ClassValue cls, string attr |
cls = Value::named("dict") and attr in ["__getitem__", "__setitem__"]
|
cls.lookup(attr) = cv
)
override DataFlow::CallCfgNode getSafeCall() {
result =
API::builtin([
"len", "enumerate", "isinstance", "getattr", "hasattr", "bool", "float", "int", "repr",
"str", "type"
]).getACall()
}
}
/**
* Gets a human readable representation of `node`.
*
* Note that this is only defined for API nodes that are allowed as external APIs,
* so `None.json.dumps` will for example not be allowed.
*/
string apiNodeToStringRepr(API::Node node) {
node = API::builtin(result)
or
node = API::moduleImport(result)
or
exists(API::Node base, string basename |
base.getDepth() < node.getDepth() and
basename = apiNodeToStringRepr(base) and
not base = API::builtin(["None", "True", "False"])
|
exists(string m | node = base.getMember(m) | result = basename + "." + m)
or
node = base.getReturn() and
result = basename + "()" and
not base.getACall() = any(SafeExternalApi safe).getSafeCall()
or
node = base.getAwaited() and
result = basename
)
}
predicate resolvedCall(CallNode call) {
DataFlowPrivate::resolveCall(call, _, _) or
DataFlowPrivate::resolveClassCall(call, _)
}
newtype TInterestingExternalApiCall =
TUnresolvedCall(DataFlow::CallCfgNode call) {
exists(call.getLocation().getFile().getRelativePath()) and
not resolvedCall(call.getNode()) and
not call = any(SafeExternalApi safe).getSafeCall()
} or
TResolvedCall(DataFlowPrivate::DataFlowCall call) {
exists(call.getLocation().getFile().getRelativePath()) and
exists(call.getCallable()) and
not call.getCallable() = any(SafeExternalApi safe).getSafeCallable() and
// ignore calls inside codebase, and ignore calls that are marked as safe. This is
// only needed as long as we extract dependencies. When we stop doing that, all
// targets of resolved calls will be from user-written code.
not exists(call.getCallable().getLocation().getFile().getRelativePath()) and
not exists(DataFlow::CallCfgNode callCfgNode | callCfgNode.getNode() = call.getNode() |
any(SafeExternalApi safe).getSafeCall() = callCfgNode
)
}
abstract class InterestingExternalApiCall extends TInterestingExternalApiCall {
/** Gets the argument at position `apos`, if any */
abstract DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos);
/** Gets a textual representation of this element. */
abstract string toString();
/**
* Gets a human-readable name for the external API.
*/
abstract string getApiName();
}
class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
DataFlow::CallCfgNode call;
UnresolvedCall() { this = TUnresolvedCall(call) }
override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
exists(int i | apos.isPositional(i) | result = call.getArg(i))
or
exists(string name | apos.isKeyword(name) | result = call.getArgByName(name))
}
override string toString() {
result = "ExternalAPI:UnresolvedCall: " + call.getNode().getNode().toString()
}
override string getApiName() {
exists(API::Node apiNode |
result = apiNodeToStringRepr(apiNode) and
apiNode.getACall() = call
)
}
}
class ResolvedCall extends InterestingExternalApiCall, TResolvedCall {
DataFlowPrivate::DataFlowCall dfCall;
ResolvedCall() { this = TResolvedCall(dfCall) }
override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
result = dfCall.getArgument(apos)
}
override string toString() {
result = "ExternalAPI:ResolvedCall: " + dfCall.getNode().getNode().toString()
}
override string getApiName() {
exists(DataFlow::CallCfgNode call, API::Node apiNode | dfCall.getNode() = call.getNode() |
result = apiNodeToStringRepr(apiNode) and
apiNode.getACall() = call
)
}
}
/** A node representing data being passed to an external API through a call. */
class ExternalApiDataNode extends DataFlow::Node {
DataFlowPrivate::DataFlowCallable callable;
int i;
ExternalApiDataNode() {
exists(DataFlowPrivate::DataFlowCall call |
exists(call.getLocation().getFile().getRelativePath())
|
callable = call.getCallable() and
// TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
this = call.getArg(i)
) and
not any(SafeExternalApi safe).getSafeCallable() = callable and
exists(Value cv | cv = callable.getCallableValue() |
cv.isAbsent()
or
cv.isBuiltin()
or
cv.(CallableValue).getScope().getLocation().getFile().inStdlib()
or
not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
) and
exists(InterestingExternalApiCall call | this = call.getArgument(_)) and
// Not already modeled as a taint step
not TaintTrackingPrivate::defaultAdditionalTaintStep(this, _) and
// for `list.append(x)`, we have a additional taint step from x -> [post] list.
@@ -95,12 +168,6 @@ class ExternalApiDataNode extends DataFlow::Node {
TaintTrackingPrivate::defaultAdditionalTaintStep(_, post)
)
}
/** Gets the index for the parameter that will receive this untrusted data */
int getIndex() { result = i }
/** Gets the callable to which this argument is passed. */
DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
}
/** DEPRECATED: Alias for ExternalApiDataNode */
@@ -133,19 +200,26 @@ deprecated class UntrustedExternalAPIDataNode = UntrustedExternalApiDataNode;
/** An external API which is used with untrusted data. */
private newtype TExternalApi =
/** An untrusted API method `m` where untrusted data is passed at `index`. */
TExternalApiParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
exists(UntrustedExternalApiDataNode n |
callable = n.getCallable() and
index = n.getIndex()
MkExternalApi(string repr, DataFlowPrivate::ArgumentPosition apos) {
exists(UntrustedExternalApiDataNode ex, InterestingExternalApiCall call |
ex = call.getArgument(apos) and
repr = call.getApiName()
)
}
/** An external API which is used with untrusted data. */
class ExternalApiUsedWithUntrustedData extends TExternalApi {
/** A argument of an external API which is used with untrusted data. */
class ExternalApiUsedWithUntrustedData extends MkExternalApi {
string repr;
DataFlowPrivate::ArgumentPosition apos;
ExternalApiUsedWithUntrustedData() { this = MkExternalApi(repr, apos) }
/** Gets a possibly untrusted use of this external API. */
UntrustedExternalApiDataNode getUntrustedDataNode() {
this = TExternalApiParameter(result.getCallable(), result.getIndex())
exists(InterestingExternalApiCall call |
result = call.getArgument(apos) and
call.getApiName() = repr
)
}
/** Gets the number of untrusted sources used with this external API. */
@@ -154,63 +228,8 @@ class ExternalApiUsedWithUntrustedData extends TExternalApi {
}
/** Gets a textual representation of this element. */
string toString() {
exists(
DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
string indexString
|
this = TExternalApiParameter(callable, index) and
indexString = "param " + index and
exists(CallableValue cv | cv = callable.getCallableValue() |
callableString =
cv.getScope().getEnclosingModule().getName() + "." + cv.getScope().getQualifiedName()
or
not exists(cv.getScope()) and
(
cv instanceof BuiltinFunctionValue and
callableString = pretty_builtin_function_value(cv)
or
cv instanceof BuiltinMethodValue and
callableString = pretty_builtin_method_value(cv)
or
not cv instanceof BuiltinFunctionValue and
not cv instanceof BuiltinMethodValue and
callableString = cv.toString()
)
) and
result = callableString + " [" + indexString + "]"
)
}
string toString() { result = repr + " [" + apos + "]" }
}
/** DEPRECATED: Alias for ExternalApiUsedWithUntrustedData */
deprecated class ExternalAPIUsedWithUntrustedData = ExternalApiUsedWithUntrustedData;
/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |
result = prefix_with_module_if_found(b)
)
}
/** Gets the fully qualified name for the `BuiltinMethodValue` bmv. */
private string pretty_builtin_method_value(BuiltinMethodValue bmv) {
exists(Builtin b | b = bmv.(BuiltinMethodObjectInternal).getBuiltin() |
exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b |
result = prefix_with_module_if_found(cls) + "." + b.getName()
)
or
not exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b) and
result = b.getName()
)
}
/** Helper predicate that tries to adds module qualifier to `b`. Will succeed even if module not found. */
private string prefix_with_module_if_found(Builtin b) {
exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b |
result = mod.getName() + "." + b.getName()
)
or
not exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b) and
result = b.getName()
}

View File

@@ -11,11 +11,9 @@ relevant for security analysis of this application.</p>
<p>An external API is defined as a call to a method that is not defined in the source
code, and is not modeled as a taint step in the default taint library. External APIs may
be from the Python standard library or dependencies. The query will report the fully qualified name,
along with <code>[param x]</code>, where <code>x</code> indicates the position of
the parameter receiving the untrusted data. Note that for methods and
<code>classmethod</code>s, parameter 0 represents the class instance or class itself
respectively.</p>
be from the Python standard library or dependencies. The query will report the fully
qualified name, along with <code>[position index]</code> or <code>[keyword name]</code>,
to indicate the argument passing the untrusted data.</p>
<p>Note that an excepted sink might not be included in the results, if it also defines a
taint step. This is the case for <code>pickle.loads</code> which is a sink for the
@@ -24,8 +22,6 @@ Unsafe Deserialization query, but is also a taint step for other queries.</p>
<p>Note: Compared to the Java version of this query, we currently do not give special
care to methods that are overridden in the source code.</p>
<p>Note: Currently this query will only report results for external packages that are extracted.</p>
</overview>
<recommendation>

View File

@@ -11,11 +11,9 @@ be modeled as either taint steps, or sinks for specific problems.</p>
<p>An external API is defined as a call to a method that is not defined in the source
code, and is not modeled as a taint step in the default taint library. External APIs may
be from the Python standard library or dependencies. The query will report the fully qualified name,
along with <code>[param x]</code>, where <code>x</code> indicates the position of
the parameter receiving the untrusted data. Note that for methods and
<code>classmethod</code>s, parameter 0 represents the class instance or class itself
respectively.</p>
be from the Python standard library or dependencies. The query will report the fully
qualified name, along with <code>[position index]</code> or <code>[keyword name]</code>,
to indicate the argument passing the untrusted data.</p>
<p>Note that an excepted sink might not be included in the results, if it also defines a
taint step. This is the case for <code>pickle.loads</code> which is a sink for the
@@ -24,8 +22,6 @@ Unsafe Deserialization query, but is also a taint step for other queries.</p>
<p>Note: Compared to the Java version of this query, we currently do not give special
care to methods that are overridden in the source code.</p>
<p>Note: Currently this query will only report results for external packages that are extracted.</p>
</overview>
<recommendation>

View File

@@ -1,36 +1,30 @@
import os.path
from flask import Flask, request, abort
app = Flask(__name__)
urlpatterns = [
# Route to user_picture
url(r'^user-pic1$', user_picture1, name='user-picture1'),
url(r'^user-pic2$', user_picture2, name='user-picture2'),
url(r'^user-pic3$', user_picture3, name='user-picture3')
]
def user_picture1(request):
"""A view that is vulnerable to malicious file access."""
filename = request.GET.get('p')
@app.route("/user_picture1")
def user_picture1():
filename = request.args.get('p')
# BAD: This could read any file on the file system
data = open(filename, 'rb').read()
return HttpResponse(data)
return data
def user_picture2(request):
"""A view that is vulnerable to malicious file access."""
@app.route("/user_picture2")
def user_picture2():
base_path = '/server/static/images'
filename = request.GET.get('p')
filename = request.args.get('p')
# BAD: This could still read any file on the file system
data = open(os.path.join(base_path, filename), 'rb').read()
return HttpResponse(data)
return data
def user_picture3(request):
"""A view that is not vulnerable to malicious file access."""
@app.route("/user_picture3")
def user_picture3():
base_path = '/server/static/images'
filename = request.GET.get('p')
filename = request.args.get('p')
#GOOD -- Verify with normalised version of path
fullpath = os.path.normpath(os.path.join(base_path, filename))
if not fullpath.startswith(base_path):
raise SecurityException()
raise Exception("not allowed")
data = open(fullpath, 'rb').read()
return HttpResponse(data)
return data

View File

@@ -1,7 +1,7 @@
import sys
import tarfile
with tarfile.open('archive.zip') as tar:
with tarfile.open(sys.argv[1]) as tar:
#BAD : This could write any file on the filesystem.
for entry in tar:
tar.extract(entry, "/tmp/unpack/")

View File

@@ -1,8 +1,8 @@
import sys
import tarfile
import os.path
with tarfile.open('archive.zip') as tar:
with tarfile.open(sys.argv[1]) as tar:
for entry in tar:
#GOOD: Check that entry is safe
if os.path.isabs(entry.name) or ".." in entry.name:

View File

@@ -20,48 +20,100 @@ import TlsLibraryModel
* Since we really want "the last unrestriction, not nullified by a restriction",
* we also disallow flow into restrictions.
*/
class InsecureContextConfiguration extends DataFlow::Configuration {
TlsLibrary library;
ProtocolVersion tracked_version;
module InsecureContextConfiguration2 implements DataFlow::StateConfigSig {
private newtype TFlowState =
TMkFlowState(TlsLibrary library, int bits) {
bits in [0 .. max(any(ProtocolVersion v).getBit()) * 2 - 1]
}
InsecureContextConfiguration() {
this = library + "Allows" + tracked_version and
tracked_version.isInsecure()
class FlowState extends TFlowState {
int getBits() { this = TMkFlowState(_, result) }
TlsLibrary getLibrary() { this = TMkFlowState(result, _) }
predicate allowsInsecureVersion(ProtocolVersion v) {
v.isInsecure() and this.getBits().bitAnd(v.getBit()) != 0
}
string toString() {
result =
"FlowState(" + this.getLibrary().toString() + ", " +
concat(ProtocolVersion v | this.allowsInsecureVersion(v) | v, ", ") + ")"
}
}
ProtocolVersion getTrackedVersion() { result = tracked_version }
override predicate isSource(DataFlow::Node source) { this.isUnrestriction(source) }
override predicate isSink(DataFlow::Node sink) {
sink = library.connection_creation().getContext()
}
override predicate isBarrierIn(DataFlow::Node node) {
this.isRestriction(node)
private predicate relevantState(FlowState state) {
isSource(_, state)
or
this.isUnrestriction(node)
}
private predicate isRestriction(DataFlow::Node node) {
exists(ProtocolRestriction r |
r = library.protocol_restriction() and
r.getRestriction() = tracked_version
|
node = r.getContext()
exists(FlowState state0 | relevantState(state0) |
exists(ProtocolRestriction r |
r = state0.getLibrary().protocol_restriction() and
state.getBits() = state0.getBits().bitAnd(sum(r.getRestriction().getBit()).bitNot()) and
state0.getLibrary() = state.getLibrary()
)
or
exists(ProtocolUnrestriction pu |
pu = state0.getLibrary().protocol_unrestriction() and
state.getBits() = state0.getBits().bitOr(sum(pu.getUnrestriction().getBit())) and
state0.getLibrary() = state.getLibrary()
)
)
}
private predicate isUnrestriction(DataFlow::Node node) {
exists(ProtocolUnrestriction pu |
pu = library.protocol_unrestriction() and
pu.getUnrestriction() = tracked_version
|
node = pu.getContext()
predicate isSource(DataFlow::Node source, FlowState state) {
exists(ProtocolFamily family |
source = state.getLibrary().unspecific_context_creation(family) and
state.getBits() = family.getBits()
)
}
predicate isSink(DataFlow::Node sink, FlowState state) {
sink = state.getLibrary().connection_creation().getContext() and
state.allowsInsecureVersion(_)
}
predicate isAdditionalFlowStep(
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
) {
DataFlow::localFlowStep(node1, node2) and
relevantState(state1) and
(
exists(ProtocolRestriction r |
r = state1.getLibrary().protocol_restriction() and
node2 = r.getContext() and
state2.getBits() = state1.getBits().bitAnd(sum(r.getRestriction().getBit()).bitNot()) and
state1.getLibrary() = state2.getLibrary()
)
or
exists(ProtocolUnrestriction pu |
pu = state1.getLibrary().protocol_unrestriction() and
node2 = pu.getContext() and
state2.getBits() = state1.getBits().bitOr(sum(pu.getUnrestriction().getBit())) and
state1.getLibrary() = state2.getLibrary()
)
)
}
predicate isBarrier(DataFlow::Node node, FlowState state) {
relevantState(state) and
(
exists(ProtocolRestriction r |
r = state.getLibrary().protocol_restriction() and
node = r.getContext() and
state.allowsInsecureVersion(r.getRestriction())
)
or
exists(ProtocolUnrestriction pu |
pu = state.getLibrary().protocol_unrestriction() and
node = pu.getContext() and
not state.allowsInsecureVersion(pu.getUnrestriction())
)
)
}
}
private module InsecureContextFlow = DataFlow::MakeWithState<InsecureContextConfiguration2>;
/**
* Holds if `conectionCreation` marks the creation of a connection based on the contex
* found at `contextOrigin` and allowing `insecure_version`.
@@ -74,8 +126,11 @@ predicate unsafe_connection_creation_with_context(
boolean specific
) {
// Connection created from a context allowing `insecure_version`.
exists(InsecureContextConfiguration c | c.hasFlow(contextOrigin, connectionCreation) |
insecure_version = c.getTrackedVersion() and
exists(InsecureContextFlow::PathNode src, InsecureContextFlow::PathNode sink |
InsecureContextFlow::hasFlowPath(src, sink) and
src.getNode() = contextOrigin and
sink.getNode() = connectionCreation and
sink.getState().allowsInsecureVersion(insecure_version) and
specific = false
)
or

View File

@@ -51,8 +51,9 @@ class SetOptionsCall extends ProtocolRestriction, DataFlow::CallCfgNode {
}
}
class UnspecificPyOpenSslContextCreation extends PyOpenSslContextCreation, UnspecificContextCreation {
UnspecificPyOpenSslContextCreation() { library instanceof PyOpenSsl }
class UnspecificPyOpenSslContextCreation extends PyOpenSslContextCreation, UnspecificContextCreation
{
// UnspecificPyOpenSslContextCreation() { library instanceof PyOpenSsl }
}
class PyOpenSsl extends TlsLibrary {

View File

@@ -162,23 +162,21 @@ class ContextSetVersion extends ProtocolRestriction, ProtocolUnrestriction, Data
}
class UnspecificSslContextCreation extends SslContextCreation, UnspecificContextCreation {
UnspecificSslContextCreation() { library instanceof Ssl }
override ProtocolVersion getUnrestriction() {
result = UnspecificContextCreation.super.getUnrestriction() and
// These are turned off by default since Python 3.6
// see https://docs.python.org/3.6/library/ssl.html#ssl.SSLContext
not result in ["SSLv2", "SSLv3"]
}
// UnspecificSslContextCreation() { library instanceof Ssl }
// override ProtocolVersion getUnrestriction() {
// result = UnspecificContextCreation.super.getUnrestriction() and
// // These are turned off by default since Python 3.6
// // see https://docs.python.org/3.6/library/ssl.html#ssl.SSLContext
// not result in ["SSLv2", "SSLv3"]
// }
}
class UnspecificSslDefaultContextCreation extends SslDefaultContextCreation, ProtocolUnrestriction {
override DataFlow::Node getContext() { result = this }
// see https://docs.python.org/3/library/ssl.html#ssl.create_default_context
override ProtocolVersion getUnrestriction() {
result in ["TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"]
}
class UnspecificSslDefaultContextCreation extends SslDefaultContextCreation {
// override DataFlow::Node getContext() { result = this }
// // see https://docs.python.org/3/library/ssl.html#ssl.create_default_context
// override ProtocolVersion getUnrestriction() {
// result in ["TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"]
// }
}
class Ssl extends TlsLibrary {

View File

@@ -15,18 +15,45 @@ class ProtocolVersion extends string {
or
this = "TLSv1" and version = ["TLSv1_1", "TLSv1_2", "TLSv1_3"]
or
this = ["TLSv1", "TLSv1_1"] and version = ["TLSv1_2", "TLSv1_3"]
this = "TLSv1_1" and version = ["TLSv1_2", "TLSv1_3"]
or
this = ["TLSv1", "TLSv1_1", "TLSv1_2"] and version = "TLSv1_3"
this = "TLSv1_2" and version = "TLSv1_3"
}
/** Holds if this protocol version is known to be insecure. */
predicate isInsecure() { this in ["SSLv2", "SSLv3", "TLSv1", "TLSv1_1"] }
/** Gets the bit mask for this protocol version. */
int getBit() {
this = "SSLv2" and result = 1
or
this = "SSLv3" and result = 2
or
this = "TLSv1" and result = 4
or
this = "TLSv1_1" and result = 8
or
this = "TLSv1_2" and result = 16
or
this = "TLSv1_3" and result = 32
}
/** Gets the protocol family for this protocol version. */
ProtocolFamily getFamily() {
result = "SSLv23" and this in ["SSLv2", "SSLv3"]
or
result = "TLS" and this in ["TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"]
}
}
/** An unspecific protocol version */
class ProtocolFamily extends string {
ProtocolFamily() { this in ["SSLv23", "TLS"] }
/** Gets the bit mask for this protocol family. */
int getBits() {
result = sum(ProtocolVersion version | version.getFamily() = this | version.getBit())
}
}
/** The creation of a context. */
@@ -63,21 +90,14 @@ abstract class ProtocolUnrestriction extends DataFlow::Node {
* A context is being created with a range of allowed protocols.
* This also serves as unrestricting these protocols.
*/
abstract class UnspecificContextCreation extends ContextCreation, ProtocolUnrestriction {
TlsLibrary library;
ProtocolFamily family;
UnspecificContextCreation() { this.getProtocol() = family }
override DataFlow::CfgNode getContext() { result = this }
override ProtocolVersion getUnrestriction() {
// There is only one family, the two names are aliases in OpenSSL.
// see https://github.com/openssl/openssl/blob/13888e797c5a3193e91d71e5f5a196a2d68d266f/include/openssl/ssl.h.in#L1953-L1955
family in ["SSLv23", "TLS"] and
// see https://docs.python.org/3/library/ssl.html#ssl-contexts
result in ["SSLv2", "SSLv3", "TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"]
}
abstract class UnspecificContextCreation extends ContextCreation {
// override ProtocolVersion getUnrestriction() {
// // There is only one family, the two names are aliases in OpenSSL.
// // see https://github.com/openssl/openssl/blob/13888e797c5a3193e91d71e5f5a196a2d68d266f/include/openssl/ssl.h.in#L1953-L1955
// family in ["SSLv23", "TLS"] and
// // see https://docs.python.org/3/library/ssl.html#ssl-contexts
// result in ["SSLv2", "SSLv3", "TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"]
// }
}
/** A model of a SSL/TLS library. */

View File

@@ -0,0 +1,3 @@
## 0.6.3
No user-facing changes.

View File

@@ -0,0 +1,3 @@
## 0.6.4
No user-facing changes.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.6.2
lastReleaseVersion: 0.6.4

View File

@@ -0,0 +1,56 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Extracting files from a malicious tarball without validating that the destination file path
is within the destination directory using <code>shutil.unpack_archive()</code> can cause files outside the
destination directory to be overwritten, due to the possible presence of directory traversal elements
(<code>..</code>) in archive path names.</p>
<p>Tarball contain archive entries representing each file in the archive. These entries
include a file path for the entry, but these file paths are not restricted and may contain
unexpected special elements such as the directory traversal element (<code>..</code>). If these
file paths are used to determine an output file to write the contents of the archive item to, then
the file may be written to an unexpected location. This can result in sensitive information being
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
files.</p>
<p>For example, if a tarball contains a file entry <code>../sneaky-file.txt</code>, and the tarball
is extracted to the directory <code>/tmp/tmp123</code>, then naively combining the paths would result
in an output file path of <code>/tmp/tmp123/../sneaky-file.txt</code>, which would cause the file to be
written to <code>/tmp/</code>.</p>
</overview>
<recommendation>
<p>Ensure that output paths constructed from tarball entries are validated
to prevent writing files to unexpected locations.</p>
<p>Consider using a safer module, such as: <code>zipfile</code></p>
</recommendation>
<example>
<p>
In this example an archive is extracted without validating file paths.
</p>
<sample src="examples/HIT_UnsafeUnpack.py" />
<p>To fix this vulnerability, we need to call the function <code>tarfile.extract()</code>
on each <code>member</code> after verifying that it does not contain either <code>..</code> or startswith <code>/</code>.
</p>
<sample src="examples/NoHIT_UnsafeUnpack.py" />
</example>
<references>
<li>
Shutil official documentation
<a href="https://docs.python.org/3/library/shutil.html?highlight=unpack_archive#shutil.unpack_archive">shutil.unpack_archive() warning.</a>
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,24 @@
/**
* @name Arbitrary file write during a tarball extraction from a user controlled source
* @description Extracting files from a potentially malicious tarball using `shutil.unpack_archive()` without validating
* that the destination file path is within the destination directory can cause files outside
* the destination directory to be overwritten. More precisely, if the tarball comes from a user controlled
* location either a remote one or cli argument.
* @kind path-problem
* @id py/unsafe-unpacking
* @problem.severity error
* @security-severity 7.5
* @precision medium
* @tags security
* experimental
* external/cwe/cwe-022
*/
import python
import experimental.Security.UnsafeUnpackQuery
import DataFlow::PathGraph
from UnsafeUnpackingConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"Unsafe extraction from a malicious tarball retrieved from a remote location."

View File

@@ -0,0 +1,12 @@
import requests
import shutil
url = "https://www.someremote.location/tarball.tar.gz"
response = requests.get(url, stream=True)
tarpath = "/tmp/tmp456/tarball.tar.gz"
with open(tarpath, "wb") as f:
f.write(response.raw.read())
untarredpath = "/tmp/tmp123"
shutil.unpack_archive(tarpath, untarredpath)

View File

@@ -0,0 +1,17 @@
import requests
import tarfile
url = "https://www.someremote.location/tarball.tar.gz"
response = requests.get(url, stream=True)
tarpath = "/tmp/tmp456/tarball.tar.gz"
with open(tarpath, "wb") as f:
f.write(response.raw.read())
untarredpath = "/tmp/tmp123"
with tarfile.open(tarpath) as tar:
for member in tar.getmembers():
if member.name.startswith("/") or ".." in member.name:
raise Exception("Path traversal identified in tarball")
tar.extract(untarredpath, member)

View File

@@ -16,7 +16,8 @@ private import semmle.python.frameworks.Tornado
abstract class ClientSuppliedIpUsedInSecurityCheck extends DataFlow::Node { }
private class FlaskClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
FlaskClientSuppliedIpUsedInSecurityCheck() {
this = Flask::request().getMember("headers").getMember(["get", "get_all", "getlist"]).getACall() and
this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
@@ -24,7 +25,8 @@ private class FlaskClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpU
}
private class DjangoClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
DjangoClientSuppliedIpUsedInSecurityCheck() {
exists(DataFlow::Node req, DataFlow::AttrRead headers |
// a call to request.headers.get or request.META.get
@@ -38,7 +40,8 @@ private class DjangoClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIp
}
private class TornadoClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck,
DataFlow::MethodCallNode {
DataFlow::MethodCallNode
{
TornadoClientSuppliedIpUsedInSecurityCheck() {
// a call to self.request.headers.get or self.request.headers.get_list inside a tornado requesthandler
exists(

View File

@@ -0,0 +1,213 @@
/**
* Provides a taint-tracking configuration for detecting "UnsafeUnpacking" vulnerabilities.
*/
import python
import semmle.python.Concepts
import semmle.python.dataflow.new.internal.DataFlowPublic
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.TaintTracking
import semmle.python.frameworks.Stdlib
import semmle.python.dataflow.new.RemoteFlowSources
/**
* Handle those three cases of Tarfile opens:
* - `tarfile.open()`
* - `tarfile.TarFile()`
* - `MKtarfile.Tarfile.open()`
*/
API::Node tarfileOpen() {
result in [
API::moduleImport("tarfile").getMember(["open", "TarFile"]),
API::moduleImport("tarfile").getMember("TarFile").getASubclass().getMember("open")
]
}
/**
* A class for handling the previous three cases, plus the use of `closing` in with the previous cases
*/
class AllTarfileOpens extends API::CallNode {
AllTarfileOpens() {
this = tarfileOpen().getACall()
or
exists(API::Node closing, Node arg |
closing = API::moduleImport("contextlib").getMember("closing") and
this = closing.getACall() and
arg = this.getArg(0) and
arg = tarfileOpen().getACall()
)
}
}
class UnsafeUnpackingConfig extends TaintTracking::Configuration {
UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" }
override predicate isSource(DataFlow::Node source) {
// A source coming from a remote location
source instanceof RemoteFlowSource
or
// A source coming from a CLI argparse module
// see argparse: https://docs.python.org/3/library/argparse.html
exists(MethodCallNode args |
args = source.(AttrRead).getObject().getALocalSource() and
args =
API::moduleImport("argparse")
.getMember("ArgumentParser")
.getReturn()
.getMember("parse_args")
.getACall()
)
or
// A source catching an S3 file download
// see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file
source =
API::moduleImport("boto3")
.getMember("client")
.getReturn()
.getMember(["download_file", "download_fileobj"])
.getACall()
.getArg(2)
or
// A source catching an S3 file download
// see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
source =
API::moduleImport("boto3")
.getMember("Session")
.getReturn()
.getMember("client")
.getReturn()
.getMember(["download_file", "download_fileobj"])
.getACall()
.getArg(2)
or
// A source download a file using wget
// see wget: https://pypi.org/project/wget/
exists(API::CallNode mcn |
mcn = API::moduleImport("wget").getMember("download").getACall() and
if exists(mcn.getArg(1)) then source = mcn.getArg(1) else source = mcn.getReturn().asSource()
)
or
// catch the Django uploaded files as a source
// see HttpRequest.FILES: https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.HttpRequest.FILES
source.(AttrRead).getAttributeName() = "FILES"
}
override predicate isSink(DataFlow::Node sink) {
(
// A sink capturing method calls to `unpack_archive`.
sink = API::moduleImport("shutil").getMember("unpack_archive").getACall().getArg(0)
or
// A sink capturing method calls to `extractall` without `members` argument.
// For a call to `file.extractall` without `members` argument, `file` is considered a sink.
exists(MethodCallNode call, AllTarfileOpens atfo |
call = atfo.getReturn().getMember("extractall").getACall() and
not exists(call.getArgByName("members")) and
sink = call.getObject()
)
or
// A sink capturing method calls to `extractall` with `members` argument.
// For a call to `file.extractall` with `members` argument, `file` is considered a sink if not
// a the `members` argument contains a NameConstant as None, a List or call to the method `getmembers`.
// Otherwise, the argument of `members` is considered a sink.
exists(MethodCallNode call, Node arg, AllTarfileOpens atfo |
call = atfo.getReturn().getMember("extractall").getACall() and
arg = call.getArgByName("members") and
if
arg.asCfgNode() instanceof NameConstantNode or
arg.asCfgNode() instanceof ListNode
then sink = call.getObject()
else
if arg.(MethodCallNode).getMethodName() = "getmembers"
then sink = arg.(MethodCallNode).getObject()
else sink = call.getArgByName("members")
)
or
// An argument to `extract` is considered a sink.
exists(AllTarfileOpens atfo |
sink = atfo.getReturn().getMember("extract").getACall().getArg(0)
)
or
//An argument to `_extract_member` is considered a sink.
exists(MethodCallNode call, AllTarfileOpens atfo |
call = atfo.getReturn().getMember("_extract_member").getACall() and
call.getArg(1).(AttrRead).accesses(sink, "name")
)
) and
not sink.getScope().getLocation().getFile().inStdlib()
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Reading the response
nodeTo.(MethodCallNode).calls(nodeFrom, "read")
or
// Open a file for access
exists(MethodCallNode cn |
cn.calls(nodeTo, "open") and
cn.flowsTo(nodeFrom)
)
or
// Open a file for access using builtin
exists(API::CallNode cn |
cn = API::builtin("open").getACall() and
nodeTo = cn.getArg(0) and
cn.flowsTo(nodeFrom)
)
or
// Write access
exists(MethodCallNode cn |
cn.calls(nodeTo, "write") and
nodeFrom = cn.getArg(0)
)
or
// Retrieve Django uploaded files
// see getlist(): https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.QueryDict.getlist
// see chunks(): https://docs.djangoproject.com/en/4.1/ref/files/uploads/#django.core.files.uploadedfile.UploadedFile.chunks
nodeTo.(MethodCallNode).calls(nodeFrom, ["getlist", "get", "chunks"])
or
// Considering the use of "fs"
// see fs: https://docs.djangoproject.com/en/4.1/ref/files/storage/#the-filesystemstorage-class
nodeTo =
API::moduleImport("django")
.getMember("core")
.getMember("files")
.getMember("storage")
.getMember("FileSystemStorage")
.getReturn()
.getMember(["save", "path"])
.getACall() and
nodeFrom = nodeTo.(MethodCallNode).getArg(0)
or
// Accessing the name or raw content
nodeTo.(AttrRead).accesses(nodeFrom, ["name", "raw"])
or
// Join the base_dir to the filename
nodeTo = API::moduleImport("os").getMember("path").getMember("join").getACall() and
nodeFrom = nodeTo.(API::CallNode).getArg(1)
or
// Go through an Open for a Tarfile
nodeTo = tarfileOpen().getACall() and nodeFrom = nodeTo.(MethodCallNode).getArg(0)
or
// Handle the case where the getmembers is used.
nodeTo.(MethodCallNode).calls(nodeFrom, "getmembers") and
nodeFrom instanceof AllTarfileOpens
or
// To handle the case of `with closing(tarfile.open()) as file:`
// we add a step from the first argument of `closing` to the call to `closing`,
// whenever that first argument is a return of `tarfile.open()`.
nodeTo = API::moduleImport("contextlib").getMember("closing").getACall() and
nodeFrom = nodeTo.(API::CallNode).getArg(0) and
nodeFrom = tarfileOpen().getReturn().getAValueReachableFromSource()
or
// see Path : https://docs.python.org/3/library/pathlib.html#pathlib.Path
nodeTo = API::moduleImport("pathlib").getMember("Path").getACall() and
nodeFrom = nodeTo.(API::CallNode).getArg(0)
or
// Use of absolutepath
// see absolute : https://docs.python.org/3/library/pathlib.html#pathlib.Path.absolute
exists(API::CallNode mcn |
mcn = API::moduleImport("pathlib").getMember("Path").getACall() and
nodeTo = mcn.getAMethodCall("absolute") and
nodeFrom = mcn.getArg(0)
)
}
}

View File

@@ -59,12 +59,11 @@ module InsecureRandomness {
*/
class RandomFnSink extends Sink {
RandomFnSink() {
exists(DataFlowCallable randomFn |
randomFn
.getName()
exists(Function func |
func.getName()
.regexpMatch("(?i).*(gen(erate)?|make|mk|create).*(nonce|salt|pepper|Password).*")
|
this.getEnclosingCallable() = randomFn
this.asExpr().getScope() = func
)
}
}

View File

@@ -1,9 +1,9 @@
/**
* @name Call graph
* @description An edge in the points-to call graph.
* @description An edge in the call graph.
* @kind problem
* @problem.severity recommendation
* @id py/meta/points-to-call-graph
* @id py/meta/call-graph
* @tags meta
* @precision very-low
*/
@@ -12,9 +12,9 @@ import python
import semmle.python.dataflow.new.internal.DataFlowPrivate
import meta.MetaMetrics
from DataFlowCall c, DataFlowCallableValue f
from DataFlowCall call, DataFlowCallable target
where
c.getCallable() = f and
not c.getLocation().getFile() instanceof IgnoredFile and
not f.getScope().getLocation().getFile() instanceof IgnoredFile
select c, "Call to $@", f.getScope(), f.toString()
target = viableCallable(call) and
not call.getLocation().getFile() instanceof IgnoredFile and
not target.getScope().getLocation().getFile() instanceof IgnoredFile
select call, "Call to $@", target.getScope(), target.toString()

View File

@@ -1,16 +1,55 @@
/**
* Provides predicates for measuring the quality of the call graph, that is,
* the number of calls that could be resolved to a callee.
* the number of calls that could be resolved to a target.
*/
import python
import meta.MetaMetrics
newtype TTarget =
TFunction(Function func) or
TClass(Class cls)
class Target extends TTarget {
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets the location of this dataflow call. */
abstract Location getLocation();
/** Whether this target is relevant. */
predicate isRelevant() { exists(this.getLocation().getFile().getRelativePath()) }
}
class TargetFunction extends Target, TFunction {
Function func;
TargetFunction() { this = TFunction(func) }
override string toString() { result = func.toString() }
override Location getLocation() { result = func.getLocation() }
Function getFunction() { result = func }
}
class TargetClass extends Target, TClass {
Class cls;
TargetClass() { this = TClass(cls) }
override string toString() { result = cls.toString() }
override Location getLocation() { result = cls.getLocation() }
Class getClass() { result = cls }
}
/**
* A call that is (possibly) relevant for analysis quality.
* See `IgnoredFile` for details on what is excluded.
*/
class RelevantCall extends Call {
class RelevantCall extends CallNode {
RelevantCall() { not this.getLocation().getFile() instanceof IgnoredFile }
}
@@ -18,12 +57,16 @@ class RelevantCall extends Call {
module PointsToBasedCallGraph {
/** A call that can be resolved by points-to. */
class ResolvableCall extends RelevantCall {
Value callee;
Value targetValue;
ResolvableCall() { callee.getACall() = this.getAFlowNode() }
ResolvableCall() { targetValue.getACall() = this }
/** Gets a resolved callee of this call. */
Value getCallee() { result = callee }
/** Gets a resolved target of this call. */
Target getTarget() {
result.(TargetFunction).getFunction() = targetValue.(CallableValue).getScope()
or
result.(TargetClass).getClass() = targetValue.(ClassValue).getScope()
}
}
/** A call that cannot be resolved by points-to. */
@@ -32,34 +75,79 @@ module PointsToBasedCallGraph {
}
/**
* A call that can be resolved by points-to, where the resolved callee is relevant.
* Relevant callees include:
* - builtins
* - standard library
* A call that can be resolved by points-to, where the resolved target is relevant.
* Relevant targets include:
* - source code of the project
*/
class ResolvableCallRelevantCallee extends ResolvableCall {
ResolvableCallRelevantCallee() {
callee.isBuiltin()
or
exists(File file |
file = callee.(CallableValue).getScope().getLocation().getFile()
or
file = callee.(ClassValue).getScope().getLocation().getFile()
|
file.inStdlib()
or
// part of the source code of the project
exists(file.getRelativePath())
class ResolvableCallRelevantTarget extends ResolvableCall {
ResolvableCallRelevantTarget() {
exists(Target target | target = this.getTarget() |
exists(target.getLocation().getFile().getRelativePath())
)
}
}
/**
* A call that can be resolved by points-to, where the resolved callee is not considered relevant.
* See `ResolvableCallRelevantCallee` for the definition of relevance.
* A call that can be resolved by points-to, where the resolved target is not considered relevant.
* See `ResolvableCallRelevantTarget` for the definition of relevance.
*/
class ResolvableCallIrrelevantCallee extends ResolvableCall {
ResolvableCallIrrelevantCallee() { not this instanceof ResolvableCallRelevantCallee }
class ResolvableCallIrrelevantTarget extends ResolvableCall {
ResolvableCallIrrelevantTarget() { not this instanceof ResolvableCallRelevantTarget }
}
}
/** Provides classes for call-graph resolution by using type-tracking. */
module TypeTrackingBasedCallGraph {
private import semmle.python.dataflow.new.internal.DataFlowDispatch as TT
/** A call that can be resolved by type-tracking. */
class ResolvableCall extends RelevantCall {
ResolvableCall() {
exists(TT::TNormalCall(this, _, _))
or
TT::resolveClassCall(this, _)
}
/** Gets a resolved target of this call. */
Target getTarget() {
exists(TT::DataFlowCall call, TT::CallType ct, Function targetFunc |
call = TT::TNormalCall(this, targetFunc, ct) and
not ct instanceof TT::CallTypeClass and
targetFunc = result.(TargetFunction).getFunction()
)
or
// a TT::TNormalCall only exists when the call can be resolved to a function.
// Since points-to just says the call goes directly to the class itself, and
// type-tracking based wants to resolve this to the constructor, which might not
// exist. So to do a proper comparison, we don't require the call to be resolve to
// a specific function.
TT::resolveClassCall(this, result.(TargetClass).getClass())
}
}
/** A call that cannot be resolved by type-tracking. */
class UnresolvableCall extends RelevantCall {
UnresolvableCall() { not this instanceof ResolvableCall }
}
/**
* A call that can be resolved by type-tracking, where the resolved callee is relevant.
* Relevant targets include:
* - source code of the project
*/
class ResolvableCallRelevantTarget extends ResolvableCall {
ResolvableCallRelevantTarget() {
exists(Target target | target = this.getTarget() |
exists(target.getLocation().getFile().getRelativePath())
)
}
}
/**
* A call that can be resolved by type-tracking, where the resolved target is not considered relevant.
* See `ResolvableCallRelevantTarget` for the definition of relevance.
*/
class ResolvableCallIrrelevantTarget extends ResolvableCall {
ResolvableCallIrrelevantTarget() { not this instanceof ResolvableCallRelevantTarget }
}
}

View File

@@ -11,4 +11,4 @@
import python
import CallGraphQuality
select projectRoot(), count(PointsToBasedCallGraph::ResolvableCallRelevantCallee call)
select projectRoot(), count(PointsToBasedCallGraph::ResolvableCallRelevantTarget call)

View File

@@ -0,0 +1,17 @@
/**
* @name New call graph edge from using type-tracking instead of points-to
* @kind problem
* @problem.severity recommendation
* @id py/meta/type-tracking-call-graph
* @tags meta
* @precision very-low
*/
import python
import CallGraphQuality
from CallNode call, Target target
where
target.isRelevant() and
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
select call, "$@ to $@", call, "Call", target, target.toString()

View File

@@ -0,0 +1,18 @@
/**
* @name Missing call graph edge from using type-tracking instead of points-to
* @kind problem
* @problem.severity recommendation
* @id py/meta/call-graph-missing
* @tags meta
* @precision very-low
*/
import python
import CallGraphQuality
from CallNode call, Target target
where
target.isRelevant() and
call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
not call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
select call, "MISSING: $@ to $@", call, "Call", target, target.toString()

View File

@@ -0,0 +1,18 @@
/**
* @name New call graph edge from using type-tracking instead of points-to
* @kind problem
* @problem.severity recommendation
* @id py/meta/call-graph-new
* @tags meta
* @precision very-low
*/
import python
import CallGraphQuality
from CallNode call, Target target
where
target.isRelevant() and
not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
select call, "NEW: $@ to $@", call, "Call", target, target.toString()

View File

@@ -0,0 +1,19 @@
/**
* @name New call graph edge from using type-tracking instead of points-to, that is ambiguous
* @kind problem
* @problem.severity recommendation
* @id py/meta/call-graph-new-ambiguous
* @tags meta
* @precision very-low
*/
import python
import CallGraphQuality
from CallNode call, Target target
where
target.isRelevant() and
not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target and
1 < count(call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget())
select call, "NEW: $@ to $@", call, "Call", target, target.toString()

View File

@@ -0,0 +1,35 @@
/**
* @name Call graph edge overview from using type-tracking instead of points-to
* @id py/meta/call-graph-overview
* @precision very-low
*/
import python
import CallGraphQuality
from string tag, int c
where
tag = "SHARED" and
c =
count(CallNode call, Target target |
target.isRelevant() and
call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
)
or
tag = "NEW" and
c =
count(CallNode call, Target target |
target.isRelevant() and
not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
)
or
tag = "MISSING" and
c =
count(CallNode call, Target target |
target.isRelevant() and
call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
not call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
)
select tag, c

View File

@@ -0,0 +1,18 @@
/**
* @name Shared call graph edge from using type-tracking instead of points-to
* @kind problem
* @problem.severity recommendation
* @id py/meta/call-graph-shared
* @tags meta
* @precision very-low
*/
import python
import CallGraphQuality
from CallNode call, Target target
where
target.isRelevant() and
call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
select call, "SHARED: $@ to $@", call, "Call", target, target.toString()

View File

@@ -1,5 +1,5 @@
name: codeql/python-queries
version: 0.6.3-dev
version: 0.6.5-dev
groups:
- python
- queries

View File

@@ -6,4 +6,4 @@
| six.moves.urllib | Package six.moves.urllib |
| six.moves.urllib.parse | Module six.moves.urllib_parse |
| six.moves.urllib.parse.urlsplit | Function urlsplit |
| six.moves.zip | builtin-class itertools.izip |
| six.moves.zip | Builtin-function zip |

View File

@@ -1 +1 @@
| bad_encoding.py:11:19:11:19 | Encoding Error | 'utf8' codec can't decode byte 0x82 in position 82: invalid start byte |
| bad_encoding.py:11:19:11:19 | Encoding Error | 'utf-8' codec can't decode byte 0x82 in position 82: invalid start byte |

View File

@@ -0,0 +1,47 @@
import python
import semmle.python.dataflow.new.DataFlow::DataFlow
import semmle.python.dataflow.new.internal.DataFlowPrivate
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
// TODO: this should be promoted to be a REAL consistency query by being placed in
// `python/ql/consistency-queries`. For for now it resides here.
private class MyConsistencyConfiguration extends ConsistencyConfiguration {
override predicate argHasPostUpdateExclude(ArgumentNode n) {
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_))
or
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
}
override predicate reverseReadExclude(Node n) {
// since `self`/`cls` parameters can be marked as implicit argument to `super()`,
// they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs`
// parameter, but dataflow-consistency queries should _not_ complain about there not
// being a post-update node for the synthetic `**kwargs` parameter.
n instanceof SynthDictSplatParameterNode
}
override predicate uniqueParameterNodeAtPositionExclude(
DataFlowCallable c, ParameterPosition pos, Node p
) {
// TODO: This can be removed once we solve the overlap of dictionary splat parameters
c.getParameter(pos) = p and
pos.isDictSplat() and
not exists(p.getLocation().getFile().getRelativePath())
}
override predicate uniqueParameterNodePositionExclude(
DataFlowCallable c, ParameterPosition pos, Node p
) {
// For normal parameters that can both be passed as positional arguments or keyword
// arguments, we currently have parameter positions for both cases..
//
// TODO: Figure out how bad breaking this consistency check is
exists(Function func, Parameter param |
c.getScope() = func and
p = parameterNode(param) and
c.getParameter(pos) = p and
param = func.getArg(_) and
param = func.getArgByName(_)
)
}
}

View File

@@ -26,29 +26,30 @@ abstract class RoutingTest extends InlineExpectationsTest {
element = fromNode.toString() and
(
tag = this.flowTag() and
if "\"" + tag + "\"" = this.fromValue(fromNode)
then value = ""
else value = this.fromValue(fromNode)
if "\"" + tag + "\"" = fromValue(fromNode) then value = "" else value = fromValue(fromNode)
or
// only have result for `func` tag if the function where `arg<n>` is used, is
// different from the function name of the call where `arg<n>` was specified as
// an argument
tag = "func" and
value = this.toFunc(toNode) and
not value = this.fromFunc(fromNode)
value = toFunc(toNode) and
not value = fromFunc(fromNode)
)
)
}
pragma[inline]
private string fromValue(DataFlow::Node fromNode) {
result = "\"" + prettyNode(fromNode).replaceAll("\"", "'") + "\""
}
pragma[inline]
private string fromFunc(DataFlow::ArgumentNode fromNode) {
result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
}
pragma[inline]
private string toFunc(DataFlow::Node toNode) {
result = toNode.getEnclosingCallable().getCallableValue().getScope().getQualifiedName() // TODO: More robust pretty printing?
}
}
pragma[inline]
private string fromValue(DataFlow::Node fromNode) {
result = "\"" + prettyNode(fromNode).replaceAll("\"", "'") + "\""
}
pragma[inline]
private string fromFunc(DataFlow::ArgumentNode fromNode) {
result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
}
pragma[inline]
private string toFunc(DataFlow::Node toNode) {
result = toNode.getEnclosingCallable().getQualifiedName()
}

Some files were not shown because too many files have changed in this diff Show More