Merge branch 'main' into maikypedia/python-unsafe-deserialization

This commit is contained in:
Rasmus Wriedt Larsen
2023-09-25 10:29:18 +02:00
3220 changed files with 230691 additions and 195299 deletions

View File

@@ -1,3 +1,36 @@
## 0.10.3
### Minor Analysis Improvements
* Support analyzing packages (folders with Python code) that do not have `__init__.py` files. Although these files are technically required, we see real-world projects that omit them.
* Added modeling of AWS Lambda handlers that can be identified with `AWS::Serverless::Function` in YAML files, where the event parameter is modeled as a remote-flow-source.
* Improvements of the `aiohttp` models including remote-flow-sources from type annotations, new path manipulation, and SSRF sinks.
### Bug Fixes
* Fixed the computation of locations for imports with aliases in jump-to-definition.
## 0.10.2
No user-facing changes.
## 0.10.1
### New Features
* The `DataFlow::StateConfigSig` signature module has gained default implementations for `isBarrier/2` and `isAdditionalFlowStep/4`.
Hence, it is no longer necessary to provide `none()` implementations of these predicates if they are not needed.
### Minor Analysis Improvements
* Data flow configurations can now include a predicate `neverSkip(Node node)`
in order to ensure inclusion of certain nodes in the path explanations. The
predicate defaults to the end-points of the additional flow steps provided in
the configuration, which means that such steps are now always visible by
default in path explanations.
* Add support for Models as Data for Reflected XSS query
* Parameters with a default value are now considered a `DefinitionNode`. This improvement was motivated by allowing type-tracking and API graphs to follow flow from such a default value to a use by a captured variable.
## 0.10.0
### New Features

View File

@@ -492,9 +492,14 @@ class NiceLocationExpr extends Expr {
// for `import xxx` or for `import xxx as yyy`.
this.(ImportExpr).getLocation().hasLocationInfo(f, bl, bc, el, ec)
or
/* Show y for `y` in `from xxx import y` */
exists(string name |
name = this.(ImportMember).getName() and
// Show y for `y` in `from xxx import y`
// and y for `yyy as y` in `from xxx import yyy as y`.
exists(string name, Alias alias |
// This alias will always exist, as `from xxx import y`
// is expanded to `from xxx import y as y`.
this = alias.getValue() and
name = alias.getAsname().(Name).getId()
|
this.(ImportMember).getLocation().hasLocationInfo(f, _, _, el, ec) and
bl = el and
bc = ec - name.length() + 1

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* Parameters with a default value are now considered a `DefinitionNode`. This improvement was motivated by allowing type-tracking and API graphs to follow flow from such a default value to a use by a captured variable.

View File

@@ -1,6 +0,0 @@
---
category: feature
---
* The `DataFlow::StateConfigSig` signature module has gained default implementations for `isBarrier/2` and `isAdditionalFlowStep/4`.
Hence, it is no longer necessary to provide `none()` implementations of these predicates if they are not needed.

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* Add support for Models as Data for Reflected XSS query

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Regular expressions containing multiple parse mode flags are now interpreted correctly. For example, `"(?is)abc.*"` with both the `i` and `s` flags.

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added `shlex.quote` as a sanitizer for the `py/shell-command-constructed-from-input` query.

View File

@@ -0,0 +1,4 @@
---
category: fix
---
* Subterms of regular expressions encoded as single-line string literals now have better source-location information.

View File

@@ -0,0 +1,16 @@
## 0.10.1
### New Features
* The `DataFlow::StateConfigSig` signature module has gained default implementations for `isBarrier/2` and `isAdditionalFlowStep/4`.
Hence, it is no longer necessary to provide `none()` implementations of these predicates if they are not needed.
### Minor Analysis Improvements
* Data flow configurations can now include a predicate `neverSkip(Node node)`
in order to ensure inclusion of certain nodes in the path explanations. The
predicate defaults to the end-points of the additional flow steps provided in
the configuration, which means that such steps are now always visible by
default in path explanations.
* Add support for Models as Data for Reflected XSS query
* Parameters with a default value are now considered a `DefinitionNode`. This improvement was motivated by allowing type-tracking and API graphs to follow flow from such a default value to a use by a captured variable.

View File

@@ -0,0 +1,3 @@
## 0.10.2
No user-facing changes.

View File

@@ -0,0 +1,11 @@
## 0.10.3
### Minor Analysis Improvements
* Support analyzing packages (folders with Python code) that do not have `__init__.py` files. Although these files are technically required, we see real-world projects that omit them.
* Added modeling of AWS Lambda handlers that can be identified with `AWS::Serverless::Function` in YAML files, where the event parameter is modeled as a remote-flow-source.
* Improvements of the `aiohttp` models including remote-flow-sources from type annotations, new path manipulation, and SSRF sinks.
### Bug Fixes
* Fixed the computation of locations for imports with aliases in jump-to-definition.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.10.0
lastReleaseVersion: 0.10.3

View File

@@ -1,16 +1,17 @@
name: codeql/python-all
version: 0.10.1-dev
version: 0.10.4-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python
library: true
upgrades: upgrades
dependencies:
codeql/dataflow: ${workspace}
codeql/mad: ${workspace}
codeql/regex: ${workspace}
codeql/tutorial: ${workspace}
codeql/util: ${workspace}
codeql/yaml: ${workspace}
dataExtensions:
- semmle/python/frameworks/**/model.yml
- semmle/python/frameworks/**/*.model.yml
warnOnImplicitThis: true

View File

@@ -52,6 +52,7 @@ private import semmle.python.frameworks.Requests
private import semmle.python.frameworks.RestFramework
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.RuamelYaml
private import semmle.python.frameworks.ServerLess
private import semmle.python.frameworks.Setuptools
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy

View File

@@ -195,7 +195,22 @@ private predicate isPotentialPackage(Folder f) {
}
private string moduleNameFromBase(Container file) {
isPotentialPackage(file) and result = file.getBaseName()
// We used to also require `isPotentialPackage(f)` to hold in this case,
// but we saw modules not getting resolved because their folder did not
// contain an `__init__.py` file.
//
// This makes the folder not be a package but a namespace package instead.
// In most cases this is a mistake :| See following links for more details
// - https://dev.to/methane/don-t-omit-init-py-3hga
// - https://packaging.python.org/en/latest/guides/packaging-namespace-packages/
// - https://discuss.python.org/t/init-py-pep-420-and-iter-modules-confusion/9642
//
// It is possible that we can keep the original requirement on
// `isPotentialPackage(f)` here, but relax `isPotentialPackage` itself to allow
// for this behavior of missing `__init__.py` files. However, doing so involves
// cascading changes (for example to `moduleNameFromFile`), and was a more involved
// task than we wanted to take on.
result = file.getBaseName()
or
file instanceof File and result = file.getStem()
}

View File

@@ -22,6 +22,8 @@ private import python
* global (inter-procedural) data flow analyses.
*/
module DataFlow {
import internal.DataFlow
private import internal.DataFlowImplSpecific
private import codeql.dataflow.DataFlow
import DataFlowMake<PythonDataFlow>
import internal.DataFlowImpl1
}

View File

@@ -15,6 +15,10 @@ private import python
* global (inter-procedural) taint-tracking analyses.
*/
module TaintTracking {
import internal.tainttracking1.TaintTracking
import semmle.python.dataflow.new.internal.tainttracking1.TaintTrackingParameter::Public
private import semmle.python.dataflow.new.internal.DataFlowImplSpecific
private import semmle.python.dataflow.new.internal.TaintTrackingImplSpecific
private import codeql.dataflow.TaintTracking
import TaintFlowMake<PythonDataFlow, PythonTaintTracking>
import internal.tainttracking1.TaintTrackingImpl
}

View File

@@ -1,414 +0,0 @@
/**
* Provides an implementation of global (interprocedural) data flow. This file
* re-exports the local (intraprocedural) data flow analysis from
* `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
* through the `Global` and `GlobalWithState` modules.
*/
private import DataFlowImplCommon
private import DataFlowImplSpecific::Private
import DataFlowImplSpecific::Public
import DataFlowImplCommonPublic
private import DataFlowImpl
/** An input configuration for data flow. */
signature module ConfigSig {
/**
* Holds if `source` is a relevant data flow source.
*/
predicate isSource(Node source);
/**
* Holds if `sink` is a relevant data flow sink.
*/
predicate isSink(Node sink);
/**
* Holds if data flow through `node` is prohibited. This completely removes
* `node` from the data flow graph.
*/
default predicate isBarrier(Node node) { none() }
/** Holds if data flow into `node` is prohibited. */
default predicate isBarrierIn(Node node) { none() }
/** Holds if data flow out of `node` is prohibited. */
default predicate isBarrierOut(Node node) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
*/
default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
/**
* Holds if an arbitrary number of implicit read steps of content `c` may be
* taken at `node`.
*/
default predicate allowImplicitRead(Node node, ContentSet c) { none() }
/**
* Gets the virtual dispatch branching limit when calculating field flow.
* This can be overridden to a smaller value to improve performance (a
* value of 0 disables field flow), or a larger value to get more results.
*/
default int fieldFlowBranchLimit() { result = 2 }
/**
* Gets a data flow configuration feature to add restrictions to the set of
* valid flow paths.
*
* - `FeatureHasSourceCallContext`:
* Assume that sources have some existing call context to disallow
* conflicting return-flow directly following the source.
* - `FeatureHasSinkCallContext`:
* Assume that sinks have some existing call context to disallow
* conflicting argument-to-parameter flow directly preceding the sink.
* - `FeatureEqualSourceSinkCallContext`:
* Implies both of the above and additionally ensures that the entire flow
* path preserves the call context.
*
* These features are generally not relevant for typical end-to-end data flow
* queries, but should only be used for constructing paths that need to
* somehow be pluggable in another path context.
*/
default FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `flowPath`. */
default predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `flowPath`. */
default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if hidden nodes should be included in the data flow graph.
*
* This feature should only be used for debugging or when the data flow graph
* is not visualized (as it is in a `path-problem` query).
*/
default predicate includeHiddenNodes() { none() }
}
/** An input configuration for data flow using flow state. */
signature module StateConfigSig {
bindingset[this]
class FlowState;
/**
* Holds if `source` is a relevant data flow source with the given initial
* `state`.
*/
predicate isSource(Node source, FlowState state);
/**
* Holds if `sink` is a relevant data flow sink accepting `state`.
*/
predicate isSink(Node sink, FlowState state);
/**
* Holds if data flow through `node` is prohibited. This completely removes
* `node` from the data flow graph.
*/
default predicate isBarrier(Node node) { none() }
/**
* Holds if data flow through `node` is prohibited when the flow state is
* `state`.
*/
default predicate isBarrier(Node node, FlowState state) { none() }
/** Holds if data flow into `node` is prohibited. */
default predicate isBarrierIn(Node node) { none() }
/** Holds if data flow out of `node` is prohibited. */
default predicate isBarrierOut(Node node) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
*/
default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
default predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
none()
}
/**
* Holds if an arbitrary number of implicit read steps of content `c` may be
* taken at `node`.
*/
default predicate allowImplicitRead(Node node, ContentSet c) { none() }
/**
* Gets the virtual dispatch branching limit when calculating field flow.
* This can be overridden to a smaller value to improve performance (a
* value of 0 disables field flow), or a larger value to get more results.
*/
default int fieldFlowBranchLimit() { result = 2 }
/**
* Gets a data flow configuration feature to add restrictions to the set of
* valid flow paths.
*
* - `FeatureHasSourceCallContext`:
* Assume that sources have some existing call context to disallow
* conflicting return-flow directly following the source.
* - `FeatureHasSinkCallContext`:
* Assume that sinks have some existing call context to disallow
* conflicting argument-to-parameter flow directly preceding the sink.
* - `FeatureEqualSourceSinkCallContext`:
* Implies both of the above and additionally ensures that the entire flow
* path preserves the call context.
*
* These features are generally not relevant for typical end-to-end data flow
* queries, but should only be used for constructing paths that need to
* somehow be pluggable in another path context.
*/
default FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `flowPath`. */
default predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `flowPath`. */
default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if hidden nodes should be included in the data flow graph.
*
* This feature should only be used for debugging or when the data flow graph
* is not visualized (as it is in a `path-problem` query).
*/
default predicate includeHiddenNodes() { none() }
}
/**
* Gets the exploration limit for `partialFlow` and `partialFlowRev`
* measured in approximate number of interprocedural steps.
*/
signature int explorationLimitSig();
/**
* The output of a global data flow computation.
*
* Modules declared with `implements GlobalFlowSig` (such as `Global` and
* `GlobalWithState` in this file) must provide the `PathNode` class and the
* four flow predicates declared here.
*/
signature module GlobalFlowSig {
/**
* A `Node` augmented with a call context (except for sinks) and an access path.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode;
/**
* Holds if data can flow from `source` to `sink`.
*
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate flowPath(PathNode source, PathNode sink);
/**
* Holds if data can flow from `source` to `sink`.
*/
predicate flow(Node source, Node sink);
/**
* Holds if data can flow from some source to `sink`.
*/
predicate flowTo(Node sink);
/**
* Holds if data can flow from some source to `sink`, where `sink` is given
* as a `DataFlowExpr` rather than as a `Node`.
*/
predicate flowToExpr(DataFlowExpr sink);
}
/**
* Constructs a global data flow computation.
*
* `Config` is a state-less `ConfigSig`; it is adapted to `FullStateConfigSig`
* before being passed to `Impl`.
*/
module Global<ConfigSig Config> implements GlobalFlowSig {
// Adapter: combines the user's `Config` with `DefaultState<Config>` so that the
// result satisfies `FullStateConfigSig`.
// NOTE(review): `DefaultState` is defined outside this file; presumably it
// supplies the flow-state members that `ConfigSig` lacks - confirm.
private module C implements FullStateConfigSig {
import DefaultState<Config>
import Config
}
import Impl<C>
}
/** DEPRECATED: Use `Global` instead. */
deprecated module Make<ConfigSig Config> implements GlobalFlowSig {
import Global<Config>
}
/**
* Constructs a global data flow computation using flow state.
*
* `Config` is a `StateConfigSig`; it is wrapped in a private module declared
* as `FullStateConfigSig` before being passed to `Impl`.
*/
module GlobalWithState<StateConfigSig Config> implements GlobalFlowSig {
// Adapter: `Config` is imported directly; no `DefaultState` import is used here.
private module C implements FullStateConfigSig {
import Config
}
import Impl<C>
}
/** DEPRECATED: Use `GlobalWithState` instead. */
deprecated module MakeWithState<StateConfigSig Config> implements GlobalFlowSig {
import GlobalWithState<Config>
}
/**
* A signature for path-node types, used as the type parameter of
* `PathGraphSig` and of the `MergePathGraph` modules.
*
* A conforming class must provide a textual representation, location
* information, and access to the underlying data flow `Node`.
*/
signature class PathNodeSig {
/** Gets a textual representation of this element. */
string toString();
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
);
/** Gets the underlying `Node`. */
Node getNode();
}
/**
* A signature for modules that describe a graph of data flow path
* explanations over the node type `PathNode`, by means of the `edges`,
* `nodes`, and `subpaths` predicates.
*/
signature module PathGraphSig<PathNodeSig PathNode> {
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
predicate edges(PathNode a, PathNode b);
/** Holds if `n` is a node in the graph of data flow path explanations. */
predicate nodes(PathNode n, string key, string val);
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out);
}
/**
* Constructs a `PathGraph` from two `PathGraph`s by disjoint union.
*
* The resulting `PathNode` class wraps a node from either input graph, and the
* resulting `PathGraph` module holds for a tuple exactly when one of the two
* input graphs holds for the corresponding projections.
*/
module MergePathGraph<
PathNodeSig PathNode1, PathNodeSig PathNode2, PathGraphSig<PathNode1> Graph1,
PathGraphSig<PathNode2> Graph2>
{
// One branch per input graph: each merged node wraps either a `PathNode1`
// or a `PathNode2`.
private newtype TPathNode =
TPathNode1(PathNode1 p) or
TPathNode2(PathNode2 p)
/** A node in a graph of path explanations that is formed by disjoint union of the two given graphs. */
class PathNode extends TPathNode {
/** Gets this as a projection on the first given `PathGraph`. */
PathNode1 asPathNode1() { this = TPathNode1(result) }
/** Gets this as a projection on the second given `PathGraph`. */
PathNode2 asPathNode2() { this = TPathNode2(result) }
/** Gets a textual representation of this element. */
string toString() {
// Delegates to whichever projection exists for this node.
result = this.asPathNode1().toString() or
result = this.asPathNode2().toString()
}
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.asPathNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) or
this.asPathNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
Node getNode() {
result = this.asPathNode1().getNode() or
result = this.asPathNode2().getNode()
}
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
*/
module PathGraph implements PathGraphSig<PathNode> {
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
query predicate edges(PathNode a, PathNode b) {
// Both end-points are projected onto the same input graph in each disjunct,
// so no edge connects a node of `Graph1` to a node of `Graph2`.
Graph1::edges(a.asPathNode1(), b.asPathNode1()) or
Graph2::edges(a.asPathNode2(), b.asPathNode2())
}
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
Graph1::nodes(n.asPathNode1(), key, val) or
Graph2::nodes(n.asPathNode2(), key, val)
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
* `ret -> out` is summarized as the edge `arg -> out`.
*/
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
Graph1::subpaths(arg.asPathNode1(), par.asPathNode1(), ret.asPathNode1(), out.asPathNode1()) or
Graph2::subpaths(arg.asPathNode2(), par.asPathNode2(), ret.asPathNode2(), out.asPathNode2())
}
}
}
/**
* Constructs a `PathGraph` from three `PathGraph`s by disjoint union.
*
* This is implemented by applying `MergePathGraph` twice: first to graphs 1
* and 2, and then to that result together with graph 3.
*/
module MergePathGraph3<
PathNodeSig PathNode1, PathNodeSig PathNode2, PathNodeSig PathNode3,
PathGraphSig<PathNode1> Graph1, PathGraphSig<PathNode2> Graph2, PathGraphSig<PathNode3> Graph3>
{
// Step 1: merge graphs 1 and 2.
private module MergedInner = MergePathGraph<PathNode1, PathNode2, Graph1, Graph2>;
// Step 2: merge the result of step 1 (as the "first" graph) with graph 3
// (as the "second" graph).
private module Merged =
MergePathGraph<MergedInner::PathNode, PathNode3, MergedInner::PathGraph, Graph3>;
/** A node in a graph of path explanations that is formed by disjoint union of the three given graphs. */
class PathNode instanceof Merged::PathNode {
// Graphs 1 and 2 live inside the first component of `Merged`, so their
// projections go through `asPathNode1()` twice / `asPathNode1().asPathNode2()`.
/** Gets this as a projection on the first given `PathGraph`. */
PathNode1 asPathNode1() { result = super.asPathNode1().asPathNode1() }
/** Gets this as a projection on the second given `PathGraph`. */
PathNode2 asPathNode2() { result = super.asPathNode1().asPathNode2() }
// Graph 3 is the second component of `Merged`.
/** Gets this as a projection on the third given `PathGraph`. */
PathNode3 asPathNode3() { result = super.asPathNode2() }
/** Gets a textual representation of this element. */
string toString() { result = super.toString() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
Node getNode() { result = super.getNode() }
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
*/
module PathGraph = Merged::PathGraph;
}

View File

@@ -1561,7 +1561,8 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl
}
/** Gets a viable run-time target for the call `call`. */
DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
DataFlowCallable viableCallable(DataFlowCall call) {
call instanceof ExtractedDataFlowCall and
result = call.getCallable()
or
// A call to a library callable with a flow summary

View File

@@ -276,6 +276,8 @@ private module Config implements FullStateConfigSig {
getConfig(state).isSource(source) and getState(state) instanceof FlowStateEmpty
}
predicate isSink(Node sink) { none() }
predicate isSink(Node sink, FlowState state) {
getConfig(state).isSink(sink, getState(state))
or
@@ -313,6 +315,8 @@ private module Config implements FullStateConfigSig {
any(Configuration config).allowImplicitRead(node, c)
}
predicate neverSkip(Node node) { none() }
int fieldFlowBranchLimit() { result = min(any(Configuration config).fieldFlowBranchLimit()) }
FlowFeature getAFeature() { result = any(Configuration config).getAFeature() }

View File

@@ -276,6 +276,8 @@ private module Config implements FullStateConfigSig {
getConfig(state).isSource(source) and getState(state) instanceof FlowStateEmpty
}
predicate isSink(Node sink) { none() }
predicate isSink(Node sink, FlowState state) {
getConfig(state).isSink(sink, getState(state))
or
@@ -313,6 +315,8 @@ private module Config implements FullStateConfigSig {
any(Configuration config).allowImplicitRead(node, c)
}
predicate neverSkip(Node node) { none() }
int fieldFlowBranchLimit() { result = min(any(Configuration config).fieldFlowBranchLimit()) }
FlowFeature getAFeature() { result = any(Configuration config).getAFeature() }

View File

@@ -276,6 +276,8 @@ private module Config implements FullStateConfigSig {
getConfig(state).isSource(source) and getState(state) instanceof FlowStateEmpty
}
predicate isSink(Node sink) { none() }
predicate isSink(Node sink, FlowState state) {
getConfig(state).isSink(sink, getState(state))
or
@@ -313,6 +315,8 @@ private module Config implements FullStateConfigSig {
any(Configuration config).allowImplicitRead(node, c)
}
predicate neverSkip(Node node) { none() }
int fieldFlowBranchLimit() { result = min(any(Configuration config).fieldFlowBranchLimit()) }
FlowFeature getAFeature() { result = any(Configuration config).getAFeature() }

View File

@@ -276,6 +276,8 @@ private module Config implements FullStateConfigSig {
getConfig(state).isSource(source) and getState(state) instanceof FlowStateEmpty
}
predicate isSink(Node sink) { none() }
predicate isSink(Node sink, FlowState state) {
getConfig(state).isSink(sink, getState(state))
or
@@ -313,6 +315,8 @@ private module Config implements FullStateConfigSig {
any(Configuration config).allowImplicitRead(node, c)
}
predicate neverSkip(Node node) { none() }
int fieldFlowBranchLimit() { result = min(any(Configuration config).fieldFlowBranchLimit()) }
FlowFeature getAFeature() { result = any(Configuration config).getAFeature() }

View File

@@ -3,297 +3,54 @@
* data-flow classes and predicates.
*/
private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
private import tainttracking1.TaintTrackingParameter::Private
private import tainttracking1.TaintTrackingParameter::Public
private import python
private import DataFlowImplSpecific
private import TaintTrackingImplSpecific
private import codeql.dataflow.internal.DataFlowImplConsistency
module Consistency {
private newtype TConsistencyConfiguration = MkConsistencyConfiguration()
private module Input implements InputSig<PythonDataFlow> {
private import Private
private import Public
/** A class for configuring the consistency queries. */
class ConsistencyConfiguration extends TConsistencyConfiguration {
string toString() { none() }
/** Holds if `n` should be excluded from the consistency test `uniqueEnclosingCallable`. */
predicate uniqueEnclosingCallableExclude(Node n) { none() }
/** Holds if `call` should be excluded from the consistency test `uniqueCallEnclosingCallable`. */
predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) { none() }
/** Holds if `n` should be excluded from the consistency test `uniqueNodeLocation`. */
predicate uniqueNodeLocationExclude(Node n) { none() }
/** Holds if `n` should be excluded from the consistency test `missingLocation`. */
predicate missingLocationExclude(Node n) { none() }
/** Holds if `n` should be excluded from the consistency test `postWithInFlow`. */
predicate postWithInFlowExclude(Node n) { none() }
/** Holds if `n` should be excluded from the consistency test `argHasPostUpdate`. */
predicate argHasPostUpdateExclude(ArgumentNode n) { none() }
/** Holds if `n` should be excluded from the consistency test `reverseRead`. */
predicate reverseReadExclude(Node n) { none() }
/** Holds if `n` should be excluded from the consistency test `postHasUniquePre`. */
predicate postHasUniquePreExclude(PostUpdateNode n) { none() }
/** Holds if `n` should be excluded from the consistency test `uniquePostUpdate`. */
predicate uniquePostUpdateExclude(Node n) { none() }
/** Holds if `(call, ctx)` should be excluded from the consistency test `viableImplInCallContextTooLargeExclude`. */
predicate viableImplInCallContextTooLargeExclude(
DataFlowCall call, DataFlowCall ctx, DataFlowCallable callable
) {
none()
}
/** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodeAtPosition`. */
predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
none()
}
/** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodePosition`. */
predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
none()
}
/** Holds if `n` should be excluded from the consistency test `identityLocalStep`. */
predicate identityLocalStepExclude(Node n) { none() }
}
private class RelevantNode extends Node {
RelevantNode() {
this instanceof ArgumentNode or
this instanceof ParameterNode or
this instanceof ReturnNode or
this = getAnOutNode(_, _) or
simpleLocalFlowStep(this, _) or
simpleLocalFlowStep(_, this) or
jumpStep(this, _) or
jumpStep(_, this) or
storeStep(this, _, _) or
storeStep(_, _, this) or
readStep(this, _, _) or
readStep(_, _, this) or
defaultAdditionalTaintStep(this, _) or
defaultAdditionalTaintStep(_, this)
}
}
query predicate uniqueEnclosingCallable(Node n, string msg) {
exists(int c |
n instanceof RelevantNode and
c = count(nodeGetEnclosingCallable(n)) and
c != 1 and
not any(ConsistencyConfiguration conf).uniqueEnclosingCallableExclude(n) and
msg = "Node should have one enclosing callable but has " + c + "."
)
}
query predicate uniqueCallEnclosingCallable(DataFlowCall call, string msg) {
exists(int c |
c = count(call.getEnclosingCallable()) and
c != 1 and
not any(ConsistencyConfiguration conf).uniqueCallEnclosingCallableExclude(call) and
msg = "Call should have one enclosing callable but has " + c + "."
)
}
query predicate uniqueType(Node n, string msg) {
exists(int c |
n instanceof RelevantNode and
c = count(getNodeType(n)) and
c != 1 and
msg = "Node should have one type but has " + c + "."
)
}
query predicate uniqueNodeLocation(Node n, string msg) {
exists(int c |
c =
count(string filepath, int startline, int startcolumn, int endline, int endcolumn |
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
) and
c != 1 and
not any(ConsistencyConfiguration conf).uniqueNodeLocationExclude(n) and
msg = "Node should have one location but has " + c + "."
)
}
query predicate missingLocation(string msg) {
exists(int c |
c =
strictcount(Node n |
not n.hasLocationInfo(_, _, _, _, _) and
not any(ConsistencyConfiguration conf).missingLocationExclude(n)
) and
msg = "Nodes without location: " + c
)
}
query predicate uniqueNodeToString(Node n, string msg) {
exists(int c |
c = count(n.toString()) and
c != 1 and
msg = "Node should have one toString but has " + c + "."
)
}
query predicate missingToString(string msg) {
exists(int c |
c = strictcount(Node n | not exists(n.toString())) and
msg = "Nodes without toString: " + c
)
}
query predicate parameterCallable(ParameterNode p, string msg) {
exists(DataFlowCallable c | isParameterNode(p, c, _) and c != nodeGetEnclosingCallable(p)) and
msg = "Callable mismatch for parameter."
}
query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
simpleLocalFlowStep(n1, n2) and
nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and
msg = "Local flow step does not preserve enclosing callable."
}
query predicate readStepIsLocal(Node n1, Node n2, string msg) {
readStep(n1, _, n2) and
nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and
msg = "Read step does not preserve enclosing callable."
}
query predicate storeStepIsLocal(Node n1, Node n2, string msg) {
storeStep(n1, _, n2) and
nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and
msg = "Store step does not preserve enclosing callable."
}
private DataFlowType typeRepr() { result = getNodeType(_) }
query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
t = typeRepr() and
not compatibleTypes(t, t) and
msg = "Type compatibility predicate is not reflexive."
}
query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
isUnreachableInCall(n, call) and
exists(DataFlowCallable c |
c = nodeGetEnclosingCallable(n) and
not viableCallable(call) = c
) and
msg = "Call context for isUnreachableInCall is inconsistent with call graph."
}
query predicate localCallNodes(DataFlowCall call, Node n, string msg) {
(
n = getAnOutNode(call, _) and
msg = "OutNode and call does not share enclosing callable."
or
n.(ArgumentNode).argumentOf(call, _) and
msg = "ArgumentNode and call does not share enclosing callable."
) and
nodeGetEnclosingCallable(n) != call.getEnclosingCallable()
}
// This predicate helps the compiler forget that in some languages
// it is impossible for a result of `getPreUpdateNode` to be an
// instance of `PostUpdateNode`.
private Node getPre(PostUpdateNode n) {
result = n.getPreUpdateNode()
predicate argHasPostUpdateExclude(ArgumentNode n) {
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_))
or
none()
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
}
query predicate postIsNotPre(PostUpdateNode n, string msg) {
getPre(n) = n and
msg = "PostUpdateNode should not equal its pre-update node."
predicate reverseReadExclude(Node n) {
// since `self`/`cls` parameters can be marked as implicit argument to `super()`,
// they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs`
// parameter, but dataflow-consistency queries should _not_ complain about there not
// being a post-update node for the synthetic `**kwargs` parameter.
n instanceof SynthDictSplatParameterNode
}
query predicate postHasUniquePre(PostUpdateNode n, string msg) {
not any(ConsistencyConfiguration conf).postHasUniquePreExclude(n) and
exists(int c |
c = count(n.getPreUpdateNode()) and
c != 1 and
msg = "PostUpdateNode should have one pre-update node but has " + c + "."
predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
// For normal parameters that can both be passed as positional arguments or keyword
// arguments, we currently have parameter positions for both cases.
//
// TODO: Figure out how bad breaking this consistency check is
exists(Function func, Parameter param |
c.getScope() = func and
p = parameterNode(param) and
c.getParameter(pos) = p and
param = func.getArg(_) and
param = func.getArgByName(_)
)
}
query predicate uniquePostUpdate(Node n, string msg) {
not any(ConsistencyConfiguration conf).uniquePostUpdateExclude(n) and
1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and
msg = "Node has multiple PostUpdateNodes."
predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) {
not exists(call.getLocation().getFile().getRelativePath())
}
query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
nodeGetEnclosingCallable(n) != nodeGetEnclosingCallable(n.getPreUpdateNode()) and
msg = "PostUpdateNode does not share callable with its pre-update node."
predicate identityLocalStepExclude(Node n) {
not exists(n.getLocation().getFile().getRelativePath())
}
/** Holds if `n` is the pre-update node of some `PostUpdateNode`. */
private predicate hasPost(Node n) { n = any(PostUpdateNode post).getPreUpdateNode() }
/**
 * Holds if `n` is the origin of a read step whose target has a post-update
 * node, while `n` itself has none and is not excluded by a
 * `ConsistencyConfiguration`.
 */
query predicate reverseRead(Node n, string msg) {
  msg = "Origin of readStep is missing a PostUpdateNode." and
  not hasPost(n) and
  exists(Node target |
    readStep(n, _, target) and
    hasPost(target)
  ) and
  not any(ConsistencyConfiguration conf).reverseReadExclude(n)
}
/**
 * Holds if the argument node `n` has no post-update node and is not excluded
 * by a `ConsistencyConfiguration`.
 */
query predicate argHasPostUpdate(ArgumentNode n, string msg) {
  msg = "ArgumentNode is missing PostUpdateNode." and
  not any(ConsistencyConfiguration conf).argHasPostUpdateExclude(n) and
  not hasPost(n)
}
// This predicate helps the compiler forget that in some languages
// it is impossible for a `PostUpdateNode` to be the target of
// `simpleLocalFlowStep`.
// NOTE(review): the `or none()` disjunct is logically redundant; presumably it
// is what keeps the compiler from seeing through this predicate, so it should
// not be simplified away - confirm before touching.
private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
/**
 * Holds if the post-update node `n` is the target of a simple local flow
 * step, does not clear any content, and is not excluded by a
 * `ConsistencyConfiguration`.
 */
query predicate postWithInFlow(Node n, string msg) {
  msg = "PostUpdateNode should not be the target of local flow." and
  isPostUpdateNode(n) and
  simpleLocalFlowStep(_, n) and
  not clearsContent(n, _) and
  not any(ConsistencyConfiguration conf).postWithInFlowExclude(n)
}
/**
 * Holds if `callable` is a target of `call` in the call context `ctx`
 * according to `viableImplInCallContext`, but is not among the targets given
 * by `viableCallable(call)`, and the triple is not excluded by a
 * `ConsistencyConfiguration`.
 */
query predicate viableImplInCallContextTooLarge(
  DataFlowCall call, DataFlowCall ctx, DataFlowCallable callable
) {
  viableImplInCallContext(call, ctx) = callable and
  not viableCallable(call) = callable and
  not any(ConsistencyConfiguration conf).viableImplInCallContextTooLargeExclude(call, ctx, callable)
}
/**
 * Holds if `p` is a parameter node of `c` at position `pos`, but it is not
 * the only parameter node at that position, and the triple is not excluded
 * by a `ConsistencyConfiguration`.
 */
query predicate uniqueParameterNodeAtPosition(
  DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
  msg = "Parameters with overlapping positions." and
  isParameterNode(p, c, pos) and
  not exists(unique(Node other | isParameterNode(other, c, pos))) and
  not any(ConsistencyConfiguration conf).uniqueParameterNodeAtPositionExclude(c, pos, p)
}
/**
 * Holds if `p` is a parameter node of `c` at position `pos`, but `pos` is not
 * the only position `p` has in `c`, and the triple is not excluded by a
 * `ConsistencyConfiguration`.
 */
query predicate uniqueParameterNodePosition(
  DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
  msg = "Parameter node with multiple positions." and
  isParameterNode(p, c, pos) and
  not exists(unique(ParameterPosition other | isParameterNode(p, c, other))) and
  not any(ConsistencyConfiguration conf).uniqueParameterNodePositionExclude(c, pos, p)
}
/** Holds if `getContentApprox` does not yield exactly one approximation for `c`. */
query predicate uniqueContentApprox(Content c, string msg) {
  msg = "Non-unique content approximation." and
  not exists(unique(ContentApprox candidate | candidate = getContentApprox(c)))
}
query predicate identityLocalStep(Node n, string msg) {
simpleLocalFlowStep(n, n) and
not any(ConsistencyConfiguration c).identityLocalStepExclude(n) and
msg = "Node steps to itself"
predicate multipleArgumentCallExclude(ArgumentNode arg, DataFlowCall call) {
isArgumentNode(arg, call, _)
}
}
module Consistency = MakeConsistency<PythonDataFlow, PythonTaintTracking, Input>;

View File

@@ -2,6 +2,7 @@
* Provides Python-specific definitions for use in the data flow library.
*/
private import codeql.dataflow.DataFlow
// we need to export `Unit` for the DataFlowImpl* files
private import python as Python
@@ -13,3 +14,12 @@ module Public {
import DataFlowPublic
import DataFlowUtil
}
/**
 * The Python implementation of the shared data flow library's `InputSig`,
 * assembled from the `Private` and `Public` modules.
 */
module PythonDataFlow implements InputSig {
import Private
import Public
// NOTE(review): the explicit `Private::` / `Public::` qualifiers below presumably
// pin down which imported definition satisfies the signature - confirm.
predicate neverSkipInPathGraph = Private::neverSkipInPathGraph/1;
Node exprNode(DataFlowExpr e) { result = Public::exprNode(e) }
}

View File

@@ -22,8 +22,8 @@ import DataFlowDispatch
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
/** Holds if `p` is a `ParameterNode` of `c` with position `pos`. */
predicate isParameterNode(ParameterNodeImpl p, DataFlowCallable c, ParameterPosition pos) {
p.isParameterOf(c, pos)
predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) {
p.(ParameterNodeImpl).isParameterOf(c, pos)
}
/** Holds if `arg` is an `ArgumentNode` of `c` with position `pos`. */
@@ -513,15 +513,21 @@ class CastNode extends Node {
* explanations.
*/
predicate neverSkipInPathGraph(Node n) {
// We include read- and store steps here to force them to be
// shown in path explanations.
// This hack is necessary, because we have included some of these
// steps as default taint steps, making them be suppressed in path
// explanations.
// We should revert this once we can remove these steps from the
// default taint steps; this should be possible once we have
// implemented flow summaries and recursive content.
readStep(_, _, n) or storeStep(_, _, n)
// NOTE: We could use RHS of a definition, but since we have use-use flow, in an
// example like
// ```py
// x = SOURCE()
// if <cond>:
// y = x
// SINK(x)
// ```
// we would end up saying that the path MUST not skip the x in `y = x`, which is just
// annoying and doesn't help the path explanation become clearer.
n.asVar() instanceof EssaDefinition and
// For a parameter we have flow from ControlFlowNode to SSA node, and then onwards
// with use-use flow, and since the CFN is already part of the path graph, we don't
// want to force showing the SSA node as well.
not n.asVar() instanceof ParameterDefinition
}
/**
@@ -533,6 +539,8 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }
predicate localMustFlowStep(Node node1, Node node2) { none() }
/**
* Gets the type of `node`.
*/
@@ -608,7 +616,7 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) {
* Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
* content `c`.
*/
predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
listStoreStep(nodeFrom, c, nodeTo)
or
setStoreStep(nodeFrom, c, nodeTo)
@@ -806,7 +814,7 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, PostUpdateNode n
/**
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
*/
predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
subscriptReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
@@ -881,7 +889,7 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
*/
predicate clearsContent(Node n, Content c) {
predicate clearsContent(Node n, ContentSet c) {
matchClearStep(n, c)
or
attributeClearStep(n, c)
@@ -933,8 +941,6 @@ DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
*/
predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() }
int accessPathLimit() { result = 5 }
/**
* Holds if access paths with `c` at their head always should be tracked at high
* precision. This disables adaptive access path precision for such access paths.

View File

@@ -296,11 +296,21 @@ module Public {
predicate hasProvenance(Provenance provenance) { provenance = "manual" }
}
/** A callable where there is no flow via the callable. */
class NeutralCallable extends SummarizedCallableBase {
/**
* A callable where there is no flow via the callable.
*/
class NeutralSummaryCallable extends NeutralCallable {
NeutralSummaryCallable() { this.getKind() = "summary" }
}
/**
* A callable that has a neutral model.
*/
class NeutralCallable extends NeutralCallableBase {
private string kind;
private Provenance provenance;
NeutralCallable() { neutralSummaryElement(this, provenance) }
NeutralCallable() { neutralElement(this, kind, provenance) }
/**
* Holds if the neutral is auto generated.
@@ -316,6 +326,11 @@ module Public {
* Holds if the neutral has provenance `p`.
*/
predicate hasProvenance(Provenance p) { p = provenance }
/**
* Gets the kind of the neutral.
*/
string getKind() { result = kind }
}
}
@@ -1318,6 +1333,11 @@ module Private {
/** Gets the string representation of this callable used by `neutral/1`. */
abstract string getCallableCsv();
/**
* Gets the kind of the neutral.
*/
string getKind() { result = super.getKind() }
string toString() { result = super.toString() }
}
@@ -1358,12 +1378,13 @@ module Private {
/**
* Holds if a neutral model `csv` exists (semi-colon separated format). Used for testing purposes.
* The syntax is: "namespace;type;name;signature;provenance"",
* The syntax is: "namespace;type;name;signature;kind;provenance".
*/
query predicate neutral(string csv) {
  exists(
    RelevantNeutralCallable callable, string callablePart, string kindPart, string provenancePart
  |
    // Callable information
    callablePart = callable.getCallableCsv() and
    // kind, followed by its field separator
    kindPart = callable.getKind() + ";" and
    // provenance
    provenancePart = renderProvenanceNeutral(callable)
  |
    csv = callablePart + kindPart + provenancePart
  )
}

View File

@@ -39,8 +39,16 @@ private import FlowSummaryImpl::Private
private import FlowSummaryImpl::Public
private import semmle.python.dataflow.new.FlowSummary as FlowSummary
/**
* A class of callables that are candidates for flow summary modeling.
*/
class SummarizedCallableBase = string;
/**
* A class of callables that are candidates for neutral modeling.
*/
class NeutralCallableBase = string;
/** View a `SummarizedCallable` as a `DataFlowCallable`. */
DataFlowCallable inject(SummarizedCallable c) { result.asLibraryCallable() = c }
@@ -91,11 +99,11 @@ predicate summaryElement(
}
/**
* Holds if a neutral summary model exists for `c` with provenance `provenance`,
* which means that there is no flow through `c`.
* Holds if a neutral model exists for `c` of kind `kind`
* and with provenance `provenance`.
* Note. Neutral models have not been implemented for Python.
*/
predicate neutralSummaryElement(FlowSummary::SummarizedCallable c, string provenance) { none() }
predicate neutralElement(NeutralCallableBase c, string kind, string provenance) { none() }
/**
* Gets the summary component for specification component `c`, if any.

View File

@@ -0,0 +1,10 @@
/**
* Provides Python-specific definitions for use in the taint tracking library.
*/
private import codeql.dataflow.TaintTracking
private import DataFlowImplSpecific
/**
 * The Python implementation of the shared taint tracking library's `InputSig`,
 * parameterised over `PythonDataFlow`. All members come from `TaintTrackingPrivate`.
 */
module PythonTaintTracking implements InputSig<PythonDataFlow> {
import TaintTrackingPrivate
}

View File

@@ -16,7 +16,7 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
* of `c` at sinks and inputs to additional taint steps.
*/
bindingset[node]
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) { none() }
private module Cached {
/**

View File

@@ -1,74 +0,0 @@
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) taint-tracking analyses.
*/
import TaintTrackingParameter::Public
private import TaintTrackingParameter::Private
/**
 * Extends the data flow configuration `Config` with the default taint
 * behaviour: default sanitizers become barriers, default additional taint
 * steps become additional flow steps, and default implicit taint reads are
 * allowed at sinks and at inputs to the configuration's additional flow steps.
 */
private module AddTaintDefaults<DataFlowInternal::FullStateConfigSig Config> implements
DataFlowInternal::FullStateConfigSig
{
// Everything not overridden below is taken unchanged from `Config`.
import Config
/** Holds if `node` is a barrier of `Config` or a default taint sanitizer. */
predicate isBarrier(DataFlow::Node node) {
Config::isBarrier(node) or defaultTaintSanitizer(node)
}
/** Holds if `node1` to `node2` is an additional step of `Config` or a default taint step. */
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
Config::isAdditionalFlowStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}
/**
 * Holds if an implicit read of `c` is allowed at `node`, either because
 * `Config` allows it, or because it is a default implicit taint read at a
 * sink or at the origin of one of `Config`'s additional flow steps.
 */
predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
Config::allowImplicitRead(node, c)
or
(
Config::isSink(node, _) or
Config::isAdditionalFlowStep(node, _) or
Config::isAdditionalFlowStep(node, _, _, _)
) and
defaultImplicitTaintRead(node, c)
}
}
/**
* Constructs a global taint tracking computation.
*/
module Global<DataFlow::ConfigSig Config> implements DataFlow::GlobalFlowSig {
// Lifts the state-less `Config` to the full state-based signature by
// combining it with `DefaultState<Config>`.
private module Config0 implements DataFlowInternal::FullStateConfigSig {
import DataFlowInternal::DefaultState<Config>
import Config
}
// Adds the default taint sanitizers, steps and implicit reads.
private module C implements DataFlowInternal::FullStateConfigSig {
import AddTaintDefaults<Config0>
}
import DataFlowInternal::Impl<C>
}
/** DEPRECATED: Use `Global` instead. */
deprecated module Make<DataFlow::ConfigSig Config> implements DataFlow::GlobalFlowSig {
import Global<Config>
}
/**
* Constructs a global taint tracking computation using flow state.
*/
module GlobalWithState<DataFlow::StateConfigSig Config> implements DataFlow::GlobalFlowSig {
// Re-exposes `Config` under the full state-based signature.
private module Config0 implements DataFlowInternal::FullStateConfigSig {
import Config
}
// Adds the default taint sanitizers, steps and implicit reads.
private module C implements DataFlowInternal::FullStateConfigSig {
import AddTaintDefaults<Config0>
}
import DataFlowInternal::Impl<C>
}
/** DEPRECATED: Use `GlobalWithState` instead. */
deprecated module MakeWithState<DataFlow::StateConfigSig Config> implements DataFlow::GlobalFlowSig {
import GlobalWithState<Config>
}

View File

@@ -468,6 +468,27 @@ module AiohttpWebModel {
override string getSourceType() { result = "aiohttp.web.Request" }
}
/**
* A parameter that has a type annotation of `aiohttp.web.Request`, so with all
* likelihood will receive an `aiohttp.web.Request` instance at some point when a
* request handler is invoked.
*/
class AiohttpRequestParamFromTypeAnnotation extends Request::InstanceSource,
  DataFlow::ParameterNode, RemoteFlowSource::Range
{
  AiohttpRequestParamFromTypeAnnotation() {
    // Parameters already modeled as request-handler request parameters are left
    // to that model.
    not this instanceof AiohttpRequestHandlerRequestParam and
    exists(API::Node requestClass |
      requestClass = API::moduleImport("aiohttp").getMember("web").getMember("Request")
    |
      this.getParameter().getAnnotation() = requestClass.getAValueReachableFromSource().asExpr()
    )
  }

  override string getSourceType() { result = "aiohttp.web.Request from type-annotation" }
}
/**
* A read of the `request` attribute on an instance of an aiohttp.web View class,
* which is the request being processed currently.
@@ -498,14 +519,17 @@ module AiohttpWebModel {
* - https://docs.aiohttp.org/en/stable/web_quickstart.html#aiohttp-web-exceptions
*/
class AiohttpWebResponseInstantiation extends Http::Server::HttpResponse::Range,
Response::InstanceSource, DataFlow::CallCfgNode
Response::InstanceSource, API::CallNode
{
API::Node apiNode;
AiohttpWebResponseInstantiation() {
this = apiNode.getACall() and
(
apiNode = API::moduleImport("aiohttp").getMember("web").getMember("Response")
apiNode =
API::moduleImport("aiohttp")
.getMember("web")
.getMember(["FileResponse", "Response", "StreamResponse"])
or
exists(string httpExceptionClassName |
httpExceptionClassName in [
@@ -545,6 +569,10 @@ module AiohttpWebModel {
override DataFlow::Node getMimetypeOrContentTypeArg() {
  // Explicit `content_type=...` keyword argument.
  result = this.getArgByName("content_type")
  or
  // A `Content-Type` entry (matched case-insensitively) in the `headers` keyword argument.
  exists(API::Node headers, string headerName |
    headers = this.getKeywordParameter("headers") and
    headerName.toLowerCase() = "content-type"
  |
    result = headers.getSubscript(headerName).getAValueReachingSink()
  )
}
override string getMimetypeDefault() {
@@ -556,6 +584,37 @@ module AiohttpWebModel {
}
}
/**
* A call to the `aiohttp.web.FileResponse` constructor as a sink for Filesystem access.
*/
class FileResponseCall extends FileSystemAccess::Range, API::CallNode {
  FileResponseCall() {
    exists(API::Node fileResponse |
      fileResponse = API::moduleImport("aiohttp").getMember("web").getMember("FileResponse")
    |
      this = fileResponse.getACall()
    )
  }

  /** Gets the `path` argument, given either as the first positional argument or by keyword. */
  override DataFlow::Node getAPathArgument() {
    exists(API::Node pathParam | pathParam = this.getParameter(0, "path") |
      result = pathParam.asSink()
    )
  }
}
/**
* An instantiation of `aiohttp.web.StreamResponse`.
*
* See https://docs.aiohttp.org/en/stable/web_reference.html#aiohttp.web.StreamResponse
*/
class StreamResponse extends AiohttpWebResponseInstantiation {
  StreamResponse() {
    exists(API::Node streamResponse |
      streamResponse = API::moduleImport("aiohttp").getMember("web").getMember("StreamResponse")
    |
      this = streamResponse.getACall()
    )
  }

  /** Gets the `data` argument of a `write` or `write_eof` call on the created response. */
  override DataFlow::Node getBody() {
    exists(API::CallNode writeCall |
      writeCall = this.getReturn().getMember(["write", "write_eof"]).getACall()
    |
      result = writeCall.getParameter(0, "data").asSink()
    )
  }
}
/** Gets an HTTP response instance. */
private API::Node aiohttpResponseInstance() {
result = any(AiohttpWebResponseInstantiation call).getApiNode().getReturn()
@@ -670,14 +729,14 @@ private module AiohttpClientModel {
string methodName;
OutgoingRequestCall() {
methodName in [Http::httpVerbLower(), "request"] and
methodName in [Http::httpVerbLower(), "request", "ws_connect"] and
this = instance().getMember(methodName).getACall()
}
override DataFlow::Node getAUrlPart() {
result = this.getArgByName("url")
or
not methodName = "request" and
methodName in [Http::httpVerbLower(), "ws_connect"] and
result = this.getArg(0)
or
methodName = "request" and

View File

@@ -15,6 +15,7 @@ private import semmle.python.regex
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
private import semmle.python.frameworks.internal.SelfRefMixin
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.security.dataflow.UrlRedirectCustomizations
/**
* INTERNAL: Do not use.
@@ -2788,4 +2789,31 @@ module PrivateDjango {
override predicate csrfEnabled() { decoratorName in ["csrf_protect", "requires_csrf_token"] }
}
/**
 * Holds if `g` is a call to `django.utils.http.url_has_allowed_host_and_scheme`
 * whose `url` argument is `node`, and `branch` is `true`.
 */
private predicate djangoUrlHasAllowedHostAndScheme(
  DataFlow::GuardNode g, ControlFlowNode node, boolean branch
) {
  branch = true and
  exists(API::Node checker, API::CallNode call |
    checker =
      API::moduleImport("django")
          .getMember("utils")
          .getMember("http")
          .getMember("url_has_allowed_host_and_scheme") and
    call = checker.getACall()
  |
    g = call.asCfgNode() and
    node = call.getParameter(0, "url").asSink().asCfgNode()
  )
}
/**
* A call to `django.utils.http.url_has_allowed_host_and_scheme`, considered as a sanitizer-guard for URL redirection.
*
* See https://docs.djangoproject.com/en/4.2/_modules/django/utils/http/
*/
private class DjangoAllowedUrl extends UrlRedirect::Sanitizer {
DjangoAllowedUrl() {
// The barrier nodes are derived from the guard predicate
// `djangoUrlHasAllowedHostAndScheme`, which only holds for the `true` branch.
this = DataFlow::BarrierGuard<djangoUrlHasAllowedHostAndScheme/3>::getABarrierNode()
}
}
}

View File

@@ -179,7 +179,13 @@ module Flask {
* - https://flask.palletsprojects.com/en/2.2.x/api/#flask.json.jsonify
*/
private class FlaskJsonifyCall extends InstanceSource, DataFlow::CallCfgNode {
FlaskJsonifyCall() { this = API::moduleImport("flask").getMember("jsonify").getACall() }
FlaskJsonifyCall() {
this = API::moduleImport("flask").getMember("jsonify").getACall()
or
this = API::moduleImport("flask").getMember("json").getMember("jsonify").getACall()
or
this = FlaskApp::instance().getMember("json").getMember("response").getACall()
}
override DataFlow::Node getBody() { result in [this.getArg(_), this.getArgByName(_)] }
@@ -453,7 +459,8 @@ module Flask {
FlaskRouteHandlerReturn() {
exists(Function routeHandler |
routeHandler = any(FlaskRouteSetup rs).getARequestHandler() and
node = routeHandler.getAReturnValueFlowNode()
node = routeHandler.getAReturnValueFlowNode() and
not this instanceof Flask::Response::InstanceSource
)
}

View File

@@ -0,0 +1,67 @@
/**
* Provides classes and predicates for working with those serverless handlers,
* handled by the shared library.
*
* E.g. [AWS](https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html).
*
* In particular a `RemoteFlowSource` is added for each.
*/
import python
import codeql.serverless.ServerLess
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.RemoteFlowSources
/**
 * The Python-side implementation of the shared serverless library's `Input`
 * signature, built from the Python `Files` and `Yaml` libraries.
 */
private module YamlImpl implements Input {
import semmle.python.Files
import semmle.python.Yaml
}
module SL = ServerLess<YamlImpl>;
/**
* Gets a function that is a serverless request handler.
*
* For example: if an AWS serverless resource contains the following properties (in the "template.yml" file):
* ```yaml
* Handler: mylibrary.handler
* Runtime: pythonXXX
* CodeUri: backend/src/
* ```
*
* And a file "mylibrary.py" exists in the folder "backend/src" (relative to the "template.yml" file).
* Then the result of this predicate is a function exported as "handler" from "mylibrary.py".
* The "mylibrary.py" file could for example look like:
*
* ```python
* def handler(event):
* ...
* ```
*/
private Function getAServerlessHandler() {
  exists(string stem, string handler, string runtime |
    SL::hasServerlessHandler(stem, handler, _, runtime) and
    // if runtime is specified, it should be python
    (runtime = "" or runtime.matches("python%"))
  |
    // the handler is a function named `handler` defined at module level in `<stem>.py`
    result.getName() = handler and
    exists(Module mod, File file |
      result.getScope() = mod and
      mod.getFile() = file and
      file.getAbsolutePath() = stem + ".py"
    )
  )
}
/**
 * Gets the parameter node for the first positional parameter, or the parameter
 * named `event`, of a serverless request handler.
 */
private DataFlow::ParameterNode getAHandlerEventParameter() {
  exists(Function handler, Parameter event |
    handler = getAServerlessHandler() and
    (
      event = handler.getArg(0)
      or
      event = handler.getArgByName("event")
    )
  |
    result.getParameter() = event
  )
}
/**
* A serverless request handler event, seen as a RemoteFlowSource.
*/
private class ServerlessHandlerEventAsRemoteFlow extends RemoteFlowSource::Range {
// Covers exactly the parameters selected by `getAHandlerEventParameter`.
ServerlessHandlerEventAsRemoteFlow() { this = getAHandlerEventParameter() }
override string getSourceType() { result = "Serverless event" }
}
View File

@@ -1815,51 +1815,95 @@ private module StdlibPrivate {
// ---------------------------------------------------------------------------
// BaseHTTPServer (Python 2 only)
// ---------------------------------------------------------------------------
/** Gets a reference to the `BaseHttpServer` module. */
API::Node baseHttpServer() { result = API::moduleImport("BaseHTTPServer") }
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Gets a reference to the `BaseHttpServer` module.
*/
deprecated API::Node baseHttpServer() { result = API::moduleImport("BaseHTTPServer") }
/** Provides models for the `BaseHttpServer` module. */
module BaseHttpServer {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Provides models for the `BaseHttpServer` module.
*/
deprecated module BaseHttpServer {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Provides models for the `BaseHTTPServer.BaseHTTPRequestHandler` class (Python 2 only).
*/
module BaseHttpRequestHandler {
/** Gets a reference to the `BaseHttpServer.BaseHttpRequestHandler` class. */
API::Node classRef() { result = baseHttpServer().getMember("BaseHTTPRequestHandler") }
deprecated module BaseHttpRequestHandler {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Gets a reference to the `BaseHttpServer.BaseHttpRequestHandler` class.
*/
deprecated API::Node classRef() {
result = baseHttpServer().getMember("BaseHTTPRequestHandler")
}
}
}
// ---------------------------------------------------------------------------
// SimpleHTTPServer (Python 2 only)
// ---------------------------------------------------------------------------
/** Gets a reference to the `SimpleHttpServer` module. */
API::Node simpleHttpServer() { result = API::moduleImport("SimpleHTTPServer") }
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Gets a reference to the `SimpleHttpServer` module.
*/
deprecated API::Node simpleHttpServer() { result = API::moduleImport("SimpleHTTPServer") }
/** Provides models for the `SimpleHttpServer` module. */
module SimpleHttpServer {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Provides models for the `SimpleHttpServer` module.
*/
deprecated module SimpleHttpServer {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Provides models for the `SimpleHTTPServer.SimpleHTTPRequestHandler` class (Python 2 only).
*/
module SimpleHttpRequestHandler {
/** Gets a reference to the `SimpleHttpServer.SimpleHttpRequestHandler` class. */
API::Node classRef() { result = simpleHttpServer().getMember("SimpleHTTPRequestHandler") }
deprecated module SimpleHttpRequestHandler {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Gets a reference to the `SimpleHttpServer.SimpleHttpRequestHandler` class.
*/
deprecated API::Node classRef() {
result = simpleHttpServer().getMember("SimpleHTTPRequestHandler")
}
}
}
// ---------------------------------------------------------------------------
// CGIHTTPServer (Python 2 only)
// ---------------------------------------------------------------------------
/** Gets a reference to the `CGIHTTPServer` module. */
API::Node cgiHttpServer() { result = API::moduleImport("CGIHTTPServer") }
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Gets a reference to the `CGIHTTPServer` module.
*/
deprecated API::Node cgiHttpServer() { result = API::moduleImport("CGIHTTPServer") }
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Provides models for the `CGIHTTPServer` module.
*/
module CgiHttpServer {
deprecated module CgiHttpServer {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Provides models for the `CGIHTTPServer.CGIHTTPRequestHandler` class (Python 2 only).
*/
module CgiHttpRequestHandler {
/** Gets a reference to the `CGIHTTPServer.CgiHttpRequestHandler` class. */
API::Node classRef() { result = cgiHttpServer().getMember("CGIHTTPRequestHandler") }
deprecated module CgiHttpRequestHandler {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Gets a reference to the `CGIHTTPServer.CgiHttpRequestHandler` class.
*/
deprecated API::Node classRef() {
result = cgiHttpServer().getMember("CGIHTTPRequestHandler")
}
}
/** DEPRECATED: Alias for CgiHttpRequestHandler */
@@ -1872,47 +1916,69 @@ private module StdlibPrivate {
// ---------------------------------------------------------------------------
// http (Python 3 only)
// ---------------------------------------------------------------------------
/** Gets a reference to the `http` module. */
API::Node http() { result = API::moduleImport("http") }
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Gets a reference to the `http` module.
*/
deprecated API::Node http() { result = API::moduleImport("http") }
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Provides models for the `http` module.
*/
module StdlibHttp {
deprecated module StdlibHttp {
// -------------------------------------------------------------------------
// http.server
// -------------------------------------------------------------------------
/** Gets a reference to the `http.server` module. */
API::Node server() { result = http().getMember("server") }
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Gets a reference to the `http.server` module.
*/
deprecated API::Node server() { result = http().getMember("server") }
/** Provides models for the `http.server` module */
module Server {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Provides models for the `http.server` module
*/
deprecated module Server {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Provides models for the `http.server.BaseHTTPRequestHandler` class (Python 3 only).
*
* See https://docs.python.org/3.9/library/http.server.html#http.server.BaseHTTPRequestHandler.
*/
module BaseHttpRequestHandler {
deprecated module BaseHttpRequestHandler {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Gets a reference to the `http.server.BaseHttpRequestHandler` class.
*/
API::Node classRef() { result = server().getMember("BaseHTTPRequestHandler") }
deprecated API::Node classRef() { result = server().getMember("BaseHTTPRequestHandler") }
}
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Provides models for the `http.server.SimpleHTTPRequestHandler` class (Python 3 only).
*
* See https://docs.python.org/3.9/library/http.server.html#http.server.SimpleHTTPRequestHandler.
*/
module SimpleHttpRequestHandler {
deprecated module SimpleHttpRequestHandler {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Gets a reference to the `http.server.SimpleHttpRequestHandler` class.
*/
API::Node classRef() { result = server().getMember("SimpleHTTPRequestHandler") }
deprecated API::Node classRef() { result = server().getMember("SimpleHTTPRequestHandler") }
}
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Provides models for the `http.server.CGIHTTPRequestHandler` class (Python 3 only).
*
* See https://docs.python.org/3.9/library/http.server.html#http.server.CGIHTTPRequestHandler.
*/
module CgiHttpRequestHandler {
/** Gets a reference to the `http.server.CGIHTTPRequestHandler` class. */
API::Node classRef() { result = server().getMember("CGIHTTPRequestHandler") }
deprecated module CgiHttpRequestHandler {
/**
* DEPRECATED: Use API-graphs directly instead.
*
* Gets a reference to the `http.server.CGIHTTPRequestHandler` class.
*/
deprecated API::Node classRef() { result = server().getMember("CGIHTTPRequestHandler") }
}
/** DEPRECATED: Alias for CgiHttpRequestHandler */
@@ -1933,13 +1999,13 @@ private module StdlibPrivate {
result =
[
// Python 2
BaseHttpServer::BaseHttpRequestHandler::classRef(),
SimpleHttpServer::SimpleHttpRequestHandler::classRef(),
CgiHttpServer::CgiHttpRequestHandler::classRef(),
API::moduleImport("BaseHTTPServer").getMember("BaseHTTPRequestHandler"),
API::moduleImport("SimpleHTTPServer").getMember("SimpleHTTPRequestHandler"),
API::moduleImport("CGIHTTPServer").getMember("CGIHTTPRequestHandler"),
// Python 3
StdlibHttp::Server::BaseHttpRequestHandler::classRef(),
StdlibHttp::Server::SimpleHttpRequestHandler::classRef(),
StdlibHttp::Server::CgiHttpRequestHandler::classRef()
API::moduleImport("http").getMember("server").getMember("BaseHTTPRequestHandler"),
API::moduleImport("http").getMember("server").getMember("SimpleHTTPRequestHandler"),
API::moduleImport("http").getMember("server").getMember("CGIHTTPRequestHandler"),
].getASubclass*()
}
@@ -4372,6 +4438,141 @@ private module StdlibPrivate {
preservesValue = false
}
}
/**
* A flow summary for `os.getenv` / `os.getenvb`
*
* See https://devdocs.io/python~3.11/library/os#os.getenv
*/
class OsGetEnv extends SummarizedCallable {
  OsGetEnv() { this = "os.getenv" }

  override DataFlow::CallCfgNode getACall() {
    exists(string name | name = ["getenv", "getenvb"] |
      result = API::moduleImport("os").getMember(name).getACall()
    )
  }

  override DataFlow::ArgumentNode getACallback() {
    exists(string name | name = ["getenv", "getenvb"] |
      result = API::moduleImport("os").getMember(name).getAValueReachableFromSource()
    )
  }

  /**
   * Holds if flow goes, value-preserving, from the second positional argument
   * or the `default` keyword argument to the return value.
   */
  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    preservesValue = true and
    output = "ReturnValue" and
    (
      input = "Argument[1]"
      or
      input = "Argument[default:]"
    )
  }
}
// ---------------------------------------------------------------------------
// asyncio
// ---------------------------------------------------------------------------
/** Provides models for the `asyncio` module. */
module AsyncIO {
/**
* A call to the `asyncio.create_subprocess_exec` function (also accessible via the `subprocess` module of `asyncio`)
*
* The call is modeled both as a system command execution and as a file system
* access, with the program as the command and as the path argument.
*
* See https://docs.python.org/3/library/asyncio-subprocess.html#creating-subprocesses
*/
private class CreateSubprocessExec extends SystemCommandExecution::Range,
FileSystemAccess::Range, API::CallNode
{
CreateSubprocessExec() {
// `asyncio.create_subprocess_exec(...)`
this = API::moduleImport("asyncio").getMember("create_subprocess_exec").getACall()
or
// `asyncio.subprocess.create_subprocess_exec(...)`
this =
API::moduleImport("asyncio")
.getMember("subprocess")
.getMember("create_subprocess_exec")
.getACall()
}
// The command is positional argument 0 or the keyword argument `program`.
override DataFlow::Node getCommand() { result = this.getParameter(0, "program").asSink() }
// The same node is reported as the accessed path.
override DataFlow::Node getAPathArgument() { result = this.getCommand() }
override predicate isShellInterpreted(DataFlow::Node arg) {
none() // this is a safe API.
}
}
/**
* A call to the `asyncio.create_subprocess_shell` function (also accessible via the `subprocess` module of `asyncio`)
*
* The call is modeled both as a system command execution and as a file system
* access; the command is marked as shell-interpreted.
*
* See https://docs.python.org/3/library/asyncio-subprocess.html#asyncio.create_subprocess_shell
*/
private class CreateSubprocessShell extends SystemCommandExecution::Range,
FileSystemAccess::Range, API::CallNode
{
CreateSubprocessShell() {
// `asyncio.create_subprocess_shell(...)`
this = API::moduleImport("asyncio").getMember("create_subprocess_shell").getACall()
or
// `asyncio.subprocess.create_subprocess_shell(...)`
this =
API::moduleImport("asyncio")
.getMember("subprocess")
.getMember("create_subprocess_shell")
.getACall()
}
// The command is positional argument 0 or the keyword argument `cmd`.
override DataFlow::Node getCommand() { result = this.getParameter(0, "cmd").asSink() }
// The same node is reported as the accessed path.
override DataFlow::Node getAPathArgument() { result = this.getCommand() }
// Only the command itself is considered shell-interpreted.
override predicate isShellInterpreted(DataFlow::Node arg) { arg = this.getCommand() }
}
/**
 * Gets the result of one of the `asyncio` functions that hand out an event loop
 * (an object with basetype `AbstractEventLoop`): `get_running_loop`,
 * `get_event_loop` or `new_event_loop`.
 *
 * See https://docs.python.org/3/library/asyncio-eventloop.html
 */
private API::Node getAsyncioEventLoop() {
  result =
    API::moduleImport("asyncio")
        .getMember([
            "get_running_loop",
            "get_event_loop", // deprecated in Python 3.10.0 and later
            "new_event_loop"
          ])
        .getReturn()
}
/**
* A call to `subprocess_exec` on an event loop instance.
*
* The call is modeled both as a system command execution and as a file system
* access, with the same node as command and path argument.
*
* See https://docs.python.org/3/library/asyncio-eventloop.html#asyncio.loop.subprocess_exec
*/
private class EventLoopSubprocessExec extends API::CallNode, SystemCommandExecution::Range,
FileSystemAccess::Range
{
EventLoopSubprocessExec() {
this = getAsyncioEventLoop().getMember("subprocess_exec").getACall()
}
// The command is taken from positional argument 1.
// NOTE(review): presumably argument 0 is the protocol factory described in the
// linked documentation -- confirm.
override DataFlow::Node getCommand() { result = this.getArg(1) }
override DataFlow::Node getAPathArgument() { result = this.getCommand() }
override predicate isShellInterpreted(DataFlow::Node arg) {
none() // this is a safe API.
}
}
/**
* A call to `subprocess_shell` on an event loop instance.
*
* The call is modeled both as a system command execution and as a file system
* access; the command is marked as shell-interpreted.
*
* See https://docs.python.org/3/library/asyncio-eventloop.html#asyncio.loop.subprocess_shell
*/
private class EventLoopSubprocessShell extends API::CallNode, SystemCommandExecution::Range,
FileSystemAccess::Range
{
EventLoopSubprocessShell() {
this = getAsyncioEventLoop().getMember("subprocess_shell").getACall()
}
// The command is positional argument 1 or the keyword argument `cmd`.
override DataFlow::Node getCommand() { result = this.getParameter(1, "cmd").asSink() }
// The same node is reported as the accessed path.
override DataFlow::Node getAPathArgument() { result = this.getCommand() }
// Only the command itself is considered shell-interpreted.
override predicate isShellInterpreted(DataFlow::Node arg) { arg = this.getCommand() }
}
}
}
// ---------------------------------------------------------------------------

View File

@@ -454,6 +454,14 @@ private API::Node getNodeFromPath(string type, AccessPath path, int n) {
or
// Apply a type step
typeStep(getNodeFromPath(type, path, n), result)
or
// Apply a fuzzy step (without advancing 'n')
path.getToken(n).getName() = "Fuzzy" and
result = Specific::getAFuzzySuccessor(getNodeFromPath(type, path, n))
or
// Skip a fuzzy step (advance 'n' without changing the current node)
path.getToken(n - 1).getName() = "Fuzzy" and
result = getNodeFromPath(type, path, n - 1)
}
/**
@@ -500,6 +508,14 @@ private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath, int n)
// will themselves find by following type-steps.
n > 0 and
n < subPath.getNumToken()
or
// Apply a fuzzy step (without advancing 'n')
subPath.getToken(n).getName() = "Fuzzy" and
result = Specific::getAFuzzySuccessor(getNodeFromSubPath(base, subPath, n))
or
// Skip a fuzzy step (advance 'n' without changing the current node)
subPath.getToken(n - 1).getName() = "Fuzzy" and
result = getNodeFromSubPath(base, subPath, n - 1)
}
/**
@@ -561,7 +577,7 @@ private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path)
*/
bindingset[name]
private predicate isValidTokenNameInIdentifyingAccessPath(string name) {
name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar"]
name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar", "Fuzzy"]
or
Specific::isExtraValidTokenNameInIdentifyingAccessPath(name)
}
@@ -572,7 +588,7 @@ private predicate isValidTokenNameInIdentifyingAccessPath(string name) {
*/
bindingset[name]
private predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) {
name = "ReturnValue"
name = ["ReturnValue", "Fuzzy"]
or
Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name)
}

View File

@@ -108,6 +108,23 @@ API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathToken token)
)
}
/**
* Gets a node that is one "fuzzy" step away from `node`: any member, any
* positional or keyword parameter, the return value, a subscript, the awaited
* value, or a direct subclass.
*/
pragma[inline]
API::Node getAFuzzySuccessor(API::Node node) {
result = node.getAMember()
or
result = node.getParameter(_)
or
result = node.getKeywordParameter(_)
or
result = node.getReturn()
or
result = node.getASubscript()
or
result = node.getAwaited()
or
result = node.getASubclass()
}
/**
* Holds if `invoke` matches the PY-specific call site filter in `token`.
*/

View File

@@ -227,10 +227,11 @@ module Impl implements RegexTreeViewSig {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
exists(int re_start |
exists(int re_start, int prefix_len | prefix_len = re.getPrefix().length() |
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, _) and
startcolumn = re_start + start + 4 and
endcolumn = re_start + end + 3
startcolumn = re_start + start + prefix_len and
endcolumn = re_start + end + prefix_len - 1
/* inclusive vs exclusive */
)
}

View File

@@ -101,7 +101,7 @@ private module FindRegexMode {
}
/**
* DEPRECATED: Use `Regex` instead.
* DEPRECATED: Use `RegExp` instead.
*/
deprecated class Regex = RegExp;
@@ -327,6 +327,17 @@ class RegExp extends Expr instanceof StrConst {
/** Gets the text of this regex */
string getText() { result = super.getText() }
/**
* Gets the prefix of this regex
*
* Examples:
*
* - The prefix of `'x*y'` is `'`.
* - The prefix of `r''` is `r'`.
* - The prefix of `r"""x*y"""` is `r"""`.
*/
string getPrefix() { result = super.getPrefix() }
/** Gets the `i`th character of this regex */
string getChar(int i) { result = this.getText().charAt(i) }
@@ -617,7 +628,7 @@ class RegExp extends Expr instanceof StrConst {
private predicate group_start(int start, int end) {
this.non_capturing_group_start(start, end)
or
this.flag_group_start(start, end, _)
this.flag_group_start(start, end)
or
this.named_group_start(start, end)
or
@@ -679,12 +690,48 @@ class RegExp extends Expr instanceof StrConst {
end = min(int i | i > start + 4 and this.getChar(i) = "?")
}
private predicate flag_group_start(int start, int end, string c) {
/**
* Holds if a parse mode starts between `start` and `end`.
*/
private predicate flag_group_start(int start, int end) {
this.flag_group_start_no_modes(start, _) and
// `end` is one past the largest position holding a mode character of the
// group that starts at `start`.
end = max(int i | this.mode_character(start, i) | i + 1)
}
/**
* Holds if the initial part of a parse mode, not containing any
* mode characters is between `start` and `end`.
*/
private predicate flag_group_start_no_modes(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) = "?" and
end = start + 3 and
c = this.getChar(start + 2) and
c in ["i", "L", "m", "s", "u", "x"]
this.getChar(start + 2) in ["i", "L", "m", "s", "u", "x"] and
end = start + 2
}
/**
* Holds if `pos` contains a mode character from the
* flag group starting at `start`.
*/
private predicate mode_character(int start, int pos) {
// Base case: the position reported by `flag_group_start_no_modes`.
this.flag_group_start_no_modes(start, pos)
or
// Recursive case: a mode letter directly after another mode character of the
// same group.
this.mode_character(start, pos - 1) and
this.getChar(pos) in ["i", "L", "m", "s", "u", "x"]
}
/**
* Holds if a parse mode group includes the mode flag `c`.
* For example the following parse mode group, with mode flag `i`:
* ```
* (?i)
* ```
*/
private predicate flag(string c) {
// `c` is the character found at a mode-character position of any flag group.
exists(int pos |
this.mode_character(_, pos) and
this.getChar(pos) = c
)
}
/**
@@ -692,7 +739,7 @@ class RegExp extends Expr instanceof StrConst {
* it is defined by a prefix.
*/
string getModeFromPrefix() {
exists(string c | this.flag_group_start(_, _, c) |
exists(string c | this.flag(c) |
c = "i" and result = "IGNORECASE"
or
c = "L" and result = "LOCALE"

View File

@@ -16,9 +16,11 @@ private import semmle.python.dataflow.new.SensitiveDataSources
import CleartextLoggingCustomizations::CleartextLogging
/**
* DEPRECATED: Use `CleartextLoggingFlow` module instead.
*
* A taint-tracking configuration for detecting "Clear-text logging of sensitive information".
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "CleartextLogging" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -31,3 +33,14 @@ class Configuration extends TaintTracking::Configuration {
node instanceof Sanitizer
}
}
/**
* Data-flow configuration for "Clear-text logging of sensitive information":
* flow from `Source` nodes to `Sink` nodes, with `Sanitizer` nodes as barriers.
*/
private module CleartextLoggingConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/** Global taint-tracking for detecting "Clear-text logging of sensitive information" vulnerabilities. */
module CleartextLoggingFlow = TaintTracking::Global<CleartextLoggingConfig>;

View File

@@ -16,9 +16,11 @@ private import semmle.python.dataflow.new.SensitiveDataSources
import CleartextStorageCustomizations::CleartextStorage
/**
* DEPRECATED: Use `CleartextStorageFlow` module instead.
*
* A taint-tracking configuration for detecting "Clear-text storage of sensitive information".
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "CleartextStorage" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -31,3 +33,14 @@ class Configuration extends TaintTracking::Configuration {
node instanceof Sanitizer
}
}
/**
* Data-flow configuration for "Clear-text storage of sensitive information":
* flow from `Source` nodes to `Sink` nodes, with `Sanitizer` nodes as barriers.
*/
private module CleartextStorageConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/** Global taint-tracking for detecting "Clear-text storage of sensitive information" vulnerabilities. */
module CleartextStorageFlow = TaintTracking::Global<CleartextStorageConfig>;

View File

@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import CodeInjectionCustomizations::CodeInjection
/**
* DEPRECATED: Use `CodeInjectionFlow` module instead.
*
* A taint-tracking configuration for detecting "code injection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "CodeInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -27,3 +29,14 @@ class Configuration extends TaintTracking::Configuration {
guard instanceof SanitizerGuard
}
}
/**
* Data-flow configuration for "code injection":
* flow from `Source` nodes to `Sink` nodes, with `Sanitizer` nodes as barriers.
*/
private module CodeInjectionConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/** Global taint-tracking for detecting "code injection" vulnerabilities. */
module CodeInjectionFlow = TaintTracking::Global<CodeInjectionConfig>;

View File

@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import CommandInjectionCustomizations::CommandInjection
/**
* DEPRECATED: Use `CommandInjectionFlow` module instead.
*
* A taint-tracking configuration for detecting "command injection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "CommandInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -27,3 +29,17 @@ class Configuration extends TaintTracking::Configuration {
guard instanceof SanitizerGuard
}
}
/**
* A taint-tracking configuration for detecting "command injection" vulnerabilities.
*
* Flow is tracked from `Source` nodes to `Sink` nodes, with `Sanitizer` nodes
* as barriers.
*/
module CommandInjectionConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/** Global taint-tracking for detecting "command injection" vulnerabilities. */
module CommandInjectionFlow = TaintTracking::Global<CommandInjectionConfig>;

View File

@@ -14,10 +14,12 @@ import semmle.python.dataflow.new.RemoteFlowSources
import LdapInjectionCustomizations::LdapInjection
/**
* DEPRECATED: Use `LdapInjectionDnFlow` module instead.
*
* A taint-tracking configuration for detecting LDAP injection vulnerabilities
* via the distinguished name (DN) parameter of an LDAP search.
*/
class DnConfiguration extends TaintTracking::Configuration {
deprecated class DnConfiguration extends TaintTracking::Configuration {
DnConfiguration() { this = "LdapDnInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -31,11 +33,24 @@ class DnConfiguration extends TaintTracking::Configuration {
}
}
/**
* Data-flow configuration for LDAP injection via the distinguished name (DN):
* flow from `Source` nodes to `DnSink` nodes, with `DnSanitizer` nodes as barriers.
*/
private module LdapInjectionDnConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof DnSink }
predicate isBarrier(DataFlow::Node node) { node instanceof DnSanitizer }
}
/** Global taint-tracking for detecting "LDAP injection via the distinguished name (DN) parameter" vulnerabilities. */
module LdapInjectionDnFlow = TaintTracking::Global<LdapInjectionDnConfig>;
/**
* DEPRECATED: Use `LdapInjectionFilterFlow` module instead.
*
* A taint-tracking configuration for detecting LDAP injection vulnerabilities
* via the filter parameter of an LDAP search.
*/
class FilterConfiguration extends TaintTracking::Configuration {
deprecated class FilterConfiguration extends TaintTracking::Configuration {
FilterConfiguration() { this = "LdapFilterInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -48,3 +63,19 @@ class FilterConfiguration extends TaintTracking::Configuration {
guard instanceof FilterSanitizerGuard
}
}
/**
* Data-flow configuration for LDAP injection via the search filter:
* flow from `Source` nodes to `FilterSink` nodes, with `FilterSanitizer` nodes as barriers.
*/
private module LdapInjectionFilterConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof FilterSink }
predicate isBarrier(DataFlow::Node node) { node instanceof FilterSanitizer }
}
/** Global taint-tracking for detecting "LDAP injection via the filter parameter" vulnerabilities. */
module LdapInjectionFilterFlow = TaintTracking::Global<LdapInjectionFilterConfig>;
/** Global taint-tracking for detecting "LDAP injection" vulnerabilities. */
module LdapInjectionFlow =
DataFlow::MergePathGraph<LdapInjectionDnFlow::PathNode, LdapInjectionFilterFlow::PathNode,
LdapInjectionDnFlow::PathGraph, LdapInjectionFilterFlow::PathGraph>;

View File

@@ -47,7 +47,35 @@ module LogInjection {
* A logging operation, considered as a flow sink.
*/
class LoggingAsSink extends Sink {
LoggingAsSink() { this = any(Logging write).getAnInput() }
LoggingAsSink() {
this = any(Logging write).getAnInput() and
// since the inner implementation of the `logging.Logger.warn` function is
// ```py
// class Logger:
// def warn(self, msg, *args, **kwargs):
// warnings.warn("The 'warn' method is deprecated, "
// "use 'warning' instead", DeprecationWarning, 2)
// self.warning(msg, *args, **kwargs)
// ```
// any time we would report flow to such a logging sink, we can ALSO report
// the flow to the `self.warning` sink -- obviously we don't want that.
//
// However, simply removing taint edges out of a sink is not a good enough solution,
// since we would only flag one of the `logger.warn` calls in the following example
// due to use-use flow
// ```py
// logger.warn(user_controlled)
// logger.warn(user_controlled)
// ```
//
// The same approach is used in the command injection query.
not exists(Module loggingInit |
loggingInit.getName() = "logging.__init__" and
this.getScope().getEnclosingModule() = loggingInit and
// do allow this call if we're analyzing logging/__init__.py as part of CPython though
not exists(loggingInit.getFile().getRelativePath())
)
}
}
/**

View File

@@ -1,5 +1,5 @@
/**
* Provides a taint-tracking configuration for tracking untrusted user input used in log entries.
* Provides a taint-tracking configuration for tracking "log injection" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `LogInjection::Configuration` is needed, otherwise
@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import LogInjectionCustomizations::LogInjection
/**
* DEPRECATED: Use `LogInjectionFlow` module instead.
*
* A taint-tracking configuration for tracking untrusted user input used in log entries.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "LogInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -27,3 +29,14 @@ class Configuration extends TaintTracking::Configuration {
guard instanceof SanitizerGuard
}
}
/**
* Data-flow configuration for "log injection":
* flow from `Source` nodes to `Sink` nodes, with `Sanitizer` nodes as barriers.
*/
private module LogInjectionConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/** Global taint-tracking for detecting "log injection" vulnerabilities. */
module LogInjectionFlow = TaintTracking::Global<LogInjectionConfig>;

View File

@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import PamAuthorizationCustomizations::PamAuthorizationCustomizations
/**
* DEPRECATED: Use `PamAuthorizationFlow` module instead.
*
* A taint-tracking configuration for detecting "PAM Authorization" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "PamAuthorization" }
override predicate isSource(DataFlow::Node node) { node instanceof Source }
@@ -37,3 +39,28 @@ class Configuration extends TaintTracking::Configuration {
exists(VulnPamAuthCall c | c.getArg(0) = node1 | node2 = c)
}
}
/**
* Data-flow configuration for "PAM Authorization": flow from `Source` nodes to
* `Sink` nodes, with two additional steps that carry flow from the username
* passed to `pam_start` into the PAM handle, and from a handle into a
* `VulnPamAuthCall`.
*/
private module PamAuthorizationConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
// Models flow from a remotely supplied username field to a PAM `handle`.
// `retval = pam_start(service, username, byref(conv), byref(handle))`
exists(API::CallNode pamStart, DataFlow::Node handle, API::CallNode pointer |
// The handle is the object wrapped by `ctypes.pointer(...)` / `ctypes.byref(...)`
// in argument 3 of `pam_start`; the username is argument 1.
pointer = API::moduleImport("ctypes").getMember(["pointer", "byref"]).getACall() and
pamStart = libPam().getMember("pam_start").getACall() and
pointer = pamStart.getArg(3) and
handle = pointer.getArg(0) and
pamStart.getArg(1) = node1 and
handle = node2
)
or
// Flow from handle to the authenticate call in the final step
exists(VulnPamAuthCall c | c.getArg(0) = node1 | node2 = c)
}
}
/** Global taint-tracking for detecting "PAM Authorization" vulnerabilities. */
module PamAuthorizationFlow = TaintTracking::Global<PamAuthorizationConfig>;

View File

@@ -13,6 +13,8 @@ import semmle.python.dataflow.new.TaintTracking
import PathInjectionCustomizations::PathInjection
/**
* DEPRECATED: Use `PathInjectionFlow` module instead.
*
* A taint-tracking configuration for detecting "path injection" vulnerabilities.
*
* This configuration uses two flow states, `NotNormalized` and `NormalizedUnchecked`,
@@ -25,7 +27,7 @@ import PathInjectionCustomizations::PathInjection
*
* Such checks are ineffective in the `NotNormalized` state.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "PathInjection" }
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
@@ -74,3 +76,52 @@ class NotNormalized extends DataFlow::FlowState {
class NormalizedUnchecked extends DataFlow::FlowState {
NormalizedUnchecked() { this = "NormalizedUnchecked" }
}
/**
* A taint-tracking configuration for detecting "path injection" vulnerabilities.
*
* This configuration uses two flow states, `NotNormalized` and `NormalizedUnchecked`,
* to track the requirement that a file path must be first normalized and then checked
* before it is safe to use.
*
* At sources, paths are assumed not normalized. At normalization points, they change
* state to `NormalizedUnchecked` after which they can be made safe by an appropriate
* check of the prefix.
*
* Such checks are ineffective in the `NotNormalized` state.
*/
module PathInjectionConfig implements DataFlow::StateConfigSig {
class FlowState = DataFlow::FlowState;
// Every source starts in the `NotNormalized` state.
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof Source and state instanceof NotNormalized
}
// A sink is reached in either state, i.e. whenever the path has not been
// both normalized and checked.
predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof Sink and
(
state instanceof NotNormalized or
state instanceof NormalizedUnchecked
)
}
// State-independent barrier.
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
predicate isBarrier(DataFlow::Node node, FlowState state) {
// Block `NotNormalized` paths here, since they change state to `NormalizedUnchecked`
node instanceof Path::PathNormalization and
state instanceof NotNormalized
or
// A safe-access check blocks flow only in the `NormalizedUnchecked` state.
node instanceof Path::SafeAccessCheck and
state instanceof NormalizedUnchecked
}
// The state change: flow from the path argument of a normalization to the
// normalization itself moves from `NotNormalized` to `NormalizedUnchecked`.
predicate isAdditionalFlowStep(
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
) {
nodeFrom = nodeTo.(Path::PathNormalization).getPathArg() and
stateFrom instanceof NotNormalized and
stateTo instanceof NormalizedUnchecked
}
}
/** Global taint-tracking for detecting "path injection" vulnerabilities. */
module PathInjectionFlow = TaintTracking::GlobalWithState<PathInjectionConfig>;

View File

@@ -6,7 +6,6 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.DataFlow2
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources

View File

@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import PolynomialReDoSCustomizations::PolynomialReDoS
/**
* DEPRECATED: Use `PolynomialReDoSFlow` module instead.
*
* A taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "PolynomialReDoS" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -27,3 +29,14 @@ class Configuration extends TaintTracking::Configuration {
guard instanceof SanitizerGuard
}
}
/**
* Data-flow configuration for "polynomial regular expression denial of service (ReDoS)":
* flow from `Source` nodes to `Sink` nodes, with `Sanitizer` nodes as barriers.
*/
private module PolynomialReDoSConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/** Global taint-tracking for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities. */
module PolynomialReDoSFlow = TaintTracking::Global<PolynomialReDoSConfig>;

View File

@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import ReflectedXSSCustomizations::ReflectedXss
/**
* DEPRECATED: Use `ReflectedXssFlow` module instead.
*
* A taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "ReflectedXSS" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -27,3 +29,14 @@ class Configuration extends TaintTracking::Configuration {
guard instanceof SanitizerGuard
}
}
/**
* Data-flow configuration for "reflected server-side cross-site scripting":
* flow from `Source` nodes to `Sink` nodes, with `Sanitizer` nodes as barriers.
*/
private module ReflectedXssConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/** Global taint-tracking for detecting "reflected server-side cross-site scripting" vulnerabilities. */
module ReflectedXssFlow = TaintTracking::Global<ReflectedXssConfig>;

View File

@@ -1,5 +1,5 @@
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* Provides a taint-tracking configuration for detecting "regular expression injection"
* vulnerabilities.
*
* Note, for performance reasons: only import this file if
@@ -13,9 +13,11 @@ import semmle.python.dataflow.new.TaintTracking
import RegexInjectionCustomizations::RegexInjection
/**
* DEPRECATED: Use `RegexInjectionFlow` module instead.
*
* A taint-tracking configuration for detecting "regular expression injection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "RegexInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -28,3 +30,14 @@ class Configuration extends TaintTracking::Configuration {
guard instanceof SanitizerGuard
}
}
/**
* Data-flow configuration for "regular expression injection":
* flow from `Source` nodes to `Sink` nodes, with `Sanitizer` nodes as barriers.
*/
private module RegexInjectionConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/** Global taint-tracking for detecting "regular expression injection" vulnerabilities. */
module RegexInjectionFlow = TaintTracking::Global<RegexInjectionConfig>;

View File

@@ -13,6 +13,8 @@ import semmle.python.Concepts
import ServerSideRequestForgeryCustomizations::ServerSideRequestForgery
/**
* DEPRECATED: Use `FullServerSideRequestForgeryFlow` module instead.
*
* A taint-tracking configuration for detecting "Server-side request forgery" vulnerabilities.
*
* This configuration has a sanitizer to limit results to cases where attacker has full control of URL.
@@ -21,7 +23,7 @@ import ServerSideRequestForgeryCustomizations::ServerSideRequestForgery
* You should use the `fullyControlledRequest` to only select results where all
* URL parts are fully controlled.
*/
class FullServerSideRequestForgeryConfiguration extends TaintTracking::Configuration {
deprecated class FullServerSideRequestForgeryConfiguration extends TaintTracking::Configuration {
FullServerSideRequestForgeryConfiguration() { this = "FullServerSideRequestForgery" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -39,24 +41,51 @@ class FullServerSideRequestForgeryConfiguration extends TaintTracking::Configura
}
}
/**
* A data-flow configuration for detecting "Server-side request forgery" vulnerabilities.
*
* This configuration has a sanitizer to limit results to cases where attacker has full control of URL.
* See `PartialServerSideRequestForgeryFlow` for a variant without this requirement.
*
* You should use the `fullyControlledRequest` to only select results where all
* URL parts are fully controlled.
*/
private module FullServerSideRequestForgeryConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) {
node instanceof Sanitizer
or
// The extra barrier that restricts results to fully controlled URLs.
node instanceof FullUrlControlSanitizer
}
}
/**
* Global taint-tracking for detecting "Full server-side request forgery" vulnerabilities.
*
* You should use the `fullyControlledRequest` to only select results where all
* URL parts are fully controlled.
*/
module FullServerSideRequestForgeryFlow = TaintTracking::Global<FullServerSideRequestForgeryConfig>;
/**
* Holds if all URL parts of `request` are fully user-controlled.
*/
predicate fullyControlledRequest(Http::Client::Request request) {
exists(FullServerSideRequestForgeryConfiguration fullConfig |
forall(DataFlow::Node urlPart | urlPart = request.getAUrlPart() |
fullConfig.hasFlow(_, urlPart)
)
forall(DataFlow::Node urlPart | urlPart = request.getAUrlPart() |
FullServerSideRequestForgeryFlow::flow(_, urlPart)
)
}
/**
* DEPRECATED: Use `PartialServerSideRequestForgeryFlow` module instead.
*
* A taint-tracking configuration for detecting "Server-side request forgery" vulnerabilities.
*
* This configuration has results, even when the attacker does not have full control over the URL.
* See `FullServerSideRequestForgeryConfiguration`, and the `fullyControlledRequest` predicate.
*/
class PartialServerSideRequestForgeryConfiguration extends TaintTracking::Configuration {
deprecated class PartialServerSideRequestForgeryConfiguration extends TaintTracking::Configuration {
PartialServerSideRequestForgeryConfiguration() { this = "PartialServerSideRequestForgery" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -69,3 +98,21 @@ class PartialServerSideRequestForgeryConfiguration extends TaintTracking::Config
guard instanceof SanitizerGuard
}
}
/**
* A data-flow configuration for detecting "Server-side request forgery" vulnerabilities.
*
* This configuration has results, even when the attacker does not have full control over the URL.
* See `FullServerSideRequestForgeryFlow`, and the `fullyControlledRequest` predicate.
*/
private module PartialServerSideRequestForgeryConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/**
* Global taint-tracking for detecting "partial server-side request forgery" vulnerabilities.
*/
module PartialServerSideRequestForgeryFlow =
TaintTracking::Global<PartialServerSideRequestForgeryConfig>;

View File

@@ -9,7 +9,6 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.SqlAlchemy
/**
* Provides default sources, sinks and sanitizers for detecting

View File

@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import SqlInjectionCustomizations::SqlInjection
/**
* DEPRECATED: Use `SqlInjectionFlow` module instead.
*
* A taint-tracking configuration for detecting "SQL injection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "SqlInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -27,3 +29,14 @@ class Configuration extends TaintTracking::Configuration {
guard instanceof SanitizerGuard
}
}
/**
* Data-flow configuration for "SQL injection":
* flow from `Source` nodes to `Sink` nodes, with `Sanitizer` nodes as barriers.
*/
private module SqlInjectionConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/** Global taint-tracking for detecting "SQL injection" vulnerabilities. */
module SqlInjectionFlow = TaintTracking::Global<SqlInjectionConfig>;

View File

@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import StackTraceExposureCustomizations::StackTraceExposure
/**
* DEPRECATED: Use `StackTraceExposureFlow` module instead.
*
* A taint-tracking configuration for detecting "stack trace exposure" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "StackTraceExposure" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -36,3 +38,23 @@ class Configuration extends TaintTracking::Configuration {
)
}
}
/**
 * The data-flow configuration behind `StackTraceExposureFlow`: sources, sinks and
 * barriers are the `Source`, `Sink` and `Sanitizer` classes in scope, plus one
 * extra step for reads of `__traceback__`.
 */
private module StackTraceExposureConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source` of this query. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink` of this query. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /** Holds if `node` is a `Sanitizer`, so that flow through it is blocked. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }

  /**
   * Holds if `nodeTo` is a read of the `__traceback__` attribute on the object `nodeFrom`.
   *
   * A stack trace is accessible as the `__traceback__` attribute of a caught exception,
   * see https://docs.python.org/3/reference/datamodel.html#traceback-objects
   */
  predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(DataFlow::AttrRead tracebackRead |
      tracebackRead.getAttributeName() = "__traceback__" and
      nodeFrom = tracebackRead.getObject() and
      nodeTo = tracebackRead
    )
  }
}

/** Global taint-tracking for detecting "stack trace exposure" vulnerabilities. */
module StackTraceExposureFlow = TaintTracking::Global<StackTraceExposureConfig>;

View File

@@ -1,5 +1,5 @@
/**
* Provides a taint-tracking configuration for detecting "command injection" vulnerabilities.
* Provides a taint-tracking configuration for detecting "tar slip" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `TarSlip::Configuration` is needed, otherwise
@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import TarSlipCustomizations::TarSlip
/**
* A taint-tracking configuration for detecting "command injection" vulnerabilities.
* DEPRECATED: Use `TarSlipFlow` module instead.
*
* A taint-tracking configuration for detecting "tar slip" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "TarSlip" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -23,3 +25,14 @@ class Configuration extends TaintTracking::Configuration {
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
}
/**
 * The data-flow configuration behind `TarSlipFlow`: sources, sinks and
 * barriers are the `Source`, `Sink` and `Sanitizer` classes in scope.
 */
private module TarSlipConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source` of this query. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink` of this query. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /** Holds if `node` is a `Sanitizer`, so that flow through it is blocked. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}

/** Global taint-tracking for detecting "tar slip" vulnerabilities. */
module TarSlipFlow = TaintTracking::Global<TarSlipConfig>;

View File

@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import UnsafeDeserializationCustomizations::UnsafeDeserialization
/**
* DEPRECATED: Use `UnsafeDeserializationFlow` module instead.
*
* A taint-tracking configuration for detecting "code execution from deserialization" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "UnsafeDeserialization" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -27,3 +29,14 @@ class Configuration extends TaintTracking::Configuration {
guard instanceof SanitizerGuard
}
}
/**
 * The data-flow configuration behind `UnsafeDeserializationFlow`: sources, sinks and
 * barriers are the `Source`, `Sink` and `Sanitizer` classes in scope.
 */
private module UnsafeDeserializationConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source` of this query. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink` of this query. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /** Holds if `node` is a `Sanitizer`, so that flow through it is blocked. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}

/** Global taint-tracking for detecting "code execution from deserialization" vulnerabilities. */
module UnsafeDeserializationFlow = TaintTracking::Global<UnsafeDeserializationConfig>;

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import CommandInjectionCustomizations::CommandInjection as CommandInjection
private import semmle.python.Concepts as Concepts
private import semmle.python.ApiGraphs
/**
* Module containing sources, sinks, and sanitizers for shell command constructed from library input.
@@ -17,6 +18,9 @@ module UnsafeShellCommandConstruction {
/** A source for shell command constructed from library input vulnerabilities. */
abstract class Source extends DataFlow::Node { }
/** A sanitizer for shell command constructed from library input vulnerabilities. */
abstract class Sanitizer extends DataFlow::Node { }
private import semmle.python.frameworks.Setuptools
/** An input parameter to a library seen as a source. */
@@ -156,4 +160,13 @@ module UnsafeShellCommandConstruction {
override DataFlow::Node getStringConstruction() { result = formatCall }
}
/**
 * The first argument of a call to `shlex.quote`, considered as a sanitizer.
 */
class ShlexQuoteAsSanitizer extends Sanitizer {
  ShlexQuoteAsSanitizer() {
    exists(API::CallNode quoteCall |
      quoteCall = API::moduleImport("shlex").getMember("quote").getACall() and
      this = quoteCall.getArg(0)
    )
  }
}
}

View File

@@ -14,9 +14,11 @@ private import CommandInjectionCustomizations::CommandInjection as CommandInject
private import semmle.python.dataflow.new.BarrierGuards
/**
* DEPRECATED: Use `UnsafeShellCommandConstructionFlow` module instead.
*
* A taint-tracking configuration for detecting shell command constructed from library input vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "UnsafeShellCommandConstruction" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -24,7 +26,8 @@ class Configuration extends TaintTracking::Configuration {
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) {
node instanceof CommandInjection::Sanitizer // using all sanitizers from `rb/command-injection`
node instanceof Sanitizer or
node instanceof CommandInjection::Sanitizer // using all sanitizers from `py/command-injection`
}
// override to require the path doesn't have unmatched return steps
@@ -32,3 +35,23 @@ class Configuration extends TaintTracking::Configuration {
result instanceof DataFlow::FeatureHasSourceCallContext
}
}
/**
 * A taint-tracking configuration for detecting "shell command constructed from library input" vulnerabilities.
 */
module UnsafeShellCommandConstructionConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source` of this query. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink` of this query. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /**
   * Holds if flow through `node` is blocked.
   *
   * Both this query's own `Sanitizer`s and the command-injection sanitizers apply,
   * matching `isSanitizer` on the deprecated `Configuration` class in this file.
   */
  predicate isBarrier(DataFlow::Node node) {
    node instanceof Sanitizer or
    node instanceof CommandInjection::Sanitizer // using all sanitizers from `py/command-injection`
  }

  // require the path doesn't have unmatched return steps
  DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSourceCallContext }
}

/** Global taint-tracking for detecting "shell command constructed from library input" vulnerabilities. */
module UnsafeShellCommandConstructionFlow =
  TaintTracking::Global<UnsafeShellCommandConstructionConfig>;

View File

@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import UrlRedirectCustomizations::UrlRedirect
/**
* DEPRECATED: Use `UrlRedirectFlow` module instead.
*
* A taint-tracking configuration for detecting "URL redirection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "UrlRedirect" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -27,3 +29,14 @@ class Configuration extends TaintTracking::Configuration {
guard instanceof SanitizerGuard
}
}
/**
 * The data-flow configuration behind `UrlRedirectFlow`: sources, sinks and
 * barriers are the `Source`, `Sink` and `Sanitizer` classes in scope.
 */
private module UrlRedirectConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source` of this query. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink` of this query. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /** Holds if `node` is a `Sanitizer`, so that flow through it is blocked. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}

/** Global taint-tracking for detecting "URL redirection" vulnerabilities. */
module UrlRedirectFlow = TaintTracking::Global<UrlRedirectConfig>;

View File

@@ -24,10 +24,12 @@ module NormalHashFunction {
import WeakSensitiveDataHashingCustomizations::NormalHashFunction
/**
* DEPRECATED: Use `Flow` module instead.
*
* A taint-tracking configuration for detecting use of a broken or weak
* cryptographic hashing algorithm on sensitive data.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "NormalHashFunction" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -44,6 +46,21 @@ module NormalHashFunction {
sensitiveDataExtraStepForCalls(node1, node2)
}
}
/**
 * The data-flow configuration behind `Flow`: sources, sinks and barriers are the
 * `Source`, `Sink` and `Sanitizer` classes in scope, with the extra steps from
 * `sensitiveDataExtraStepForCalls`.
 */
private module Config implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source` of this query. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink` of this query. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /** Holds if `node` is a `Sanitizer`, so that flow through it is blocked. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }

  /** Holds if `sensitiveDataExtraStepForCalls` provides a step from `node1` to `node2`. */
  predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
    sensitiveDataExtraStepForCalls(node1, node2)
  }
}

/** Global taint-tracking for detecting "use of a broken or weak cryptographic hashing algorithm on sensitive data" vulnerabilities. */
module Flow = TaintTracking::Global<Config>;
}
/**
@@ -57,13 +74,15 @@ module ComputationallyExpensiveHashFunction {
import WeakSensitiveDataHashingCustomizations::ComputationallyExpensiveHashFunction
/**
* DEPRECATED: Use `Flow` module instead.
*
* A taint-tracking configuration for detecting use of a broken or weak
* cryptographic hashing algorithm on passwords.
*
* Passwords have stricter requirements on the hashing algorithm used (must be
* computationally expensive to prevent brute-force attacks).
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "ComputationallyExpensiveHashFunction" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -80,4 +99,49 @@ module ComputationallyExpensiveHashFunction {
sensitiveDataExtraStepForCalls(node1, node2)
}
}
/**
 * The data-flow configuration behind `Flow`.
 *
 * Passwords have stricter requirements on the hashing algorithm used (must be
 * computationally expensive to prevent brute-force attacks).
 */
private module Config implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source` of this query. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink` of this query. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /** Holds if `node` is a `Sanitizer`, so that flow through it is blocked. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }

  /** Holds if `sensitiveDataExtraStepForCalls` provides a step from `node1` to `node2`. */
  predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
    sensitiveDataExtraStepForCalls(node1, node2)
  }
}

/** Global taint-tracking for detecting "use of a broken or weak cryptographic hashing algorithm on passwords" vulnerabilities. */
module Flow = TaintTracking::Global<Config>;
}
/**
 * Global taint-tracking for detecting both variants of "use of a broken or weak
 * cryptographic hashing algorithm on sensitive data" vulnerabilities.
 *
 * This merges the path graphs of `NormalHashFunction::Flow` (first component) and
 * `ComputationallyExpensiveHashFunction::Flow` (second component).
 *
 * See convenience predicates `normalHashFunctionFlowPath` and
 * `computationallyExpensiveHashFunctionFlowPath`.
 */
module WeakSensitiveDataHashingFlow =
  DataFlow::MergePathGraph<NormalHashFunction::Flow::PathNode,
    ComputationallyExpensiveHashFunction::Flow::PathNode, NormalHashFunction::Flow::PathGraph,
    ComputationallyExpensiveHashFunction::Flow::PathGraph>;
/**
 * Holds if data can flow from `source` to `sink` with `NormalHashFunction::Flow`.
 *
 * Both merged path nodes are viewed through `asPathNode1()`, the
 * `NormalHashFunction::Flow` component of `WeakSensitiveDataHashingFlow`.
 */
predicate normalHashFunctionFlowPath(
  WeakSensitiveDataHashingFlow::PathNode source, WeakSensitiveDataHashingFlow::PathNode sink
) {
  NormalHashFunction::Flow::flowPath(source.asPathNode1(), sink.asPathNode1())
}
/**
 * Holds if data can flow from `source` to `sink` with `ComputationallyExpensiveHashFunction::Flow`.
 *
 * Both merged path nodes are viewed through `asPathNode2()`, the
 * `ComputationallyExpensiveHashFunction::Flow` component of `WeakSensitiveDataHashingFlow`.
 */
predicate computationallyExpensiveHashFunctionFlowPath(
  WeakSensitiveDataHashingFlow::PathNode source, WeakSensitiveDataHashingFlow::PathNode sink
) {
  ComputationallyExpensiveHashFunction::Flow::flowPath(source.asPathNode2(), sink.asPathNode2())
}

View File

@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import XmlBombCustomizations::XmlBomb
/**
* DEPRECATED: Use `XmlBombFlow` module instead.
*
* A taint-tracking configuration for detecting "XML bomb" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "XmlBomb" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -26,3 +28,14 @@ class Configuration extends TaintTracking::Configuration {
node instanceof Sanitizer
}
}
/**
 * The data-flow configuration behind `XmlBombFlow`: sources, sinks and
 * barriers are the `Source`, `Sink` and `Sanitizer` classes in scope.
 */
private module XmlBombConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source` of this query. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink` of this query. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /** Holds if `node` is a `Sanitizer`, so that flow through it is blocked. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}

/** Global taint-tracking for detecting "XML bomb" vulnerabilities. */
module XmlBombFlow = TaintTracking::Global<XmlBombConfig>;

View File

@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import XpathInjectionCustomizations::XpathInjection
/**
* DEPRECATED: Use `XpathInjectionFlow` module instead.
*
* A taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "Xpath Injection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -27,3 +29,14 @@ class Configuration extends TaintTracking::Configuration {
guard instanceof SanitizerGuard
}
}
/**
 * The data-flow configuration behind `XpathInjectionFlow`: sources, sinks and
 * barriers are the `Source`, `Sink` and `Sanitizer` classes in scope.
 */
private module XpathInjectionConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source` of this query. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink` of this query. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /** Holds if `node` is a `Sanitizer`, so that flow through it is blocked. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}

/** Global taint-tracking for detecting "Xpath Injection" vulnerabilities. */
module XpathInjectionFlow = TaintTracking::Global<XpathInjectionConfig>;

View File

@@ -12,9 +12,11 @@ import semmle.python.dataflow.new.TaintTracking
import XxeCustomizations::Xxe
/**
* DEPRECATED: Use `XxeFlow` module instead.
*
* A taint-tracking configuration for detecting "XML External Entity (XXE)" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "Xxe" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -26,3 +28,14 @@ class Configuration extends TaintTracking::Configuration {
node instanceof Sanitizer
}
}
/**
 * The data-flow configuration behind `XxeFlow`: sources, sinks and
 * barriers are the `Source`, `Sink` and `Sanitizer` classes in scope.
 */
private module XxeConfig implements DataFlow::ConfigSig {
  /** Holds if `source` is a `Source` of this query. */
  predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink` of this query. */
  predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

  /** Holds if `node` is a `Sanitizer`, so that flow through it is blocked. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}

/** Global taint-tracking for detecting "XML External Entity (XXE)" vulnerabilities. */
module XxeFlow = TaintTracking::Global<XxeConfig>;