Merge branch 'main' of https://github.com/github/codeql into python/captured-variables-for-typetracking

This commit is contained in:
Rasmus Lerchedahl Petersen
2023-04-24 11:50:32 +02:00
3246 changed files with 275867 additions and 170048 deletions

View File

@@ -1,3 +1,43 @@
## 0.9.0
### Deprecated APIs
* The recently introduced new data flow and taint tracking APIs have had a
number of module and predicate renamings. The old APIs remain in place for
now.
### Minor Analysis Improvements
* Added modeling of SQL execution in the packages `sqlite3.dbapi2`, `cassandra-driver`, `aiosqlite`, and the functions `sqlite3.Connection.executescript`/`sqlite3.Cursor.executescript` and `asyncpg.connection.connect()`.
* Fixed module resolution so we allow imports of definitions that have had an attribute assigned to it, such as `class Foo; Foo.bar = 42`.
### Bug Fixes
* Fixed some accidental predicate visibility in the backwards-compatible wrapper for data flow configurations. In particular, `DataFlow::hasFlowPath`, `DataFlow::hasFlow`, `DataFlow::hasFlowTo`, and `DataFlow::hasFlowToExpr` were accidentally exposed in a single version.
## 0.8.3
No user-facing changes.
## 0.8.2
### New Features
* Added support for merging two `PathGraph`s via disjoint union to allow results from multiple data flow computations in a single `path-problem` query.
### Major Analysis Improvements
* The main data flow and taint tracking APIs have been changed. The old APIs
remain in place for now and translate to the new through a
backwards-compatible wrapper. If multiple configurations are in scope
simultaneously, then this may affect results slightly. The new API is quite
similar to the old, but makes use of a configuration module instead of a
configuration class.
### Minor Analysis Improvements
* Deleted the deprecated `getPath` and `getFolder` predicates from the `XmlFile` class.
## 0.8.1
### Major Analysis Improvements

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* Deleted the deprecated `getPath` and `getFolder` predicates from the `XmlFile` class.

View File

@@ -1,4 +0,0 @@
---
category: feature
---
* Added support for merging two `PathGraph`s via disjoint union to allow results from multiple data flow computations in a single `path-problem` query.

View File

@@ -1,9 +1,18 @@
---
category: majorAnalysis
---
## 0.8.2
### New Features
* Added support for merging two `PathGraph`s via disjoint union to allow results from multiple data flow computations in a single `path-problem` query.
### Major Analysis Improvements
* The main data flow and taint tracking APIs have been changed. The old APIs
remain in place for now and translate to the new through a
backwards-compatible wrapper. If multiple configurations are in scope
simultaneously, then this may affect results slightly. The new API is quite
similar to the old, but makes use of a configuration module instead of a
configuration class.
### Minor Analysis Improvements
* Deleted the deprecated `getPath` and `getFolder` predicates from the `XmlFile` class.

View File

@@ -0,0 +1,3 @@
## 0.8.3
No user-facing changes.

View File

@@ -0,0 +1,16 @@
## 0.9.0
### Deprecated APIs
* The recently introduced new data flow and taint tracking APIs have had a
number of module and predicate renamings. The old APIs remain in place for
now.
### Minor Analysis Improvements
* Added modeling of SQL execution in the packages `sqlite3.dbapi2`, `cassandra-driver`, `aiosqlite`, and the functions `sqlite3.Connection.executescript`/`sqlite3.Cursor.executescript` and `asyncpg.connection.connect()`.
* Fixed module resolution so we allow imports of definitions that have had an attribute assigned to it, such as `class Foo; Foo.bar = 42`.
### Bug Fixes
* Fixed some accidental predicate visibility in the backwards-compatible wrapper for data flow configurations. In particular, `DataFlow::hasFlowPath`, `DataFlow::hasFlow`, `DataFlow::hasFlowTo`, and `DataFlow::hasFlowToExpr` were accidentally exposed in a single version.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.8.1
lastReleaseVersion: 0.9.0

View File

@@ -1,5 +1,5 @@
name: codeql/python-all
version: 0.8.2-dev
version: 0.9.1-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python
@@ -8,5 +8,6 @@ upgrades: upgrades
dependencies:
codeql/regex: ${workspace}
codeql/tutorial: ${workspace}
codeql/util: ${workspace}
dataExtensions:
- semmle/python/frameworks/**/model.yml

View File

@@ -3,12 +3,14 @@
*/
// If you add modeling of a new framework/library, remember to add it to the docs in
// `docs/codeql/support/reusables/frameworks.rst`
// `docs/codeql/reusables/supported-frameworks.rst`
private import semmle.python.frameworks.Aioch
private import semmle.python.frameworks.Aiohttp
private import semmle.python.frameworks.Aiomysql
private import semmle.python.frameworks.Aiosqlite
private import semmle.python.frameworks.Aiopg
private import semmle.python.frameworks.Asyncpg
private import semmle.python.frameworks.CassandraDriver
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Cryptodome
private import semmle.python.frameworks.Cryptography

View File

@@ -468,6 +468,8 @@ module Impl implements RegexTreeViewSig {
*/
class RegExpCharEscape = RegExpEscape;
private import codeql.util.Numbers as Numbers
/**
* An escaped regular expression term, that is, a regular expression
* term starting with a backslash, which is not a backreference.
@@ -528,42 +530,8 @@ module Impl implements RegexTreeViewSig {
* E.g. for `\u0061` this returns "a".
*/
private string getUnicode() {
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
result = codepoint.toUnicode()
)
result = Numbers::parseHexInt(this.getText().suffix(2)).toUnicode()
}
/**
* Gets int value for the `index`th char in the hex number of the unicode escape.
* E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
*/
private int getHexValueFromUnicode(int index) {
this.isUnicode() and
exists(string hex, string char | hex = this.getText().suffix(2) |
char = hex.charAt(index) and
result = 16.pow(hex.length() - index - 1) * toHex(char)
)
}
}
/**
* Gets the hex number for the `hex` char.
*/
private int toHex(string hex) {
hex = [0 .. 9].toString() and
result = hex.toInt()
or
result = 10 and hex = ["a", "A"]
or
result = 11 and hex = ["b", "B"]
or
result = 12 and hex = ["c", "C"]
or
result = 13 and hex = ["d", "D"]
or
result = 14 and hex = ["e", "E"]
or
result = 15 and hex = ["f", "F"]
}
/**

View File

@@ -1,10 +1,3 @@
/** Provides the `Unit` class. */
/** The unit type. */
private newtype TUnit = TMkUnit()
/** The trivial type with a single element. */
class Unit extends TUnit {
/** Gets a textual representation of this element. */
string toString() { result = "unit" }
}
import codeql.util.Unit

View File

@@ -2,7 +2,7 @@
* Provides an implementation of global (interprocedural) data flow. This file
* re-exports the local (intraprocedural) data flow analysis from
* `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
* through the `Make` and `MakeWithState` modules.
* through the `Global` and `GlobalWithState` modules.
*/
private import DataFlowImplCommon
@@ -73,10 +73,10 @@ signature module ConfigSig {
*/
default FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
/** Holds if sources should be grouped in the result of `flowPath`. */
default predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
/** Holds if sinks should be grouped in the result of `flowPath`. */
default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
@@ -166,10 +166,10 @@ signature module StateConfigSig {
*/
default FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
/** Holds if sources should be grouped in the result of `flowPath`. */
default predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
/** Holds if sinks should be grouped in the result of `flowPath`. */
default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
@@ -182,15 +182,15 @@ signature module StateConfigSig {
}
/**
* Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
* Gets the exploration limit for `partialFlow` and `partialFlowRev`
* measured in approximate number of interprocedural steps.
*/
signature int explorationLimitSig();
/**
* The output of a data flow computation.
* The output of a global data flow computation.
*/
signature module DataFlowSig {
signature module GlobalFlowSig {
/**
* A `Node` augmented with a call context (except for sinks) and an access path.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
@@ -203,28 +203,28 @@ signature module DataFlowSig {
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink);
predicate flowPath(PathNode source, PathNode sink);
/**
* Holds if data can flow from `source` to `sink`.
*/
predicate hasFlow(Node source, Node sink);
predicate flow(Node source, Node sink);
/**
* Holds if data can flow from some source to `sink`.
*/
predicate hasFlowTo(Node sink);
predicate flowTo(Node sink);
/**
* Holds if data can flow from some source to `sink`.
*/
predicate hasFlowToExpr(DataFlowExpr sink);
predicate flowToExpr(DataFlowExpr sink);
}
/**
* Constructs a standard data flow computation.
* Constructs a global data flow computation.
*/
module Make<ConfigSig Config> implements DataFlowSig {
module Global<ConfigSig Config> implements GlobalFlowSig {
private module C implements FullStateConfigSig {
import DefaultState<Config>
import Config
@@ -233,10 +233,15 @@ module Make<ConfigSig Config> implements DataFlowSig {
import Impl<C>
}
/** DEPRECATED: Use `Global` instead. */
deprecated module Make<ConfigSig Config> implements GlobalFlowSig {
import Global<Config>
}
/**
* Constructs a data flow computation using flow state.
* Constructs a global data flow computation using flow state.
*/
module MakeWithState<StateConfigSig Config> implements DataFlowSig {
module GlobalWithState<StateConfigSig Config> implements GlobalFlowSig {
private module C implements FullStateConfigSig {
import Config
}
@@ -244,6 +249,11 @@ module MakeWithState<StateConfigSig Config> implements DataFlowSig {
import Impl<C>
}
/** DEPRECATED: Use `GlobalWithState` instead. */
deprecated module MakeWithState<StateConfigSig Config> implements GlobalFlowSig {
import GlobalWithState<Config>
}
signature class PathNodeSig {
/** Gets a textual representation of this element. */
string toString();

View File

@@ -1614,3 +1614,13 @@ private module OutNodes {
* `kind`.
*/
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) }
/**
* Holds if flow from `call`'s argument `arg` to parameter `p` is permissible.
*
* This is a temporary hook to support technical debt in the Go language; do not use.
*/
pragma[inline]
predicate golangSpecificParamArgFilter(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
any()
}

View File

@@ -8,6 +8,7 @@ private import DataFlowImplCommon
private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
private import DataFlowImplCommonPublic
private import codeql.util.Unit
import DataFlow
/**
@@ -91,10 +92,10 @@ signature module FullStateConfigSig {
*/
FlowFeature getAFeature();
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
/** Holds if sources should be grouped in the result of `flowPath`. */
predicate sourceGrouping(Node source, string sourceGroup);
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
/** Holds if sinks should be grouped in the result of `flowPath`. */
predicate sinkGrouping(Node sink, string sinkGroup);
/**
@@ -418,6 +419,10 @@ module Impl<FullStateConfigSig Config> {
)
}
private predicate sourceCallCtx(CallContext cc) {
if hasSourceCallCtx() then cc instanceof CallContextSomeCall else cc instanceof CallContextAny
}
private predicate hasSinkCallCtx() {
exists(FlowFeature feature | feature = Config::getAFeature() |
feature instanceof FeatureHasSinkCallContext or
@@ -441,11 +446,7 @@ module Impl<FullStateConfigSig Config> {
}
private module Stage1 implements StageSig {
class Ap extends int {
// workaround for bad functionality-induced joins (happens when using `Unit`)
pragma[nomagic]
Ap() { this in [0 .. 1] and this < 1 }
}
class Ap = Unit;
private class Cc = boolean;
@@ -1141,19 +1142,13 @@ module Impl<FullStateConfigSig Config> {
import Param
/* Begin: Stage logic. */
// use an alias as a workaround for bad functionality-induced joins
pragma[nomagic]
private predicate revFlowApAlias(NodeEx node, ApApprox apa) {
PrevStage::revFlowAp(node, apa)
}
pragma[nomagic]
private predicate flowIntoCallApa(
DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, ApApprox apa
) {
flowIntoCall(call, arg, p, allowsFieldFlow) and
PrevStage::revFlowAp(p, pragma[only_bind_into](apa)) and
revFlowApAlias(arg, pragma[only_bind_into](apa))
PrevStage::revFlowAp(arg, pragma[only_bind_into](apa))
}
pragma[nomagic]
@@ -1163,7 +1158,7 @@ module Impl<FullStateConfigSig Config> {
) {
flowOutOfCall(call, ret, kind, out, allowsFieldFlow) and
PrevStage::revFlowAp(out, pragma[only_bind_into](apa)) and
revFlowApAlias(ret, pragma[only_bind_into](apa))
PrevStage::revFlowAp(ret, pragma[only_bind_into](apa))
}
pragma[nomagic]
@@ -1691,16 +1686,6 @@ module Impl<FullStateConfigSig Config> {
pragma[nomagic]
predicate revFlowAp(NodeEx node, Ap ap) { revFlow(node, _, _, _, ap) }
// use an alias as a workaround for bad functionality-induced joins
pragma[nomagic]
additional predicate revFlowAlias(NodeEx node) { revFlow(node, _, _, _, _) }
// use an alias as a workaround for bad functionality-induced joins
pragma[nomagic]
additional predicate revFlowAlias(NodeEx node, FlowState state, Ap ap) {
revFlow(node, state, ap)
}
private predicate fwdConsCand(TypedContent tc, Ap ap) { storeStepFwd(_, ap, tc, _, _) }
private predicate revConsCand(TypedContent tc, Ap ap) { storeStepCand(_, ap, tc, _, _) }
@@ -1974,7 +1959,7 @@ module Impl<FullStateConfigSig Config> {
) {
flowOutOfCallNodeCand1(call, node1, kind, node2, allowsFieldFlow) and
Stage2::revFlow(node2) and
Stage2::revFlowAlias(node1)
Stage2::revFlow(node1)
}
pragma[nomagic]
@@ -1983,7 +1968,7 @@ module Impl<FullStateConfigSig Config> {
) {
flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow) and
Stage2::revFlow(node2) and
Stage2::revFlowAlias(node1)
Stage2::revFlow(node1)
}
private module LocalFlowBigStep {
@@ -2065,11 +2050,11 @@ module Impl<FullStateConfigSig Config> {
additionalLocalFlowStepNodeCand1(node1, node2) and
state1 = state2 and
Stage2::revFlow(node1, pragma[only_bind_into](state1), false) and
Stage2::revFlowAlias(node2, pragma[only_bind_into](state2), false)
Stage2::revFlow(node2, pragma[only_bind_into](state2), false)
or
additionalLocalStateStep(node1, state1, node2, state2) and
Stage2::revFlow(node1, state1, false) and
Stage2::revFlowAlias(node2, state2, false)
Stage2::revFlow(node2, state2, false)
}
/**
@@ -2262,7 +2247,7 @@ module Impl<FullStateConfigSig Config> {
) {
localFlowBigStep(node1, state1, node2, state2, preservesValue, ap.getType(), _) and
PrevStage::revFlow(node1, pragma[only_bind_into](state1), _) and
PrevStage::revFlowAlias(node2, pragma[only_bind_into](state2), _) and
PrevStage::revFlow(node2, pragma[only_bind_into](state2), _) and
exists(lcc)
}
@@ -2273,7 +2258,7 @@ module Impl<FullStateConfigSig Config> {
exists(FlowState state |
flowOutOfCallNodeCand2(call, node1, kind, node2, allowsFieldFlow) and
PrevStage::revFlow(node2, pragma[only_bind_into](state), _) and
PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _)
PrevStage::revFlow(node1, pragma[only_bind_into](state), _)
)
}
@@ -2284,7 +2269,7 @@ module Impl<FullStateConfigSig Config> {
exists(FlowState state |
flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow) and
PrevStage::revFlow(node2, pragma[only_bind_into](state), _) and
PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _)
PrevStage::revFlow(node1, pragma[only_bind_into](state), _)
)
}
@@ -2586,7 +2571,7 @@ module Impl<FullStateConfigSig Config> {
) {
localFlowBigStep(node1, state1, node2, state2, preservesValue, ap.getType(), lcc) and
PrevStage::revFlow(node1, pragma[only_bind_into](state1), _) and
PrevStage::revFlowAlias(node2, pragma[only_bind_into](state2), _)
PrevStage::revFlow(node2, pragma[only_bind_into](state2), _)
}
pragma[nomagic]
@@ -2596,7 +2581,7 @@ module Impl<FullStateConfigSig Config> {
exists(FlowState state |
flowOutOfCallNodeCand2(call, node1, kind, node2, allowsFieldFlow) and
PrevStage::revFlow(node2, pragma[only_bind_into](state), _) and
PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _)
PrevStage::revFlow(node1, pragma[only_bind_into](state), _)
)
}
@@ -2607,7 +2592,7 @@ module Impl<FullStateConfigSig Config> {
exists(FlowState state |
flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow) and
PrevStage::revFlow(node2, pragma[only_bind_into](state), _) and
PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _)
PrevStage::revFlow(node1, pragma[only_bind_into](state), _)
)
}
@@ -2804,11 +2789,7 @@ module Impl<FullStateConfigSig Config> {
// A PathNode is introduced by a source ...
Stage5::revFlow(node, state) and
sourceNode(node, state) and
(
if hasSourceCallCtx()
then cc instanceof CallContextSomeCall
else cc instanceof CallContextAny
) and
sourceCallCtx(cc) and
sc instanceof SummaryCtxNone and
ap = TAccessPathNil(node.getDataFlowType())
or
@@ -3214,11 +3195,7 @@ module Impl<FullStateConfigSig Config> {
override predicate isSource() {
sourceNode(node, state) and
(
if hasSourceCallCtx()
then cc instanceof CallContextSomeCall
else cc instanceof CallContextAny
) and
sourceCallCtx(cc) and
sc instanceof SummaryCtxNone and
ap = TAccessPathNil(node.getDataFlowType())
}
@@ -3653,7 +3630,7 @@ module Impl<FullStateConfigSig Config> {
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink) {
predicate flowPath(PathNode source, PathNode sink) {
exists(PathNodeImpl flowsource, PathNodeImpl flowsink |
source = flowsource and sink = flowsink
|
@@ -3663,6 +3640,9 @@ module Impl<FullStateConfigSig Config> {
)
}
/** DEPRECATED: Use `flowPath` instead. */
deprecated predicate hasFlowPath = flowPath/2;
private predicate flowsTo(PathNodeImpl flowsource, PathNodeSink flowsink, Node source, Node sink) {
flowsource.isSource() and
flowsource.getNodeEx().asNode() = source and
@@ -3673,17 +3653,26 @@ module Impl<FullStateConfigSig Config> {
/**
* Holds if data can flow from `source` to `sink`.
*/
predicate hasFlow(Node source, Node sink) { flowsTo(_, _, source, sink) }
predicate flow(Node source, Node sink) { flowsTo(_, _, source, sink) }
/** DEPRECATED: Use `flow` instead. */
deprecated predicate hasFlow = flow/2;
/**
* Holds if data can flow from some source to `sink`.
*/
predicate hasFlowTo(Node sink) { sink = any(PathNodeSink n).getNodeEx().asNode() }
predicate flowTo(Node sink) { sink = any(PathNodeSink n).getNodeEx().asNode() }
/** DEPRECATED: Use `flowTo` instead. */
deprecated predicate hasFlowTo = flowTo/1;
/**
* Holds if data can flow from some source to `sink`.
*/
predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) }
predicate flowToExpr(DataFlowExpr sink) { flowTo(exprNode(sink)) }
/** DEPRECATED: Use `flowToExpr` instead. */
deprecated predicate hasFlowToExpr = flowToExpr/1;
private predicate finalStats(
boolean fwd, int nodes, int fields, int conscand, int states, int tuples
@@ -4594,7 +4583,7 @@ module Impl<FullStateConfigSig Config> {
*
* To use this in a `path-problem` query, import the module `PartialPathGraph`.
*/
predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) {
predicate partialFlow(PartialPathNode source, PartialPathNode node, int dist) {
partialFlow(source, node) and
dist = node.getSourceDistance()
}
@@ -4614,7 +4603,7 @@ module Impl<FullStateConfigSig Config> {
* Note that reverse flow has slightly lower precision than the corresponding
* forward flow, as reverse flow disregards type pruning among other features.
*/
predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) {
predicate partialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) {
revPartialFlow(node, sink) and
dist = node.getSinkDistance()
}

View File

@@ -1,5 +1,5 @@
/**
* DEPRECATED: Use `Make` and `MakeWithState` instead.
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* Provides a `Configuration` class backwards-compatible interface to the data
* flow library.
@@ -11,6 +11,7 @@ import DataFlowImplSpecific::Public
private import DataFlowImpl
import DataFlowImplCommonPublic
import FlowStateString
private import codeql.util.Unit
/**
* A configuration of interprocedural data flow analysis. This defines
@@ -328,7 +329,6 @@ private module Config implements FullStateConfigSig {
}
private import Impl<Config> as I
import I
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
@@ -379,6 +379,8 @@ class PathNode instanceof I::PathNode {
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
}
module PathGraph = I::PathGraph;
private predicate hasFlow(Node source, Node sink, Configuration config) {
exists(PathNode source0, PathNode sink0 |
hasFlowPath(source0, sink0, config) and
@@ -388,7 +390,7 @@ private predicate hasFlow(Node source, Node sink, Configuration config) {
}
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
hasFlowPath(source, sink) and source.getConfiguration() = config
I::flowPath(source, sink) and source.getConfiguration() = config
}
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }

View File

@@ -1,5 +1,5 @@
/**
* DEPRECATED: Use `Make` and `MakeWithState` instead.
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* Provides a `Configuration` class backwards-compatible interface to the data
* flow library.
@@ -11,6 +11,7 @@ import DataFlowImplSpecific::Public
private import DataFlowImpl
import DataFlowImplCommonPublic
import FlowStateString
private import codeql.util.Unit
/**
* A configuration of interprocedural data flow analysis. This defines
@@ -328,7 +329,6 @@ private module Config implements FullStateConfigSig {
}
private import Impl<Config> as I
import I
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
@@ -379,6 +379,8 @@ class PathNode instanceof I::PathNode {
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
}
module PathGraph = I::PathGraph;
private predicate hasFlow(Node source, Node sink, Configuration config) {
exists(PathNode source0, PathNode sink0 |
hasFlowPath(source0, sink0, config) and
@@ -388,7 +390,7 @@ private predicate hasFlow(Node source, Node sink, Configuration config) {
}
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
hasFlowPath(source, sink) and source.getConfiguration() = config
I::flowPath(source, sink) and source.getConfiguration() = config
}
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }

View File

@@ -1,5 +1,5 @@
/**
* DEPRECATED: Use `Make` and `MakeWithState` instead.
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* Provides a `Configuration` class backwards-compatible interface to the data
* flow library.
@@ -11,6 +11,7 @@ import DataFlowImplSpecific::Public
private import DataFlowImpl
import DataFlowImplCommonPublic
import FlowStateString
private import codeql.util.Unit
/**
* A configuration of interprocedural data flow analysis. This defines
@@ -328,7 +329,6 @@ private module Config implements FullStateConfigSig {
}
private import Impl<Config> as I
import I
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
@@ -379,6 +379,8 @@ class PathNode instanceof I::PathNode {
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
}
module PathGraph = I::PathGraph;
private predicate hasFlow(Node source, Node sink, Configuration config) {
exists(PathNode source0, PathNode sink0 |
hasFlowPath(source0, sink0, config) and
@@ -388,7 +390,7 @@ private predicate hasFlow(Node source, Node sink, Configuration config) {
}
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
hasFlowPath(source, sink) and source.getConfiguration() = config
I::flowPath(source, sink) and source.getConfiguration() = config
}
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }

View File

@@ -1,5 +1,5 @@
/**
* DEPRECATED: Use `Make` and `MakeWithState` instead.
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* Provides a `Configuration` class backwards-compatible interface to the data
* flow library.
@@ -11,6 +11,7 @@ import DataFlowImplSpecific::Public
private import DataFlowImpl
import DataFlowImplCommonPublic
import FlowStateString
private import codeql.util.Unit
/**
* A configuration of interprocedural data flow analysis. This defines
@@ -328,7 +329,6 @@ private module Config implements FullStateConfigSig {
}
private import Impl<Config> as I
import I
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
@@ -379,6 +379,8 @@ class PathNode instanceof I::PathNode {
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
}
module PathGraph = I::PathGraph;
private predicate hasFlow(Node source, Node sink, Configuration config) {
exists(PathNode source0, PathNode sink0 |
hasFlowPath(source0, sink0, config) and
@@ -388,7 +390,7 @@ private predicate hasFlow(Node source, Node sink, Configuration config) {
}
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
hasFlowPath(source, sink) and source.getConfiguration() = config
I::flowPath(source, sink) and source.getConfiguration() = config
}
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }

View File

@@ -140,10 +140,8 @@ private module LambdaFlow {
}
pragma[nomagic]
private TReturnPositionSimple viableReturnPosLambda(
DataFlowCall call, DataFlowCallOption lastCall, ReturnKind kind
) {
result = TReturnPositionSimple0(viableCallableLambda(call, lastCall), kind)
private TReturnPositionSimple viableReturnPosLambda(DataFlowCall call, ReturnKind kind) {
result = TReturnPositionSimple0(viableCallableLambda(call, _), kind)
}
private predicate viableReturnPosOutNonLambda(
@@ -155,11 +153,12 @@ private module LambdaFlow {
)
}
pragma[nomagic]
private predicate viableReturnPosOutLambda(
DataFlowCall call, DataFlowCallOption lastCall, TReturnPositionSimple pos, OutNode out
DataFlowCall call, TReturnPositionSimple pos, OutNode out
) {
exists(ReturnKind kind |
pos = viableReturnPosLambda(call, lastCall, kind) and
pos = viableReturnPosLambda(call, kind) and
out = getAnOutNode(call, kind)
)
}
@@ -188,6 +187,7 @@ private module LambdaFlow {
else any()
}
pragma[assume_small_delta]
pragma[nomagic]
predicate revLambdaFlow0(
DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn,
@@ -274,6 +274,7 @@ private module LambdaFlow {
)
}
pragma[assume_small_delta]
pragma[nomagic]
predicate revLambdaFlowOut(
DataFlowCall lambdaCall, LambdaCallKind kind, TReturnPositionSimple pos, DataFlowType t,
@@ -285,7 +286,7 @@ private module LambdaFlow {
or
// non-linear recursion
revLambdaFlowOutLambdaCall(lambdaCall, kind, out, t, toJump, call, lastCall) and
viableReturnPosOutLambda(call, _, pos, out)
viableReturnPosOutLambda(call, pos, out)
)
}
@@ -424,7 +425,8 @@ private module Cached {
exists(ParameterPosition ppos |
viableParam(call, ppos, p) and
argumentPositionMatch(call, arg, ppos) and
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p))
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p)) and
golangSpecificParamArgFilter(call, p, arg)
)
}

View File

@@ -1,5 +1,5 @@
/**
* DEPRECATED: Use `Make` and `MakeWithState` instead.
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* Provides a `Configuration` class backwards-compatible interface to the data
* flow library.
@@ -11,6 +11,7 @@ import DataFlowImplSpecific::Public
private import DataFlowImpl
import DataFlowImplCommonPublic
import FlowStateString
private import codeql.util.Unit
/**
* A configuration of interprocedural data flow analysis. This defines
@@ -328,7 +329,6 @@ private module Config implements FullStateConfigSig {
}
private import Impl<Config> as I
import I
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
@@ -379,6 +379,8 @@ class PathNode instanceof I::PathNode {
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
}
module PathGraph = I::PathGraph;
private predicate hasFlow(Node source, Node sink, Configuration config) {
exists(PathNode source0, PathNode sink0 |
hasFlowPath(source0, sink0, config) and
@@ -388,7 +390,7 @@ private predicate hasFlow(Node source, Node sink, Configuration config) {
}
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
hasFlowPath(source, sink) and source.getConfiguration() = config
I::flowPath(source, sink) and source.getConfiguration() = config
}
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }

View File

@@ -7,9 +7,6 @@ private import python as Python
module Private {
import DataFlowPrivate
// import DataFlowDispatch
class Unit = Python::Unit;
}
module Public {

View File

@@ -10,6 +10,7 @@ private import FlowSummaryImplSpecific
private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
private import DataFlowImplCommon
private import codeql.util.Unit
/** Provides classes and predicates for defining flow summaries. */
module Public {
@@ -109,6 +110,7 @@ module Public {
}
/** Gets the stack obtained by dropping the first `i` elements, if any. */
pragma[assume_small_delta]
SummaryComponentStack drop(int i) {
i = 0 and result = this
or
@@ -213,6 +215,54 @@ module Public {
abstract predicate required(SummaryComponent head, SummaryComponentStack tail);
}
/**
* Gets the valid model origin values.
*/
private string getValidModelOrigin() {
result =
[
"ai", // AI (machine learning)
"df", // Dataflow (model generator)
"tb", // Type based (model generator)
"hq", // Heuristic query
]
}
/**
* A class used to represent provenance values for MaD models.
*
* The provenance value is a string of the form `origin-verification`
* (or just `manual`), where `origin` is a value indicating the
* origin of the model, and `verification` is a value indicating, how
* the model was verified.
*
* Examples could be:
* - `df-generated`: A model produced by the model generator, but not verified by a human.
* - `ai-manual`: A model produced by AI, but verified by a human.
*/
class Provenance extends string {
private string verification;
Provenance() {
exists(string origin | origin = getValidModelOrigin() |
this = origin + "-" + verification and
verification = ["manual", "generated"]
)
or
this = verification and verification = "manual"
}
/**
* Holds if this is a valid generated provenance value.
*/
predicate isGenerated() { verification = "generated" }
/**
* Holds if this is a valid manual provenance value.
*/
predicate isManual() { verification = "manual" }
}
/** A callable with a flow summary. */
abstract class SummarizedCallable extends SummarizedCallableBase {
bindingset[this]
@@ -246,41 +296,61 @@ module Public {
}
/**
* Holds if all the summaries that apply to `this` are auto generated and not manually created.
* Holds if there exists a generated summary that applies to this callable.
*/
final predicate isAutoGenerated() {
this.hasProvenance(["generated", "ai-generated"]) and not this.isManual()
final predicate hasGeneratedModel() {
exists(Provenance p | p.isGenerated() and this.hasProvenance(p))
}
/**
* Holds if there exists a manual summary that applies to `this`.
* Holds if all the summaries that apply to this callable are auto generated and not manually created.
* That is, only apply generated models, when there are no manual models.
*/
final predicate isManual() { this.hasProvenance("manual") }
final predicate applyGeneratedModel() {
this.hasGeneratedModel() and
not this.hasManualModel()
}
/**
* Holds if there exists a summary that applies to `this` that has provenance `provenance`.
* Holds if there exists a manual summary that applies to this callable.
*/
predicate hasProvenance(string provenance) { none() }
final predicate hasManualModel() {
exists(Provenance p | p.isManual() and this.hasProvenance(p))
}
/**
* Holds if there exists a manual summary that applies to this callable.
* Always apply manual models if they exist.
*/
final predicate applyManualModel() { this.hasManualModel() }
/**
* Holds if there exists a summary that applies to this callable
* that has provenance `provenance`.
*/
predicate hasProvenance(Provenance provenance) { provenance = "manual" }
}
/** A callable where there is no flow via the callable. */
class NeutralCallable extends SummarizedCallableBase {
NeutralCallable() { neutralElement(this, _) }
private Provenance provenance;
NeutralCallable() { neutralElement(this, provenance) }
/**
* Holds if the neutral is auto generated.
*/
predicate isAutoGenerated() { neutralElement(this, ["generated", "ai-generated"]) }
final predicate hasGeneratedModel() { provenance.isGenerated() }
/**
* Holds if there exists a manual neutral that applies to `this`.
* Holds if there exists a manual neutral that applies to this callable.
*/
final predicate isManual() { this.hasProvenance("manual") }
final predicate hasManualModel() { provenance.isManual() }
/**
* Holds if the neutral has provenance `provenance`.
* Holds if the neutral has provenance `p`.
*/
predicate hasProvenance(string provenance) { neutralElement(this, provenance) }
predicate hasProvenance(Provenance p) { p = provenance }
}
}
@@ -1015,12 +1085,18 @@ module Private {
private predicate relevantSummaryElementGenerated(
AccessPath inSpec, AccessPath outSpec, string kind
) {
summaryElement(this, inSpec, outSpec, kind, ["generated", "ai-generated"]) and
not summaryElement(this, _, _, _, "manual")
exists(Provenance provenance |
provenance.isGenerated() and
summaryElement(this, inSpec, outSpec, kind, provenance)
) and
not this.applyManualModel()
}
private predicate relevantSummaryElement(AccessPath inSpec, AccessPath outSpec, string kind) {
summaryElement(this, inSpec, outSpec, kind, "manual")
exists(Provenance provenance |
provenance.isManual() and
summaryElement(this, inSpec, outSpec, kind, provenance)
)
or
this.relevantSummaryElementGenerated(inSpec, outSpec, kind)
}
@@ -1039,7 +1115,7 @@ module Private {
)
}
override predicate hasProvenance(string provenance) {
override predicate hasProvenance(Provenance provenance) {
summaryElement(this, _, _, _, provenance)
}
}
@@ -1050,6 +1126,20 @@ module Private {
not exists(interpretComponent(c))
}
/** Holds if `provenance` is not a valid provenance value. */
bindingset[provenance]
predicate invalidProvenance(string provenance) { not provenance instanceof Provenance }
/**
* Holds if token `part` of specification `spec` has an invalid index.
* E.g., `Argument[-1]`.
*/
predicate invalidIndexComponent(AccessPath spec, AccessPathToken part) {
part = spec.getToken(_) and
part.getName() = ["Parameter", "Argument"] and
AccessPath::parseInt(part.getArgumentList()) < 0
}
private predicate inputNeedsReference(AccessPathToken c) {
c.getName() = "Argument" or
inputNeedsReferenceSpecific(c)
@@ -1207,11 +1297,11 @@ module Private {
}
private string renderProvenance(SummarizedCallable c) {
if c.isManual() then result = "manual" else c.hasProvenance(result)
if c.applyManualModel() then result = "manual" else c.hasProvenance(result)
}
private string renderProvenanceNeutral(NeutralCallable c) {
if c.isManual() then result = "manual" else c.hasProvenance(result)
if c.hasManualModel() then result = "manual" else c.hasProvenance(result)
}
/**

View File

@@ -65,31 +65,75 @@ private import semmle.python.dataflow.new.internal.DataFlowPrivate
*/
module ImportResolution {
/**
* Holds if the module `m` defines a name `name` by assigning `defn` to it. This is an
* overapproximation, as `name` may not in fact be exported (e.g. by defining an `__all__` that does
* not include `name`).
* Holds if there is an ESSA step from `defFrom` to `defTo`, which should be allowed
* for import resolution.
*/
private predicate allowedEssaImportStep(EssaDefinition defFrom, EssaDefinition defTo) {
// to handle definitions guarded by if-then-else
defFrom = defTo.(PhiFunction).getAnInput()
or
// refined variable
// example: https://github.com/nvbn/thefuck/blob/ceeaeab94b5df5a4fe9d94d61e4f6b0bbea96378/thefuck/utils.py#L25-L45
defFrom = defTo.(EssaNodeRefinement).getInput().getDefinition()
}
/**
* Holds if the module `m` defines a name `name` with the value `val`. The value
* represents the value `name` will have at the end of the module (the last place we
* have def-use flow to).
*
* Note: The handling of re-exporting imports is a bit simplistic. We assume that if
* an import is made, it will be re-exported (which will not be the case if a new
* value is assigned to the name, or it is deleted).
*/
pragma[nomagic]
predicate module_export(Module m, string name, DataFlow::CfgNode defn) {
exists(EssaVariable v, EssaDefinition essaDef |
v.getName() = name and
v.getAUse() = ImportStar::getStarImported*(m).getANormalExit() and
(
essaDef = v.getDefinition()
or
// to handle definitions guarded by if-then-else
essaDef = v.getDefinition().(PhiFunction).getAnInput()
)
predicate module_export(Module m, string name, DataFlow::Node val) {
// Definitions made inside `m` itself
//
// for code such as `foo = ...; foo.bar = ...` there will be TWO
// EssaDefinition/EssaVariable. One for `foo = ...` (AssignmentDefinition) and one
// for `foo.bar = ...`. The one for `foo.bar = ...` (EssaNodeRefinement). The
// EssaNodeRefinement is the one that will reach the end of the module (normal
// exit).
//
// However, we cannot just use the EssaNodeRefinement as the `val`, because the
// normal data-flow depends on use-use flow, and use-use flow targets CFG nodes not
// EssaNodes. So we need to go back from the EssaDefinition/EssaVariable that
// reaches the end of the module, to the first definition of the variable, and then
// track forwards using use-use flow to find a suitable CFG node that has flow into
// it from use-use flow.
exists(EssaVariable lastUseVar, EssaVariable firstDef |
lastUseVar.getName() = name and
// we ignore special variable $ introduced by our analysis (not used for anything)
// we ignore special variable * introduced by `from <pkg> import *` -- TODO: understand why we even have this?
not name in ["$", "*"] and
lastUseVar.getAUse() = m.getANormalExit() and
allowedEssaImportStep*(firstDef, lastUseVar) and
not allowedEssaImportStep(_, firstDef)
|
defn.getNode() = essaDef.(AssignmentDefinition).getValue()
not EssaFlow::defToFirstUse(firstDef, _) and
val.asVar() = firstDef
or
defn.getNode() = essaDef.(ArgumentRefinement).getArgument()
exists(ControlFlowNode mid, ControlFlowNode end |
EssaFlow::defToFirstUse(firstDef, mid) and
EssaFlow::useToNextUse*(mid, end) and
not EssaFlow::useToNextUse(end, _) and
val.asCfgNode() = end
)
)
or
// re-exports from `from <pkg> import *`
exists(Module importedFrom |
importedFrom = ImportStar::getStarImported(m) and
module_export(importedFrom, name, val) and
potential_module_export(importedFrom, name)
)
or
// re-exports from `import <pkg>` or `from <pkg> import <stuff>`
exists(Alias a |
defn.asExpr() = [a.getValue(), a.getValue().(ImportMember).getModule()] and
val.asExpr() = a.getValue() and
a.getAsname().(Name).getId() = name and
defn.getScope() = m
val.getScope() = m
)
}
@@ -263,9 +307,21 @@ module ImportResolution {
module_reexport(reexporter, attr_name, m)
)
or
// Submodules that are implicitly defined with relative imports of the form `from .foo import ...`.
// In practice, we create a definition for each module in a package, even if it is not imported.
// submodules of packages will be available as `<pkg>.<submodule>` after doing
// `import <pkg>.<submodule>` at least once in the program, or can be directly
// imported with `from <pkg> import <submodule>` (even with an empty
// `<pkg>.__init__` file).
//
// Until an import of `<pkg>.<submodule>` is executed, it is technically possible
// that `<pkg>.<submodule>` (or `from <pkg> import <submodule>`) can refer to an
// attribute set in `<pkg>.__init__`.
//
// Therefore, if there is an attribute defined in `<pkg>.__init__` with the same
// name as a submodule, we always consider that this attribute _could_ be a
// reference to the submodule, even if we don't know that the submodule has been
// imported yet.
exists(string submodule, Module package |
submodule = result.asVar().getName() and
SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
package.getEntryNode()) and
m = getModuleFromName(package.getPackageName() + "." + submodule)

View File

@@ -33,9 +33,9 @@ private module AddTaintDefaults<DataFlowInternal::FullStateConfigSig Config> imp
}
/**
* Constructs a standard taint tracking computation.
* Constructs a global taint tracking computation.
*/
module Make<DataFlow::ConfigSig Config> implements DataFlow::DataFlowSig {
module Global<DataFlow::ConfigSig Config> implements DataFlow::GlobalFlowSig {
private module Config0 implements DataFlowInternal::FullStateConfigSig {
import DataFlowInternal::DefaultState<Config>
import Config
@@ -48,10 +48,15 @@ module Make<DataFlow::ConfigSig Config> implements DataFlow::DataFlowSig {
import DataFlowInternal::Impl<C>
}
/** DEPRECATED: Use `Global` instead. */
deprecated module Make<DataFlow::ConfigSig Config> implements DataFlow::GlobalFlowSig {
import Global<Config>
}
/**
* Constructs a taint tracking computation using flow state.
* Constructs a global taint tracking computation using flow state.
*/
module MakeWithState<DataFlow::StateConfigSig Config> implements DataFlow::DataFlowSig {
module GlobalWithState<DataFlow::StateConfigSig Config> implements DataFlow::GlobalFlowSig {
private module Config0 implements DataFlowInternal::FullStateConfigSig {
import Config
}
@@ -62,3 +67,8 @@ module MakeWithState<DataFlow::StateConfigSig Config> implements DataFlow::DataF
import DataFlowInternal::Impl<C>
}
/** DEPRECATED: Use `GlobalWithState` instead. */
deprecated module MakeWithState<DataFlow::StateConfigSig Config> implements DataFlow::GlobalFlowSig {
import GlobalWithState<Config>
}

View File

@@ -9,11 +9,10 @@ private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
/** Provides models for the `aiomysql` PyPI package. */
private module Aiomysql {
private import semmle.python.internal.Awaited
/**
* Gets a `ConnectionPool` that is created when the result of `aiomysql.create_pool()` is awaited.
* See https://aiomysql.readthedocs.io/en/stable/pool.html
@@ -23,49 +22,29 @@ private module Aiomysql {
}
/**
* Gets a `Connection` that is created when
* A Connection that is created when
* - the result of `aiomysql.connect()` is awaited.
* - the result of calling `acquire` on a `ConnectionPool` is awaited.
* See https://aiomysql.readthedocs.io/en/stable/connection.html#connection
* See
* - https://aiomysql.readthedocs.io/en/stable/connection.html#connection
* - https://aiomysql.readthedocs.io/en/stable/pool.html#Pool.acquire
*/
API::Node connection() {
result = API::moduleImport("aiomysql").getMember("connect").getReturn().getAwaited()
or
result = connectionPool().getMember("acquire").getReturn().getAwaited()
class AiomysqlConnection extends PEP249::AsyncDatabaseConnection {
AiomysqlConnection() {
this = API::moduleImport("aiomysql").getMember("connect").getReturn().getAwaited()
or
this = connectionPool().getMember("acquire").getReturn().getAwaited()
}
}
/**
* Gets a `Cursor` that is created when
* An additional cursor, that is created when
* - the result of calling `cursor` on a `ConnectionPool` is awaited.
* - the result of calling `cursor` on a `Connection` is awaited.
* See https://aiomysql.readthedocs.io/en/stable/cursors.html
* See
* - https://aiomysql.readthedocs.io/en/stable/pool.html##Pool.cursor
*/
API::Node cursor() {
result = connectionPool().getMember("cursor").getReturn().getAwaited()
or
result = connection().getMember("cursor").getReturn().getAwaited()
}
/**
* A query. Calling `execute` on a `Cursor` constructs a query.
* See https://aiomysql.readthedocs.io/en/stable/cursors.html#Cursor.execute
*/
class CursorExecuteCall extends SqlConstruction::Range, API::CallNode {
CursorExecuteCall() { this = cursor().getMember("execute").getACall() }
override DataFlow::Node getSql() { result = this.getParameter(0, "operation").asSink() }
}
/**
* An awaited query. Awaiting the result of calling `execute` executes the query.
* See https://aiomysql.readthedocs.io/en/stable/cursors.html#Cursor.execute
*/
class AwaitedCursorExecuteCall extends SqlExecution::Range {
CursorExecuteCall executeCall;
AwaitedCursorExecuteCall() { this = executeCall.getReturn().getAwaited().asSource() }
override DataFlow::Node getSql() { result = executeCall.getSql() }
class AiomysqlCursor extends PEP249::AsyncDatabaseCursor {
AiomysqlCursor() { this = connectionPool().getMember("cursor").getReturn().getAwaited() }
}
/**

View File

@@ -9,11 +9,10 @@ private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
/** Provides models for the `aiopg` PyPI package. */
private module Aiopg {
private import semmle.python.internal.Awaited
/**
* Gets a `ConnectionPool` that is created when the result of `aiopg.create_pool()` is awaited.
* See https://aiopg.readthedocs.io/en/stable/core.html#pool
@@ -23,49 +22,29 @@ private module Aiopg {
}
/**
* Gets a `Connection` that is created when
* A Connection that is created when
* - the result of `aiopg.connect()` is awaited.
* - the result of calling `acquire` on a `ConnectionPool` is awaited.
* See https://aiopg.readthedocs.io/en/stable/core.html#connection
* See
* - https://aiopg.readthedocs.io/en/stable/core.html#connection
* - https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Pool.acquire
*/
API::Node connection() {
result = API::moduleImport("aiopg").getMember("connect").getReturn().getAwaited()
or
result = connectionPool().getMember("acquire").getReturn().getAwaited()
class AiopgConnection extends PEP249::AsyncDatabaseConnection {
AiopgConnection() {
this = API::moduleImport("aiopg").getMember("connect").getReturn().getAwaited()
or
this = connectionPool().getMember("acquire").getReturn().getAwaited()
}
}
/**
* Gets a `Cursor` that is created when
* An additional cursor, that is created when
* - the result of calling `cursor` on a `ConnectionPool` is awaited.
* - the result of calling `cursor` on a `Connection` is awaited.
* See https://aiopg.readthedocs.io/en/stable/core.html#cursor
* See
* - https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Pool.cursor
*/
API::Node cursor() {
result = connectionPool().getMember("cursor").getReturn().getAwaited()
or
result = connection().getMember("cursor").getReturn().getAwaited()
}
/**
* A query. Calling `execute` on a `Cursor` constructs a query.
* See https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Cursor.execute
*/
class CursorExecuteCall extends SqlConstruction::Range, API::CallNode {
CursorExecuteCall() { this = cursor().getMember("execute").getACall() }
override DataFlow::Node getSql() { result = this.getParameter(0, "operation").asSink() }
}
/**
* An awaited query. Awaiting the result of calling `execute` executes the query.
* See https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Cursor.execute
*/
class AwaitedCursorExecuteCall extends SqlExecution::Range {
CursorExecuteCall execute;
AwaitedCursorExecuteCall() { this = execute.getReturn().getAwaited().asSource() }
override DataFlow::Node getSql() { result = execute.getSql() }
class AiopgCursor extends PEP249::AsyncDatabaseCursor {
AiopgCursor() { this = connectionPool().getMember("cursor").getReturn().getAwaited() }
}
/**

View File

@@ -0,0 +1,39 @@
/**
* Provides classes modeling security-relevant aspects of the `aiosqlite` PyPI package.
* See
* - https://pypi.org/project/aiosqlite/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
/** Provides models for the `aiosqlite` PyPI package. */
private module Aiosqlite {
/**
* A model of `aiosqlite` as a module that implements PEP 249 using asyncio, providing
* ways to execute SQL statements against a database.
*/
class AiosqlitePEP249 extends PEP249::AsyncPEP249ModuleApiNode {
AiosqlitePEP249() { this = API::moduleImport("aiosqlite") }
}
/**
* An additional cursor, that is return from the coroutine Connection.execute,
* see https://aiosqlite.omnilib.dev/en/latest/api.html#aiosqlite.Connection.execute
*/
class AiosqliteCursor extends PEP249::AsyncDatabaseCursor {
AiosqliteCursor() {
this =
API::moduleImport("aiosqlite")
.getMember("connect")
.getReturn()
.getAwaited()
.getMember("execute")
.getReturn()
.getAwaited()
}
}
}

View File

@@ -22,6 +22,7 @@ private module Asyncpg {
// * - the result of `asyncpg.connect()` is awaited.
// * - the result of calling `acquire` on a `ConnectionPool` is awaited.
"asyncpg.Connection;asyncpg;Member[connect].ReturnValue.Awaited",
"asyncpg.Connection;asyncpg;Member[connection].Member[connect].ReturnValue.Awaited",
"asyncpg.Connection;asyncpg.ConnectionPool;Member[acquire].ReturnValue.Awaited",
// Creating an internal `~Connection` type that contains both `Connection` and `ConnectionPool`.
"asyncpg.~Connection;asyncpg.Connection;", //

View File

@@ -0,0 +1,61 @@
/**
* Provides classes modeling security-relevant aspects of the `cassandra-driver` PyPI package.
* See https://pypi.org/project/cassandra-driver/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
/**
* Provides models for the `cassandra-driver` PyPI package.
* See https://pypi.org/project/cassandra-driver/
*/
private module CassandraDriver {
/**
* A cassandra cluster session.
*
* see
* - https://docs.datastax.com/en/developer/python-driver/3.25/api/cassandra/cluster/#cassandra.cluster.Cluster.connect
* - https://docs.datastax.com/en/developer/python-driver/3.25/api/cassandra/cluster/#cassandra.cluster.Session
*/
API::Node session() {
result =
API::moduleImport("cassandra")
.getMember("cluster")
.getMember("Cluster")
.getReturn()
.getMember("connect")
.getReturn()
}
/**
* see https://docs.datastax.com/en/developer/python-driver/3.25/api/cassandra/cluster/#cassandra.cluster.Session.execute
*/
class CassandraSessionExecuteCall extends SqlExecution::Range, API::CallNode {
CassandraSessionExecuteCall() { this = session().getMember("execute").getACall() }
override DataFlow::Node getSql() { result = this.getParameter(0, "query").asSink() }
}
/**
* see https://docs.datastax.com/en/developer/python-driver/3.25/api/cassandra/cluster/#cassandra.cluster.Session.execute_async
*/
class CassandraSessionExecuteAsyncCall extends SqlConstruction::Range, API::CallNode {
CassandraSessionExecuteAsyncCall() { this = session().getMember("execute_async").getACall() }
override DataFlow::Node getSql() { result = this.getParameter(0, "query").asSink() }
}
/**
* see https://docs.datastax.com/en/developer/python-driver/3.25/api/cassandra/cluster/#cassandra.cluster.Session.prepare
*/
class CassandraSessionPrepareCall extends SqlConstruction::Range, API::CallNode {
CassandraSessionPrepareCall() { this = session().getMember("prepare").getACall() }
override DataFlow::Node getSql() { result = this.getParameter(0, "query").asSink() }
}
}

View File

@@ -561,8 +561,8 @@ module PrivateDjango {
API::Node connection() { result = db().getMember("connection") }
/** A `django.db.connection` is a PEP249 compliant DB connection. */
class DjangoDbConnection extends PEP249::Connection::InstanceSource {
DjangoDbConnection() { this = connection().asSource() }
class DjangoDbConnection extends PEP249::DatabaseConnection {
DjangoDbConnection() { this = connection() }
}
// -------------------------------------------------------------------------

View File

@@ -22,6 +22,148 @@ module PEP249 {
override string toString() { result = this.(API::Node).toString() }
}
/**
* An API graph node representing a database connection.
*/
abstract class DatabaseConnection extends API::Node {
/** Gets a string representation of this element. */
override string toString() { result = this.(API::Node).toString() }
}
private class DefaultDatabaseConnection extends DatabaseConnection {
DefaultDatabaseConnection() {
this = any(PEP249ModuleApiNode mod).getMember("connect").getReturn()
}
}
/**
* An API graph node representing a database cursor.
*/
abstract class DatabaseCursor extends API::Node {
/** Gets a string representation of this element. */
override string toString() { result = this.(API::Node).toString() }
}
private class DefaultDatabaseCursor extends DatabaseCursor {
DefaultDatabaseCursor() { this = any(DatabaseConnection conn).getMember("cursor").getReturn() }
}
private string getSqlKwargName() {
result in ["sql", "statement", "operation", "query", "query_string", "sql_script"]
}
private string getExecuteMethodName() {
result in ["execute", "executemany", "executescript", "execute_insert", "execute_fetchall"]
}
/**
* A call to an execute method on a database cursor or a connection, such as `execute`
* or `executemany`.
*
* See
* - https://peps.python.org/pep-0249/#execute
* - https://peps.python.org/pep-0249/#executemany
*
* Note: While `execute` method on a connection is not part of PEP249, if it is used, we
* recognize it as an alias for constructing a cursor and calling `execute` on it.
*/
private class ExecuteMethodCall extends SqlExecution::Range, API::CallNode {
ExecuteMethodCall() {
exists(API::Node start |
start instanceof DatabaseCursor or start instanceof DatabaseConnection
|
this = start.getMember(getExecuteMethodName()).getACall()
)
}
override DataFlow::Node getSql() {
result in [this.getArg(0), this.getArgByName(getSqlKwargName()),]
}
}
// ---------------------------------------------------------------------------
// asyncio implementations
// ---------------------------------------------------------------------------
//
// we differentiate between normal and asyncio implementations, since we model the
// `execute` call differently -- as a SqlExecution vs SqlConstruction, since the SQL
// is only executed in asyncio after being awaited (which might happen in something
// like `asyncio.gather`)
/**
* An API graph node representing a module that implements PEP 249 using asyncio.
*/
abstract class AsyncPEP249ModuleApiNode extends API::Node {
/** Gets a string representation of this element. */
override string toString() { result = this.(API::Node).toString() }
}
/**
* An API graph node representing a asyncio database connection (after being awaited).
*/
abstract class AsyncDatabaseConnection extends API::Node {
/** Gets a string representation of this element. */
override string toString() { result = this.(API::Node).toString() }
}
private class DefaultAsyncDatabaseConnection extends AsyncDatabaseConnection {
DefaultAsyncDatabaseConnection() {
this = any(AsyncPEP249ModuleApiNode mod).getMember("connect").getReturn().getAwaited()
}
}
/**
* An API graph node representing a asyncio database cursor (after being awaited).
*/
abstract class AsyncDatabaseCursor extends API::Node {
/** Gets a string representation of this element. */
override string toString() { result = this.(API::Node).toString() }
}
private class DefaultAsyncDatabaseCursor extends AsyncDatabaseCursor {
DefaultAsyncDatabaseCursor() {
this = any(AsyncDatabaseConnection conn).getMember("cursor").getReturn().getAwaited()
}
}
/**
* A call to an execute method on an asyncio database cursor or an asyncio connection,
* such as `execute` or `executemany`.
*
* (This is not an SqlExecution, since that only happens when the coroutine is
* awaited)
*
* See ExecuteMethodCall for more details.
*/
private class AsyncExecuteMethodCall extends SqlConstruction::Range, API::CallNode {
AsyncExecuteMethodCall() {
exists(API::Node start |
start instanceof AsyncDatabaseCursor or start instanceof AsyncDatabaseConnection
|
this = start.getMember(getExecuteMethodName()).getACall()
)
}
override DataFlow::Node getSql() {
result in [this.getArg(0), this.getArgByName(getSqlKwargName()),]
}
}
/** Actual execution of the AsyncExecuteMethodCall coroutine. */
private class AwaitedAsyncExecuteMethodCall extends SqlExecution::Range {
AsyncExecuteMethodCall execute;
AwaitedAsyncExecuteMethodCall() { this = execute.getReturn().getAwaited().asSource() }
override DataFlow::Node getSql() { result = execute.getSql() }
}
// ---------------------------------------------------------------------------
// old impl
// ---------------------------------------------------------------------------
// the goal is to deprecate it in favour of the API graph version, but currently this
// requires a rewrite of the Peewee modeling, which depends on rewriting the
// instance/instance-source stuff to use API graphs instead.
// so is postponed for now.
/** Gets a reference to the `connect` function of a module that implements PEP 249. */
DataFlow::Node connect() {
result = any(PEP249ModuleApiNode a).getMember("connect").getAValueReachableFromSource()
@@ -147,7 +289,10 @@ module PEP249 {
* recognize it as an alias for constructing a cursor and calling `execute` on it.
*/
private class ExecuteCall extends SqlExecution::Range, DataFlow::CallCfgNode {
ExecuteCall() { this.getFunction() = execute() }
ExecuteCall() {
this.getFunction() = execute() and
not this instanceof ExecuteMethodCall
}
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("sql")] }
}
@@ -170,8 +315,13 @@ module PEP249 {
* recognize it as an alias for constructing a cursor and calling `executemany` on it.
*/
private class ExecutemanyCall extends SqlExecution::Range, DataFlow::CallCfgNode {
ExecutemanyCall() { this.getFunction() = executemany() }
ExecutemanyCall() {
this.getFunction() = executemany() and
not this instanceof ExecuteMethodCall
}
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("sql")] }
override DataFlow::Node getSql() {
result in [this.getArg(0), this.getArgByName(getSqlKwargName())]
}
}
}

View File

@@ -163,11 +163,9 @@ private module Peewee {
* A call to the `connection` method on a `peewee.Database` instance.
* https://docs.peewee-orm.com/en/latest/peewee/api.html#Database.connection.
*/
class PeeweeDatabaseConnectionCall extends PEP249::Connection::InstanceSource,
DataFlow::CallCfgNode
{
class PeeweeDatabaseConnectionCall extends PEP249::DatabaseConnection {
PeeweeDatabaseConnectionCall() {
this = Database::instance().getMember("connection").getACall()
this = Database::instance().getMember("connection").getReturn()
}
}
@@ -175,8 +173,8 @@ private module Peewee {
* A call to the `cursor` method on a `peewee.Database` instance.
* https://docs.peewee-orm.com/en/latest/peewee/api.html#Database.cursor.
*/
class PeeweeDatabaseCursorCall extends PEP249::Cursor::InstanceSource, DataFlow::CallCfgNode {
PeeweeDatabaseCursorCall() { this = Database::instance().getMember("cursor").getACall() }
class PeeweeDatabaseCursorCall extends PEP249::DatabaseCursor {
PeeweeDatabaseCursorCall() { this = Database::instance().getMember("cursor").getReturn() }
}
/**

View File

@@ -2435,9 +2435,14 @@ private module StdlibPrivate {
* against a database.
*
* See https://devdocs.io/python~3.9/library/sqlite3
* https://github.com/python/cpython/blob/3.11/Lib/sqlite3/dbapi2.py
*/
class Sqlite3 extends PEP249::PEP249ModuleApiNode {
Sqlite3() { this = API::moduleImport("sqlite3") }
Sqlite3() {
this = API::moduleImport("sqlite3")
or
this = API::moduleImport("sqlite3").getMember("dbapi2")
}
}
// ---------------------------------------------------------------------------

View File

@@ -63,10 +63,9 @@
* the type is not intended to match a static type.
*/
private import codeql.util.Unit
private import ApiGraphModelsSpecific as Specific
private class Unit = Specific::Unit;
private module API = Specific::API;
private module DataFlow = Specific::DataFlow;

View File

@@ -22,9 +22,6 @@
private import python as PY
private import ApiGraphModels
import semmle.python.ApiGraphs::API as API
class Unit = PY::Unit;
// Re-export libraries needed by ApiGraphModels.qll
import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax
import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow

View File

@@ -140,7 +140,11 @@ string mode_from_node(DataFlow::Node node) { node = re_flag_tracker(result) }
/** A StrConst used as a regular expression */
abstract class RegexString extends Expr {
RegexString() { (this instanceof Bytes or this instanceof Unicode) }
RegexString() {
(this instanceof Bytes or this instanceof Unicode) and
// is part of the user code
exists(this.getLocation().getFile().getRelativePath())
}
/**
* Helper predicate for `char_set_start(int start, int end)`.