Merge pull request #8883 from erik-krogh/pyMaD

Python: add MaD implementation
This commit is contained in:
Rasmus Wriedt Larsen
2022-05-30 13:31:07 +02:00
committed by GitHub
21 changed files with 1500 additions and 119 deletions

View File

@@ -525,7 +525,8 @@
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll",
"java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll",
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll",
"ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll"
"ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll",
"python/ql/lib/semmle/python/frameworks/data/internal/AccessPathSyntax.qll"
],
"IncompleteUrlSubstringSanitization": [
"javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll",
@@ -543,7 +544,8 @@
],
"ApiGraphModels": [
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll",
"ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll"
"ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll",
"python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll"
],
"TaintedFormatStringQuery Ruby/JS": [
"javascript/ql/lib/semmle/javascript/security/dataflow/TaintedFormatStringQuery.qll",

View File

@@ -299,7 +299,7 @@ private class AccessPathRange extends AccessPath::Range {
bindingset[token]
API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) {
// API graphs use the same label for arguments and parameters. An edge originating from a
// use-node represents be an argument, and an edge originating from a def-node represents a parameter.
// use-node represents an argument, and an edge originating from a def-node represents a parameter.
// We just map both to the same thing.
token.getName() = ["Argument", "Parameter"] and
result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument()))

View File

@@ -136,6 +136,9 @@ module API {
result = this.getASuccessor(Label::keywordParameter(name))
}
/** Gets the node representing the self parameter */
Node getSelfParameter() { result = this.getASuccessor(Label::selfParameter()) }
/**
* Gets the number of parameters of the function represented by this node.
*/
@@ -321,6 +324,12 @@ module API {
/** Gets the API node for a parameter of this invocation. */
Node getAParameter() { result = this.getParameter(_) }
/** Gets the object that this method-call is being called on, if this is a method-call */
Node getSelfParameter() {
result.getARhs() = this.(DataFlow::MethodCallNode).getObject() and
result = callee.getSelfParameter()
}
/** Gets the API node for the keyword parameter `name` of this invocation. */
Node getKeywordParameter(string name) {
result = callee.getKeywordParameter(name) and
@@ -345,6 +354,14 @@ module API {
result = callee.getReturn() and
result.getAnImmediateUse() = this
}
/**
* Gets the number of positional arguments of this call.
*
* Note: This is used for `WithArity[<n>]` in modeling-as-data, where we thought
* including keyword arguments didn't make much sense.
*/
int getNumArgument() { result = count(this.getArg(_)) }
}
/**
@@ -589,15 +606,24 @@ module API {
exists(DataFlow::Node def, PY::CallableExpr fn |
rhs(base, def) and fn = trackDefNode(def).asExpr()
|
exists(int i |
lbl = Label::parameter(i) and
exists(int i, int offset |
if exists(PY::Parameter p | p = fn.getInnerScope().getAnArg() and p.isSelf())
then offset = 1
else offset = 0
|
lbl = Label::parameter(i - offset) and
ref.asExpr() = fn.getInnerScope().getArg(i)
)
or
exists(string name |
exists(string name, PY::Parameter param |
lbl = Label::keywordParameter(name) and
ref.asExpr() = fn.getInnerScope().getArgByName(name)
param = fn.getInnerScope().getArgByName(name) and
not param.isSelf() and
ref.asExpr() = param
)
or
lbl = Label::selfParameter() and
ref.asExpr() = any(PY::Parameter p | p = fn.getInnerScope().getAnArg() and p.isSelf())
)
or
// Built-ins, treated as members of the module `builtins`
@@ -664,6 +690,9 @@ module API {
exists(string name | lbl = Label::keywordParameter(name) |
arg = pred.getACall().getArgByName(name)
)
or
lbl = Label::selfParameter() and
arg = pred.getACall().(DataFlow::MethodCallNode).getObject()
)
}
@@ -780,6 +809,7 @@ module API {
or
exists(any(PY::Function f).getArgByName(name))
} or
MkLabelSelfParameter() or
MkLabelReturn() or
MkLabelSubclass() or
MkLabelAwait()
@@ -837,6 +867,11 @@ module API {
string getName() { result = name }
}
/** A label for the self parameter. */
class LabelSelfParameter extends ApiLabel, MkLabelSelfParameter {
override string toString() { result = "getSelfParameter()" }
}
/** A label that gets the return value of a function. */
class LabelReturn extends ApiLabel, MkLabelReturn {
override string toString() { result = "getReturn()" }
@@ -876,6 +911,9 @@ module API {
/** Gets the `parameter` edge label for the keyword parameter `name`. */
LabelKeywordParameter keywordParameter(string name) { result.getName() = name }
/** Gets the edge label for the self parameter. */
LabelSelfParameter selfParameter() { any() }
/** Gets the `return` edge label. */
LabelReturn return() { any() }

View File

@@ -12,6 +12,7 @@ private import semmle.python.frameworks.Asyncpg
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Cryptodome
private import semmle.python.frameworks.Cryptography
private import semmle.python.frameworks.data.ModelsAsData
private import semmle.python.frameworks.Dill
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric

View File

@@ -7,91 +7,42 @@ private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.data.ModelsAsData
/** Provides models for the `asyncpg` PyPI package. */
private module Asyncpg {
private import semmle.python.internal.Awaited
/** Gets a `ConnectionPool` that is created when the result of `asyncpg.create_pool()` is awaited. */
API::Node connectionPool() {
result = API::moduleImport("asyncpg").getMember("create_pool").getReturn().getAwaited()
}
/**
* Gets a `Connection` that is created when
* - the result of `asyncpg.connect()` is awaited.
* - the result of calling `acquire` on a `ConnectionPool` is awaited.
*/
API::Node connection() {
result = API::moduleImport("asyncpg").getMember("connect").getReturn().getAwaited()
or
result = connectionPool().getMember("acquire").getReturn().getAwaited()
}
/** `Connection`s and `ConnectionPool`s provide some methods that execute SQL. */
class SqlExecutionOnConnection extends SqlExecution::Range, DataFlow::MethodCallNode {
string methodName;
SqlExecutionOnConnection() {
this = [connectionPool(), connection()].getMember(methodName).getACall() and
methodName in ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval", "executemany"]
}
override DataFlow::Node getSql() {
methodName in ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval"] and
result in [this.getArg(0), this.getArgByName("query")]
or
methodName = "executemany" and
result in [this.getArg(0), this.getArgByName("command")]
class AsyncpgModel extends ModelInput::TypeModelCsv {
override predicate row(string row) {
// package1;type1;package2;type2;path
row =
[
// a `ConnectionPool` that is created when the result of `asyncpg.create_pool()` is awaited.
"asyncpg;ConnectionPool;asyncpg;;Member[create_pool].ReturnValue.Awaited",
// a `Connection` that is created when
// * - the result of `asyncpg.connect()` is awaited.
// * - the result of calling `acquire` on a `ConnectionPool` is awaited.
"asyncpg;Connection;asyncpg;;Member[connect].ReturnValue.Awaited",
"asyncpg;Connection;asyncpg;ConnectionPool;Member[acquire].ReturnValue.Awaited",
// Creating an internal `~Connection` type that contains both `Connection` and `ConnectionPool`.
"asyncpg;~Connection;asyncpg;Connection;", "asyncpg;~Connection;asyncpg;ConnectionPool;"
]
}
}
/** A model of `Connection` and `ConnectionPool`, which provide some methods that access the file system. */
class FileAccessOnConnection extends FileSystemAccess::Range, DataFlow::MethodCallNode {
string methodName;
FileAccessOnConnection() {
this = [connectionPool(), connection()].getMember(methodName).getACall() and
methodName in ["copy_from_query", "copy_from_table", "copy_to_table"]
}
// The path argument is keyword only.
override DataFlow::Node getAPathArgument() {
methodName in ["copy_from_query", "copy_from_table"] and
result = this.getArgByName("output")
or
methodName = "copy_to_table" and
result = this.getArgByName("source")
}
}
/**
* Provides models of the `PreparedStatement` class in `asyncpg`.
* `PreparedStatement`s are created when the result of calling `prepare(query)` on a connection is awaited.
* The result of calling `prepare(query)` is a `PreparedStatementFactory` and the argument, `query` needs to
* be tracked to the place where a `PreparedStatement` is created and then further to any executing methods.
* Hence the two type trackers.
*/
module PreparedStatement {
class PreparedStatementConstruction extends SqlConstruction::Range, API::CallNode {
PreparedStatementConstruction() { this = connection().getMember("prepare").getACall() }
override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() }
}
class PreparedStatementExecution extends SqlExecution::Range, API::CallNode {
PreparedStatementConstruction prepareCall;
PreparedStatementExecution() {
this =
prepareCall
.getReturn()
.getAwaited()
.getMember(["executemany", "fetch", "fetchrow", "fetchval"])
.getACall()
}
override DataFlow::Node getSql() { result = prepareCall.getSql() }
class AsyncpgSink extends ModelInput::SinkModelCsv {
// package;type;path;kind
override predicate row(string row) {
row =
[
// `Connection`s and `ConnectionPool`s provide some methods that execute SQL.
"asyncpg;~Connection;Member[copy_from_query,execute,fetch,fetchrow,fetchval].Argument[0,query:];sql-injection",
"asyncpg;~Connection;Member[executemany].Argument[0,command:];sql-injection",
// A model of `Connection` and `ConnectionPool`, which provide some methods that access the file system.
"asyncpg;~Connection;Member[copy_from_query,copy_from_table].Argument[output:];path-injection",
"asyncpg;~Connection;Member[copy_to_table].Argument[source:];path-injection",
// the `PreparedStatement` class in `asyncpg`.
"asyncpg;Connection;Member[prepare].Argument[0,query:];sql-injection",
]
}
}
@@ -106,7 +57,9 @@ private module Asyncpg {
*/
module Cursor {
class CursorConstruction extends SqlConstruction::Range, API::CallNode {
CursorConstruction() { this = connection().getMember("cursor").getACall() }
CursorConstruction() {
this = ModelOutput::getATypeNode("asyncpg", "Connection").getMember("cursor").getACall()
}
override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() }
}
@@ -121,8 +74,11 @@ private module Asyncpg {
this = c.getReturn().getAwaited().getAnImmediateUse()
)
or
exists(PreparedStatement::PreparedStatementConstruction prepareCall |
sql = prepareCall.getSql() and
exists(API::CallNode prepareCall |
prepareCall =
ModelOutput::getATypeNode("asyncpg", "Connection").getMember("prepare").getACall()
|
sql = prepareCall.getParameter(0, "query").getARhs() and
this =
prepareCall
.getReturn()

View File

@@ -0,0 +1,47 @@
/**
* Provides classes for contributing a model, or using the interpreted results
* of a model represented as data.
*
* - Use the `ModelInput` module to contribute new models.
* - Use the `ModelOutput` module to access the model results in terms of API nodes.
*
* The package name refers to the top-level module the import comes from, and not a PyPI package.
* So for `from foo.bar import baz`, the package will be `foo`.
*/
private import python
private import internal.ApiGraphModels as Shared
private import internal.ApiGraphModelsSpecific as Specific
import Shared::ModelInput as ModelInput
import Shared::ModelOutput as ModelOutput
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.TaintTracking
/**
* A remote flow source originating from a CSV source row.
*/
private class RemoteFlowSourceFromCsv extends RemoteFlowSource {
RemoteFlowSourceFromCsv() { this = ModelOutput::getASourceNode("remote").getAnImmediateUse() }
override string getSourceType() { result = "Remote flow (from model)" }
}
/**
* Like `ModelOutput::summaryStep` but with API nodes mapped to data-flow nodes.
*/
private predicate summaryStepNodes(DataFlow::Node pred, DataFlow::Node succ, string kind) {
exists(API::Node predNode, API::Node succNode |
Specific::summaryStep(predNode, succNode, kind) and
pred = predNode.getARhs() and
succ = succNode.getAnImmediateUse()
)
}
/** Taint steps induced by summary models of kind `taint`. */
private class TaintStepFromSummary extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
summaryStepNodes(pred, succ, "taint")
}
}

View File

@@ -0,0 +1,182 @@
/**
* Module for parsing access paths from CSV models, both the identifying access path used
* by dynamic languages, and the input/output specifications for summary steps.
*
* This file is used by the shared data flow library and by the JavaScript libraries
* (which does not use the shared data flow libraries).
*/
/**
* Convenience-predicate for extracting two capture groups at once.
*/
bindingset[input, regexp]
private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) {
capture1 = input.regexpCapture(regexp, 1) and
capture2 = input.regexpCapture(regexp, 2)
}
/** Companion module to the `AccessPath` class. */
module AccessPath {
/** A string that should be parsed as an access path. */
abstract class Range extends string {
bindingset[this]
Range() { any() }
}
/**
* Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value
* of the constant or any value contained in the interval.
*/
bindingset[arg]
int parseInt(string arg) {
result = arg.toInt()
or
// Match "n1..n2"
exists(string lo, string hi |
regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and
result = [lo.toInt() .. hi.toInt()]
)
}
/**
* Parses a lower-bounded interval `n..` and gets the lower bound.
*/
bindingset[arg]
int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() }
/**
* Parses an integer constant or interval (bounded or unbounded) that explicitly
* references the arity, such as `N-1` or `N-3..N-1`.
*
* Note that expressions of form `N-x` will never resolve to a negative index,
* even if `N` is zero (it will have no result in that case).
*/
bindingset[arg, arity]
private int parseIntWithExplicitArity(string arg, int arity) {
result >= 0 and // do not allow N-1 to resolve to a negative index
exists(string lo |
// N-x
lo = arg.regexpCapture("N-(\\d+)", 1) and
result = arity - lo.toInt()
or
// N-x..
lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and
result = [arity - lo.toInt(), arity - 1]
)
or
exists(string lo, string hi |
// x..N-y
regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and
result = [lo.toInt() .. arity - hi.toInt()]
or
// N-x..N-y
regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and
result = [arity - lo.toInt() .. arity - hi.toInt()] and
result >= 0
or
// N-x..y
regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and
result = [arity - lo.toInt() .. hi.toInt()] and
result >= 0
)
}
/**
* Parses an integer constant or interval (bounded or unbounded) and gets any
* of the integers contained within (of which there may be infinitely many).
*
* Has no result for arguments involving an explicit arity, such as `N-1`.
*/
bindingset[arg, result]
int parseIntUnbounded(string arg) {
result = parseInt(arg)
or
result >= parseLowerBound(arg)
}
/**
* Parses an integer constant or interval (bounded or unbounded) that
* may reference the arity of a call, such as `N-1` or `N-3..N-1`.
*
* Note that expressions of form `N-x` will never resolve to a negative index,
* even if `N` is zero (it will have no result in that case).
*/
bindingset[arg, arity]
int parseIntWithArity(string arg, int arity) {
result = parseInt(arg)
or
result in [parseLowerBound(arg) .. arity - 1]
or
result = parseIntWithExplicitArity(arg, arity)
}
}
/** Gets the `n`th token on the access path as a string. */
private string getRawToken(AccessPath path, int n) {
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
// Instead use regexpFind to match valid tokens, and supplement with a final length
// check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token.
result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
}
/**
* A string that occurs as an access path (either identifying or input/output spec)
* which might be relevant for this database.
*/
class AccessPath extends string instanceof AccessPath::Range {
/** Holds if this string is not a syntactically valid access path. */
predicate hasSyntaxError() {
// If the lengths match, all characters must haven been included in a token
// or seen by the `.` lookahead pattern.
this != "" and
not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1
}
/** Gets the `n`th token on the access path (if there are no syntax errors). */
AccessPathToken getToken(int n) {
result = getRawToken(this, n) and
not this.hasSyntaxError()
}
/** Gets the number of tokens on the path (if there are no syntax errors). */
int getNumToken() {
result = count(int n | exists(getRawToken(this, n))) and
not this.hasSyntaxError()
}
}
/**
* An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths.
*/
class AccessPathToken extends string {
AccessPathToken() { this = getRawToken(_, _) }
private string getPart(int part) {
result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
}
/** Gets the name of the token, such as `Member` from `Member[x]` */
string getName() { result = this.getPart(1) }
/**
* Gets the argument list, such as `1,2` from `Member[1,2]`,
* or has no result if there are no arguments.
*/
string getArgumentList() { result = this.getPart(2) }
/** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() }
/** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
pragma[nomagic]
string getArgument(string name, int n) { name = this.getName() and result = this.getArgument(n) }
/** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */
string getAnArgument() { result = this.getArgument(_) }
/** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
string getAnArgument(string name) { result = this.getArgument(name, _) }
/** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
int getNumArgument() { result = count(int n | exists(this.getArgument(n))) }
}

View File

@@ -0,0 +1,522 @@
/**
* INTERNAL use only. This is an experimental API subject to change without notice.
*
* Provides classes and predicates for dealing with flow models specified in CSV format.
*
* The CSV specification has the following columns:
* - Sources:
* `package; type; path; kind`
* - Sinks:
* `package; type; path; kind`
* - Summaries:
* `package; type; path; input; output; kind`
* - Types:
* `package1; type1; package2; type2; path`
*
* The interpretation of a row is similar to API-graphs with a left-to-right
* reading.
* 1. The `package` column selects a package name, as it would be referenced in the source code,
* such as an NPM package, PIP package, or Ruby gem. (See `ModelsAsData.qll` for language-specific details).
* It may also be a synthetic package used for a type definition (see type definitions below).
* 2. The `type` column selects all instances of a named type originating from that package,
* or the empty string if referring to the package itself.
* It can also be a synthetic type name defined by a type definition (see type definitions below).
* 3. The `path` column is a `.`-separated list of "access path tokens" to resolve, starting at the node selected by `package` and `type`.
*
* Every language supports the following tokens:
* - Argument[n]: the n-th argument to a call. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
* Additionally, `N-1` refers to the last argument, `N-2` refers to the second-last, and so on.
* - Parameter[n]: the n-th parameter of a callback. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
* - ReturnValue: the value returned by a function call
* - WithArity[n]: match a call with the given arity. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
*
* The following tokens are common and should be implemented for languages where it makes sense:
* - Member[x]: a member named `x`; exactly what a "member" is depends on the language. May be a comma-separated list of names.
* - Instance: an instance of a class
* - Subclass: a subclass of a class
* - ArrayElement: an element of array
* - Element: an element of a collection-like object
* - MapKey: a key in map-like object
* - MapValue: a value in a map-like object
* - Awaited: the value from a resolved promise/future-like object
*
* For the time being, please consult `ApiGraphModelsSpecific.qll` to see which language-specific tokens are currently supported.
*
* 4. The `input` and `output` columns specify how data enters and leaves the element selected by the
* first `(package, type, path)` tuple. Both strings are `.`-separated access paths
* of the same syntax as the `path` column.
* 5. The `kind` column is a tag that can be referenced from QL to determine to
* which classes the interpreted elements should be added. For example, for
* sources `"remote"` indicates a default remote flow source, and for summaries
* `"taint"` indicates a default additional taint step and `"value"` indicates a
* globally applicable value-preserving step.
*
* ### Types
*
* A type row of form `package1; type1; package2; type2; path` indicates that `package2; type2; path`
* should be seen as an instance of the type `package1; type1`.
*
* A `(package,type)` pair may refer to a static type or a synthetic type name used internally in the model.
* Synthetic type names can be used to reuse intermediate sub-paths, when there are multiple ways to access the same
* element.
* See `ModelsAsData.qll` for the language-specific interpretation of packages and static type names.
*
* By convention, if one wants to avoid clashes with static types from the package, the type name
* should be prefixed with a tilde character (`~`). For example, `(foo, ~Bar)` can be used to indicate that
* the type is related to the `foo` package but is not intended to match a static type.
*/
private import ApiGraphModelsSpecific as Specific
private class Unit = Specific::Unit;
private module API = Specific::API;
private import Specific::AccessPathSyntax
/** Module containing hooks for providing input data to be interpreted as a model. */
module ModelInput {
/**
* A unit class for adding additional source model rows.
*
* Extend this class to add additional source definitions.
*/
class SourceModelCsv extends Unit {
/**
* Holds if `row` specifies a source definition.
*
* A row of form
* ```
* package;type;path;kind
* ```
* indicates that the value at `(package, type, path)` should be seen as a flow
* source of the given `kind`.
*
* The kind `remote` represents a general remote flow source.
*/
abstract predicate row(string row);
}
/**
* A unit class for adding additional sink model rows.
*
* Extend this class to add additional sink definitions.
*/
class SinkModelCsv extends Unit {
/**
* Holds if `row` specifies a sink definition.
*
* A row of form
* ```
* package;type;path;kind
* ```
* indicates that the value at `(package, type, path)` should be seen as a sink
* of the given `kind`.
*/
abstract predicate row(string row);
}
/**
* A unit class for adding additional summary model rows.
*
* Extend this class to add additional flow summary definitions.
*/
class SummaryModelCsv extends Unit {
/**
* Holds if `row` specifies a summary definition.
*
* A row of form
* ```
* package;type;path;input;output;kind
* ```
* indicates that for each call to `(package, type, path)`, the value referred to by `input`
* can flow to the value referred to by `output`.
*
* `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps,
* respectively.
*/
abstract predicate row(string row);
}
/**
* A unit class for adding additional type model rows.
*
* Extend this class to add additional type definitions.
*/
class TypeModelCsv extends Unit {
/**
* Holds if `row` specifies a type definition.
*
* A row of form,
* ```
* package1;type1;package2;type2;path
* ```
* indicates that `(package2, type2, path)` should be seen as an instance of `(package1, type1)`.
*/
abstract predicate row(string row);
}
}
private import ModelInput
/**
* An empty class, except in specific tests.
*
* If this is non-empty, all models are parsed even if the package is not
* considered relevant for the current database.
*/
abstract class TestAllModels extends Unit { }
/**
* Append `;dummy` to the value of `s` to work around the fact that `string.split(delim,n)`
* does not preserve empty trailing substrings.
*/
bindingset[result]
private string inversePad(string s) { s = result + ";dummy" }
private predicate sourceModel(string row) { any(SourceModelCsv s).row(inversePad(row)) }
private predicate sinkModel(string row) { any(SinkModelCsv s).row(inversePad(row)) }
private predicate summaryModel(string row) { any(SummaryModelCsv s).row(inversePad(row)) }
private predicate typeModel(string row) { any(TypeModelCsv s).row(inversePad(row)) }
/** Holds if a source model exists for the given parameters. */
predicate sourceModel(string package, string type, string path, string kind) {
exists(string row |
sourceModel(row) and
row.splitAt(";", 0) = package and
row.splitAt(";", 1) = type and
row.splitAt(";", 2) = path and
row.splitAt(";", 3) = kind
)
}
/** Holds if a sink model exists for the given parameters. */
private predicate sinkModel(string package, string type, string path, string kind) {
exists(string row |
sinkModel(row) and
row.splitAt(";", 0) = package and
row.splitAt(";", 1) = type and
row.splitAt(";", 2) = path and
row.splitAt(";", 3) = kind
)
}
/** Holds if a summary model `row` exists for the given parameters. */
private predicate summaryModel(
string package, string type, string path, string input, string output, string kind
) {
exists(string row |
summaryModel(row) and
row.splitAt(";", 0) = package and
row.splitAt(";", 1) = type and
row.splitAt(";", 2) = path and
row.splitAt(";", 3) = input and
row.splitAt(";", 4) = output and
row.splitAt(";", 5) = kind
)
}
/** Holds if an type model exists for the given parameters. */
private predicate typeModel(
string package1, string type1, string package2, string type2, string path
) {
exists(string row |
typeModel(row) and
row.splitAt(";", 0) = package1 and
row.splitAt(";", 1) = type1 and
row.splitAt(";", 2) = package2 and
row.splitAt(";", 3) = type2 and
row.splitAt(";", 4) = path
)
}
/**
* Gets a package that should be seen as an alias for the given other `package`,
* or the `package` itself.
*/
bindingset[package]
bindingset[result]
string getAPackageAlias(string package) {
typeModel(package, "", result, "", "")
or
result = package
}
/**
* Holds if CSV rows involving `package` might be relevant for the analysis of this database.
*/
private predicate isRelevantPackage(string package) {
(
sourceModel(package, _, _, _) or
sinkModel(package, _, _, _) or
summaryModel(package, _, _, _, _, _) or
typeModel(package, _, _, _, _)
) and
(
Specific::isPackageUsed(package)
or
exists(TestAllModels t)
)
or
exists(string other |
isRelevantPackage(other) and
typeModel(package, _, other, _, _)
)
}
/**
* Holds if `package,type,path` is used in some CSV row.
*/
pragma[nomagic]
predicate isRelevantFullPath(string package, string type, string path) {
isRelevantPackage(package) and
(
sourceModel(package, type, path, _) or
sinkModel(package, type, path, _) or
summaryModel(package, type, path, _, _, _) or
typeModel(_, _, package, type, path)
)
}
/** A string from a CSV row that should be parsed as an access path. */
private class AccessPathRange extends AccessPath::Range {
AccessPathRange() {
isRelevantFullPath(_, _, this)
or
exists(string package | isRelevantPackage(package) |
summaryModel(package, _, _, this, _, _) or
summaryModel(package, _, _, _, this, _)
)
}
}
/**
* Gets a successor of `node` in the API graph.
*/
bindingset[token]
API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) {
// API graphs use the same label for arguments and parameters. An edge originating from a
// use-node represents an argument, and an edge originating from a def-node represents a parameter.
// We just map both to the same thing.
token.getName() = ["Argument", "Parameter"] and
result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument()))
or
token.getName() = "ReturnValue" and
result = node.getReturn()
or
// Language-specific tokens
result = Specific::getExtraSuccessorFromNode(node, token)
}
/**
* Gets an API-graph successor for the given invocation.
*/
bindingset[token]
API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken token) {
token.getName() = "Argument" and
result =
invoke
.getParameter(AccessPath::parseIntWithArity(token.getAnArgument(), invoke.getNumArgument()))
or
token.getName() = "ReturnValue" and
result = invoke.getReturn()
or
// Language-specific tokens
result = Specific::getExtraSuccessorFromInvoke(invoke, token)
}
/**
* Holds if `invoke` invokes a call-site filter given by `token`.
*/
pragma[inline]
private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, AccessPathToken token) {
token.getName() = "WithArity" and
invoke.getNumArgument() = AccessPath::parseIntUnbounded(token.getAnArgument())
or
Specific::invocationMatchesExtraCallSiteFilter(invoke, token)
}
/**
* Gets the API node identified by the first `n` tokens of `path` in the given `(package, type, path)` tuple.
*/
pragma[nomagic]
private API::Node getNodeFromPath(string package, string type, AccessPath path, int n) {
isRelevantFullPath(package, type, path) and
(
n = 0 and
exists(string package2, string type2, AccessPath path2 |
typeModel(package, type, package2, type2, path2) and
result = getNodeFromPath(package2, type2, path2, path2.getNumToken())
)
or
// Language-specific cases, such as handling of global variables
result = Specific::getExtraNodeFromPath(package, type, path, n)
)
or
result = getSuccessorFromNode(getNodeFromPath(package, type, path, n - 1), path.getToken(n - 1))
or
// Similar to the other recursive case, but where the path may have stepped through one or more call-site filters
result =
getSuccessorFromInvoke(getInvocationFromPath(package, type, path, n - 1), path.getToken(n - 1))
}
/** Gets the node identified by the given `(package, type, path)` tuple. */
API::Node getNodeFromPath(string package, string type, AccessPath path) {
result = getNodeFromPath(package, type, path, path.getNumToken())
}
/**
* Gets an invocation identified by the given `(package, type, path)` tuple.
*
* Unlike `getNodeFromPath`, the `path` may end with one or more call-site filters.
*/
Specific::InvokeNode getInvocationFromPath(string package, string type, AccessPath path, int n) {
result = Specific::getAnInvocationOf(getNodeFromPath(package, type, path, n))
or
result = getInvocationFromPath(package, type, path, n - 1) and
invocationMatchesCallSiteFilter(result, path.getToken(n - 1))
}
/** Gets an invocation identified by the given `(package, type, path)` tuple. */
Specific::InvokeNode getInvocationFromPath(string package, string type, AccessPath path) {
result = getInvocationFromPath(package, type, path, path.getNumToken())
}
/**
* Holds if `name` is a valid name for an access path token in the identifying access path.
*/
bindingset[name]
predicate isValidTokenNameInIdentifyingAccessPath(string name) {
name = ["Argument", "Parameter", "ReturnValue", "WithArity"]
or
Specific::isExtraValidTokenNameInIdentifyingAccessPath(name)
}
/**
* Holds if `name` is a valid name for an access path token with no arguments, occurring
* in an identifying access path.
*/
bindingset[name]
predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) {
name = "ReturnValue"
or
Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name)
}
/**
* Holds if `argument` is a valid argument to an access path token with the given `name`, occurring
* in an identifying access path.
*/
bindingset[name, argument]
predicate isValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
name = ["Argument", "Parameter"] and
argument.regexpMatch("(N-|-)?\\d+(\\.\\.((N-|-)?\\d+)?)?")
or
name = "WithArity" and
argument.regexpMatch("\\d+(\\.\\.(\\d+)?)?")
or
Specific::isExtraValidTokenArgumentInIdentifyingAccessPath(name, argument)
}
/**
* Module providing access to the imported models in terms of API graph nodes.
*/
module ModelOutput {
/**
* Holds if a CSV source model contributed `source` with the given `kind`.
*/
API::Node getASourceNode(string kind) {
exists(string package, string type, string path |
sourceModel(package, type, path, kind) and
result = getNodeFromPath(package, type, path)
)
}
/**
* Holds if a CSV sink model contributed `sink` with the given `kind`.
*/
API::Node getASinkNode(string kind) {
exists(string package, string type, string path |
sinkModel(package, type, path, kind) and
result = getNodeFromPath(package, type, path)
)
}
/**
* Holds if a relevant CSV summary exists for these parameters.
*/
predicate relevantSummaryModel(
string package, string type, string path, string input, string output, string kind
) {
isRelevantPackage(package) and
summaryModel(package, type, path, input, output, kind)
}
/**
* Holds if a `baseNode` is an invocation identified by the `package,type,path` part of a summary row.
*/
predicate resolvedSummaryBase(
string package, string type, string path, Specific::InvokeNode baseNode
) {
summaryModel(package, type, path, _, _, _) and
baseNode = getInvocationFromPath(package, type, path)
}
/**
* Holds if `node` is seen as an instance of `(package,type)` due to a type definition
* contributed by a CSV model.
*/
API::Node getATypeNode(string package, string type) {
exists(string package2, string type2, AccessPath path |
typeModel(package, type, package2, type2, path) and
result = getNodeFromPath(package2, type2, path)
)
}
/**
* Gets an error message relating to an invalid CSV row in a model.
*/
string getAWarning() {
// Check number of columns
exists(string row, string kind, int expectedArity, int actualArity |
any(SourceModelCsv csv).row(row) and kind = "source" and expectedArity = 4
or
any(SinkModelCsv csv).row(row) and kind = "sink" and expectedArity = 4
or
any(SummaryModelCsv csv).row(row) and kind = "summary" and expectedArity = 6
or
any(TypeModelCsv csv).row(row) and kind = "type" and expectedArity = 5
|
actualArity = count(row.indexOf(";")) + 1 and
actualArity != expectedArity and
result =
"CSV " + kind + " row should have " + expectedArity + " columns but has " + actualArity +
": " + row
)
or
// Check names and arguments of access path tokens
exists(AccessPath path, AccessPathToken token |
isRelevantFullPath(_, _, path) and
token = path.getToken(_)
|
not isValidTokenNameInIdentifyingAccessPath(token.getName()) and
result = "Invalid token name '" + token.getName() + "' in access path: " + path
or
isValidTokenNameInIdentifyingAccessPath(token.getName()) and
exists(string argument |
argument = token.getAnArgument() and
not isValidTokenArgumentInIdentifyingAccessPath(token.getName(), argument) and
result =
"Invalid argument '" + argument + "' in token '" + token + "' in access path: " + path
)
or
isValidTokenNameInIdentifyingAccessPath(token.getName()) and
token.getNumArgument() = 0 and
not isValidNoArgumentTokenInIdentifyingAccessPath(token.getName()) and
result = "Invalid token '" + token + "' is missing its arguments, in access path: " + path
)
}
}

View File

@@ -0,0 +1,202 @@
/**
* Contains the language-specific part of the models-as-data implementation found in `ApiGraphModels.qll`.
*
* It must export the following members:
* ```ql
* class Unit // a unit type
* module AccessPathSyntax // a re-export of the AccessPathSyntax module
* class InvokeNode // a type representing an invocation connected to the API graph
* module API // the API graph module
* predicate isPackageUsed(string package)
* API::Node getExtraNodeFromPath(string package, string type, string path, int n)
* API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token)
* API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathToken token)
* predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathToken token)
* InvokeNode getAnInvocationOf(API::Node node)
* predicate isExtraValidTokenNameInIdentifyingAccessPath(string name)
* predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name)
* predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument)
* ```
*/
private import python as PY
private import semmle.python.dataflow.new.DataFlow
private import ApiGraphModels
import semmle.python.ApiGraphs::API as API
class Unit = PY::Unit;
// Re-export libraries needed by ApiGraphModels.qll
import semmle.python.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax
private import AccessPathSyntax
/**
* Holds if models describing `package` may be relevant for the analysis of this database.
*/
predicate isPackageUsed(string package) { exists(API::moduleImport(package)) }
/** Gets a Python-specific interpretation of the `(package, type, path)` tuple after resolving the first `n` access path tokens. */
bindingset[package, type, path]
API::Node getExtraNodeFromPath(string package, string type, AccessPath path, int n) {
type = "" and
n = 0 and
result = API::moduleImport(package) and
exists(path)
}
/**
* Gets a Python-specific API graph successor of `node` reachable by resolving `token`.
*/
bindingset[token]
API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) {
token.getName() = "Member" and
result = node.getMember(token.getAnArgument())
or
token.getName() = "Instance" and
result = node.getReturn() // In Python `Instance` is just an alias for `ReturnValue`
or
token.getName() = "Awaited" and
result = node.getAwaited()
or
token.getName() = "Subclass" and
result = node.getASubclass*()
or
token.getName() = "Method" and
result = node.getMember(token.getAnArgument()).getReturn()
or
token.getName() = ["Argument", "Parameter"] and
(
token.getAnArgument() = "self" and
result = node.getSelfParameter()
or
exists(string name | token.getAnArgument() = name + ":" |
result = node.getKeywordParameter(name)
)
or
token.getAnArgument() = "any" and
result = [node.getParameter(_), node.getKeywordParameter(_)]
or
token.getAnArgument() = "any-named" and
result = node.getKeywordParameter(_)
)
// Some features don't have MaD tokens yet, they would need to be added to API-graphs first.
// - decorators ("DecoratedClass", "DecoratedMember", "DecoratedParameter")
// - Array/Map elements ("ArrayElement", "Element", "MapKey", "MapValue")
}
/**
* Gets a Python-specific API graph successor of `node` reachable by resolving `token`.
*/
bindingset[token]
API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathToken token) {
token.getName() = "Instance" and
result = node.getReturn()
or
token.getName() = ["Argument", "Parameter"] and
(
token.getAnArgument() = "self" and
result = node.getSelfParameter()
or
token.getAnArgument() = "any" and
result = [node.getParameter(_), node.getKeywordParameter(_)]
or
token.getAnArgument() = "any-named" and
result = node.getKeywordParameter(_)
or
exists(string arg | arg + ":" = token.getAnArgument() | result = node.getKeywordParameter(arg))
)
}
/**
* Holds if `invoke` matches the PY-specific call site filter in `token`.
*/
bindingset[token]
predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathToken token) {
token.getName() = "Call" and exists(invoke) // there is only one kind of call in Python.
}
/**
* Holds if `path` is an input or output spec for a summary with the given `base` node.
*/
pragma[nomagic]
private predicate relevantInputOutputPath(API::CallNode base, AccessPath inputOrOutput) {
exists(string package, string type, string input, string output, string path |
ModelOutput::relevantSummaryModel(package, type, path, input, output, _) and
ModelOutput::resolvedSummaryBase(package, type, path, base) and
inputOrOutput = [input, output]
)
}
/**
* Gets the API node for the first `n` tokens of the given input/output path, evaluated relative to `baseNode`.
*/
private API::Node getNodeFromInputOutputPath(API::CallNode baseNode, AccessPath path, int n) {
relevantInputOutputPath(baseNode, path) and
(
n = 1 and
result = getSuccessorFromInvoke(baseNode, path.getToken(0))
or
result =
getSuccessorFromNode(getNodeFromInputOutputPath(baseNode, path, n - 1), path.getToken(n - 1))
)
}
/**
* Gets the API node for the given input/output path, evaluated relative to `baseNode`.
*/
private API::Node getNodeFromInputOutputPath(API::CallNode baseNode, AccessPath path) {
result = getNodeFromInputOutputPath(baseNode, path, path.getNumToken())
}
/**
* Holds if a CSV summary contributed the step `pred -> succ` of the given `kind`.
*/
predicate summaryStep(API::Node pred, API::Node succ, string kind) {
exists(
string package, string type, string path, API::CallNode base, AccessPath input,
AccessPath output
|
ModelOutput::relevantSummaryModel(package, type, path, input, output, kind) and
ModelOutput::resolvedSummaryBase(package, type, path, base) and
pred = getNodeFromInputOutputPath(base, input) and
succ = getNodeFromInputOutputPath(base, output)
)
}
class InvokeNode = API::CallNode;
/** Gets an `InvokeNode` corresponding to an invocation of `node`. */
InvokeNode getAnInvocationOf(API::Node node) { result = node.getACall() }
/**
* Holds if `name` is a valid name for an access path token in the identifying access path.
*/
bindingset[name]
predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
name = ["Member", "Instance", "Awaited", "Call", "Method", "Subclass"]
}
/**
* Holds if `name` is a valid name for an access path token with no arguments, occurring
* in an identifying access path.
*/
predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) {
name = ["Instance", "Awaited", "Call", "Subclass"]
}
/**
* Holds if `argument` is a valid argument to an access path token with the given `name`, occurring
* in an identifying access path.
*/
bindingset[name, argument]
predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
name = ["Member", "Method"] and
exists(argument)
or
name = ["Argument", "Parameter"] and
(
argument = ["self", "any", "any-named"]
or
argument.regexpMatch("\\w+:") // keyword argument
)
}

View File

@@ -59,6 +59,12 @@ module PathInjection {
FileSystemAccessAsSink() { this = any(FileSystemAccess e).getAPathArgument() }
}
private import semmle.python.frameworks.data.ModelsAsData
private class DataAsFileSink extends Sink {
DataAsFileSink() { this = ModelOutput::getASinkNode("path-injection").getARhs() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -60,4 +60,11 @@ module SqlInjection {
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
private import semmle.python.frameworks.data.ModelsAsData
/** A sink for sql-injection from model data. */
private class DataAsSqlSink extends Sink {
DataAsSqlSink() { this = ModelOutput::getASinkNode("sql-injection").getARhs() }
}
}

View File

@@ -0,0 +1,46 @@
import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.PrintNode
private import semmle.python.frameworks.data.ModelsAsData
// need to import Frameworks to get the actual modeling imported
private import semmle.python.Frameworks
// this import needs to be public to get the query predicates propagated to the actual test files
import TestUtilities.InlineExpectationsTest
class MadSinkTest extends InlineExpectationsTest {
MadSinkTest() { this = "MadSinkTest" }
override string getARelevantTag() {
exists(string kind | exists(ModelOutput::getASinkNode(kind)) | result = "mad-sink__" + kind)
}
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(DataFlow::Node sink, string kind |
sink = ModelOutput::getASinkNode(kind).getARhs() and
location = sink.getLocation() and
element = sink.toString() and
value = prettyNodeForInlineTest(sink) and
tag = "mad-sink__" + kind
)
}
}
class MadSourceTest extends InlineExpectationsTest {
MadSourceTest() { this = "MadSourceTest" }
override string getARelevantTag() {
exists(string kind | exists(ModelOutput::getASourceNode(kind)) | result = "mad-source__" + kind)
}
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(DataFlow::Node source, string kind |
source = ModelOutput::getASourceNode(kind).getAnImmediateUse() and
location = source.getLocation() and
element = source.toString() and
value = prettyNodeForInlineTest(source) and
tag = "mad-source__" + kind
)
}
}

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.MaDTest

View File

@@ -7,17 +7,17 @@ async def test_connection():
try:
# The file-like object is passed in as a keyword-only argument.
# See https://magicstack.github.io/asyncpg/current/api/index.html#asyncpg.connection.Connection.copy_from_query
await conn.copy_from_query("sql", output="filepath") # $ getSql="sql" getAPathArgument="filepath"
await conn.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ getSql="sql" getAPathArgument="filepath"
await conn.copy_from_query("sql", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__path-injection="filepath"
await conn.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__path-injection="filepath"
await conn.copy_from_table("table", output="filepath") # $ getAPathArgument="filepath"
await conn.copy_to_table("table", source="filepath") # $ getAPathArgument="filepath"
await conn.copy_from_table("table", output="filepath") # $ mad-sink__path-injection="filepath"
await conn.copy_to_table("table", source="filepath") # $ mad-sink__path-injection="filepath"
await conn.execute("sql") # $ getSql="sql"
await conn.executemany("sql") # $ getSql="sql"
await conn.fetch("sql") # $ getSql="sql"
await conn.fetchrow("sql") # $ getSql="sql"
await conn.fetchval("sql") # $ getSql="sql"
await conn.execute("sql") # $ mad-sink__sql-injection="sql"
await conn.executemany("sql") # $ mad-sink__sql-injection="sql"
await conn.fetch("sql") # $ mad-sink__sql-injection="sql"
await conn.fetchrow("sql") # $ mad-sink__sql-injection="sql"
await conn.fetchval("sql") # $ mad-sink__sql-injection="sql"
finally:
await conn.close()
@@ -27,11 +27,11 @@ async def test_prepared_statement():
conn = await asyncpg.connect()
try:
pstmt = await conn.prepare("psql") # $ constructedSql="psql"
pstmt.executemany() # $ getSql="psql"
pstmt.fetch() # $ getSql="psql"
pstmt.fetchrow() # $ getSql="psql"
pstmt.fetchval() # $ getSql="psql"
pstmt = await conn.prepare("psql") # $ mad-sink__sql-injection="psql"
pstmt.executemany()
pstmt.fetch()
pstmt.fetchrow()
pstmt.fetchval()
finally:
await conn.close()
@@ -46,7 +46,7 @@ async def test_cursor():
cursor = await conn.cursor("sql") # $ getSql="sql" constructedSql="sql"
await cursor.fetch()
pstmt = await conn.prepare("psql") # $ constructedSql="psql"
pstmt = await conn.prepare("psql") # $ mad-sink__sql-injection="psql"
pcursor = await pstmt.cursor() # $ getSql="psql"
await pcursor.fetch()
@@ -69,23 +69,23 @@ async def test_connection_pool():
pool = await asyncpg.create_pool()
try:
await pool.copy_from_query("sql", output="filepath") # $ getSql="sql" getAPathArgument="filepath"
await pool.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ getSql="sql" getAPathArgument="filepath"
await pool.copy_from_table("table", output="filepath") # $ getAPathArgument="filepath"
await pool.copy_to_table("table", source="filepath") # $ getAPathArgument="filepath"
await pool.copy_from_query("sql", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__path-injection="filepath"
await pool.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__path-injection="filepath"
await pool.copy_from_table("table", output="filepath") # $ mad-sink__path-injection="filepath"
await pool.copy_to_table("table", source="filepath") # $ mad-sink__path-injection="filepath"
await pool.execute("sql") # $ getSql="sql"
await pool.executemany("sql") # $ getSql="sql"
await pool.fetch("sql") # $ getSql="sql"
await pool.fetchrow("sql") # $ getSql="sql"
await pool.fetchval("sql") # $ getSql="sql"
await pool.execute("sql") # $ mad-sink__sql-injection="sql"
await pool.executemany("sql") # $ mad-sink__sql-injection="sql"
await pool.fetch("sql") # $ mad-sink__sql-injection="sql"
await pool.fetchrow("sql") # $ mad-sink__sql-injection="sql"
await pool.fetchval("sql") # $ mad-sink__sql-injection="sql"
async with pool.acquire() as conn:
await conn.execute("sql") # $ getSql="sql"
await conn.execute("sql") # $ mad-sink__sql-injection="sql"
conn = await pool.acquire()
try:
await conn.fetch("sql") # $ getSql="sql"
await conn.fetch("sql") # $ mad-sink__sql-injection="sql"
finally:
await pool.release(conn)
@@ -93,13 +93,13 @@ async def test_connection_pool():
await pool.close()
async with asyncpg.create_pool() as pool:
await pool.execute("sql") # $ getSql="sql"
await pool.execute("sql") # $ mad-sink__sql-injection="sql"
async with pool.acquire() as conn:
await conn.execute("sql") # $ getSql="sql"
await conn.execute("sql") # $ mad-sink__sql-injection="sql"
conn = await pool.acquire()
try:
await conn.fetch("sql") # $ getSql="sql"
await conn.fetch("sql") # $ mad-sink__sql-injection="sql"
finally:
await pool.release(conn)

View File

@@ -0,0 +1,103 @@
taintFlow
| test.py:3:5:3:15 | ControlFlowNode for getSource() | test.py:4:8:4:8 | ControlFlowNode for x |
| test.py:3:5:3:15 | ControlFlowNode for getSource() | test.py:7:17:7:17 | ControlFlowNode for x |
| test.py:9:8:9:14 | ControlFlowNode for alias() | test.py:9:8:9:14 | ControlFlowNode for alias() |
| test.py:10:8:10:22 | ControlFlowNode for Attribute() | test.py:10:8:10:22 | ControlFlowNode for Attribute() |
| test.py:11:8:11:30 | ControlFlowNode for Attribute() | test.py:11:8:11:30 | ControlFlowNode for Attribute() |
| test.py:71:28:71:38 | ControlFlowNode for getSource() | test.py:71:8:71:39 | ControlFlowNode for Attribute() |
| test.py:75:5:75:15 | ControlFlowNode for getSource() | test.py:76:22:76:22 | ControlFlowNode for x |
| test.py:75:5:75:15 | ControlFlowNode for getSource() | test.py:77:22:77:22 | ControlFlowNode for y |
| test.py:81:36:81:46 | ControlFlowNode for getSource() | test.py:81:8:81:47 | ControlFlowNode for Attribute() |
| test.py:83:50:83:60 | ControlFlowNode for getSource() | test.py:83:8:83:61 | ControlFlowNode for Attribute() |
| test.py:86:49:86:59 | ControlFlowNode for getSource() | test.py:86:8:86:60 | ControlFlowNode for Attribute() |
| test.py:87:56:87:66 | ControlFlowNode for getSource() | test.py:87:8:87:67 | ControlFlowNode for Attribute() |
isSink
| test.py:4:8:4:8 | ControlFlowNode for x | test-sink |
| test.py:7:17:7:17 | ControlFlowNode for x | test-sink |
| test.py:9:8:9:14 | ControlFlowNode for alias() | test-sink |
| test.py:10:8:10:22 | ControlFlowNode for Attribute() | test-sink |
| test.py:11:8:11:30 | ControlFlowNode for Attribute() | test-sink |
| test.py:12:8:12:34 | ControlFlowNode for Attribute() | test-sink |
| test.py:16:11:16:13 | ControlFlowNode for one | test-sink |
| test.py:17:19:17:21 | ControlFlowNode for two | test-sink |
| test.py:17:24:17:28 | ControlFlowNode for three | test-sink |
| test.py:17:31:17:34 | ControlFlowNode for four | test-sink |
| test.py:18:37:18:40 | ControlFlowNode for five | test-sink |
| test.py:19:21:19:26 | ControlFlowNode for second | test-sink |
| test.py:30:21:30:23 | ControlFlowNode for one | test-sink |
| test.py:32:22:32:24 | ControlFlowNode for one | test-sink |
| test.py:32:27:32:29 | ControlFlowNode for two | test-sink |
| test.py:33:22:33:24 | ControlFlowNode for one | test-sink |
| test.py:33:27:33:29 | ControlFlowNode for two | test-sink |
| test.py:33:32:33:36 | ControlFlowNode for three | test-sink |
| test.py:57:27:57:33 | ControlFlowNode for arg_pos | test-sink |
| test.py:66:17:66:20 | ControlFlowNode for arg1 | test-sink |
| test.py:66:23:66:26 | ControlFlowNode for arg2 | test-sink |
| test.py:66:34:66:43 | ControlFlowNode for namedThing | test-sink |
| test.py:67:34:67:44 | ControlFlowNode for secondNamed | test-sink |
| test.py:71:8:71:39 | ControlFlowNode for Attribute() | test-sink |
| test.py:72:8:72:47 | ControlFlowNode for Attribute() | test-sink |
| test.py:76:22:76:22 | ControlFlowNode for x | test-sink |
| test.py:77:22:77:22 | ControlFlowNode for y | test-sink |
| test.py:78:22:78:22 | ControlFlowNode for z | test-sink |
| test.py:81:8:81:47 | ControlFlowNode for Attribute() | test-sink |
| test.py:82:8:82:54 | ControlFlowNode for Attribute() | test-sink |
| test.py:83:8:83:61 | ControlFlowNode for Attribute() | test-sink |
| test.py:85:8:85:53 | ControlFlowNode for Attribute() | test-sink |
| test.py:86:8:86:60 | ControlFlowNode for Attribute() | test-sink |
| test.py:87:8:87:67 | ControlFlowNode for Attribute() | test-sink |
| test.py:89:21:89:23 | ControlFlowNode for one | test-sink |
| test.py:91:21:91:23 | ControlFlowNode for one | test-sink |
| test.py:91:30:91:32 | ControlFlowNode for two | test-sink |
| test.py:98:6:98:9 | ControlFlowNode for baz2 | test-sink |
isSource
| test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source |
| test.py:9:8:9:14 | ControlFlowNode for alias() | test-source |
| test.py:10:8:10:14 | ControlFlowNode for alias() | test-source |
| test.py:10:8:10:22 | ControlFlowNode for Attribute() | test-source |
| test.py:11:8:11:14 | ControlFlowNode for alias() | test-source |
| test.py:11:8:11:22 | ControlFlowNode for Attribute() | test-source |
| test.py:11:8:11:30 | ControlFlowNode for Attribute() | test-source |
| test.py:12:8:12:14 | ControlFlowNode for alias() | test-source |
| test.py:12:8:12:22 | ControlFlowNode for Attribute() | test-source |
| test.py:23:24:23:26 | ControlFlowNode for one | test-source |
| test.py:24:33:24:35 | ControlFlowNode for two | test-source |
| test.py:24:38:24:42 | ControlFlowNode for three | test-source |
| test.py:24:45:24:48 | ControlFlowNode for four | test-source |
| test.py:25:34:25:39 | ControlFlowNode for second | test-source |
| test.py:39:11:39:20 | ControlFlowNode for Await | test-source |
| test.py:41:8:41:27 | ControlFlowNode for Attribute() | test-source |
| test.py:46:7:46:16 | ControlFlowNode for SubClass() | test-source |
| test.py:53:7:53:16 | ControlFlowNode for Attribute() | test-source |
| test.py:60:13:60:16 | ControlFlowNode for self | test-source |
| test.py:60:24:60:28 | ControlFlowNode for named | test-source |
| test.py:63:36:63:39 | ControlFlowNode for arg2 | test-source |
| test.py:63:42:63:45 | ControlFlowNode for arg3 | test-source |
| test.py:63:48:63:51 | ControlFlowNode for arg4 | test-source |
| test.py:63:54:63:57 | ControlFlowNode for arg5 | test-source |
| test.py:71:28:71:38 | ControlFlowNode for getSource() | test-source |
| test.py:72:36:72:46 | ControlFlowNode for getSource() | test-source |
| test.py:75:5:75:15 | ControlFlowNode for getSource() | test-source |
| test.py:81:36:81:46 | ControlFlowNode for getSource() | test-source |
| test.py:82:43:82:53 | ControlFlowNode for getSource() | test-source |
| test.py:83:50:83:60 | ControlFlowNode for getSource() | test-source |
| test.py:85:42:85:52 | ControlFlowNode for getSource() | test-source |
| test.py:86:49:86:59 | ControlFlowNode for getSource() | test-source |
| test.py:87:56:87:66 | ControlFlowNode for getSource() | test-source |
| test.py:101:29:101:31 | ControlFlowNode for arg | test-source |
| test.py:104:24:104:29 | ControlFlowNode for param1 | test-source |
| test.py:104:32:104:37 | ControlFlowNode for param2 | test-source |
| test.py:107:24:107:28 | ControlFlowNode for name1 | test-source |
| test.py:107:31:107:35 | ControlFlowNode for name2 | test-source |
syntaxErrors
| Member[foo |
| Member[foo] .Member[bar] |
| Member[foo] Member[bar] |
| Member[foo], Member[bar] |
| Member[foo],Member[bar] |
| Member[foo]. Member[bar] |
| Member[foo]..Member[bar] |
| Member[foo]Member[bar] |
| Member[foo]] |
| Member[foo]].Member[bar] |
warning

View File

@@ -0,0 +1,108 @@
from testlib import getSource, mySink, alias
x = getSource()
mySink(x)
mySink(foo=x) # OK
mySink(sinkName=x) # NOT OK
mySink(alias()) # NOT OK
mySink(alias().chain()) # NOT OK
mySink(alias().chain().chain()) # NOT OK
mySink(alias().chain().safeThing()) # OK
from testlib import Args
Args.arg0(one, two, three, four, five)
Args.arg1to3(one, two, three, four, five)
Args.lastarg(one, two, three, four, five)
Args.nonFist(first, second)
from testlib import Callbacks
Callbacks.first(lambda one, two, three, four, five: 0)
Callbacks.param1to3(lambda one, two, three, four, five: 0)
Callbacks.nonFirst(lambda first, second: 0)
from testlib import CallFilter
CallFilter.arityOne(one, two) # NO match
CallFilter.arityOne(one) # Match
CallFilter.twoOrMore(one) # NO match
CallFilter.twoOrMore(one, two) # Match
CallFilter.twoOrMore(one, two, three) # Match
from testlib import CommonTokens
async def async_func():
prom = CommonTokens.makePromise(1);
val = await prom
inst = CommonTokens.Class()
class SubClass (CommonTokens.Super):
pass
sub = SubClass()
class Sub2Class (CommonTokens.Class):
pass
sub2 = Sub2Class() # TODO: Currently not recognized as an instance of CommonTokens.Class
val = inst.foo()
from testlib import ArgPos
arg_pos = ArgPos(); val = arg_pos.self_thing(arg, named=2);
class SubClass (ArgPos.MyClass):
def foo(self, arg, named=2, otherName=3):
pass
def secondAndAfter(self, arg1, arg2, arg3, arg4, arg5):
pass
ArgPos.anyParam(arg1, arg2, name=namedThing)
ArgPos.anyNamed(arg4, arg5, name=secondNamed)
from testlib import Steps
mySink(Steps.preserveTaint(getSource())) # FLOW
mySink(Steps.preserveTaint("safe", getSource())) # NO FLOW
Steps.taintIntoCallback(
getSource(),
lambda x: mySink(x), # FLOW
lambda y: mySink(y), # FLOW
lambda z: mySink(z) # NO FLOW
)
mySink(Steps.preserveArgZeroAndTwo(getSource())) # FLOW
mySink(Steps.preserveArgZeroAndTwo("foo", getSource())) # NO FLOW
mySink(Steps.preserveArgZeroAndTwo("foo", "bar", getSource())) # FLOW
mySink(Steps.preserveAllButFirstArgument(getSource())) # NO FLOW
mySink(Steps.preserveAllButFirstArgument("foo", getSource())) # FLOW
mySink(Steps.preserveAllButFirstArgument("foo", "bar", getSource())) # FLOW
CallFilter.arityOne(one) # match
CallFilter.arityOne(one=one) # NO match
CallFilter.arityOne(one, two=two) # match - on both the named and positional arguments
CallFilter.arityOne(one=one, two=two) # NO match
from foo1.bar import baz1
baz1(baz1) # no match, and that's the point.
from foo2.bar import baz2
baz2(baz2) # match
class OtherSubClass (ArgPos.MyClass):
def otherSelfTest(self, arg, named=2, otherName=3): # test that Parameter[0] hits `arg`.
pass
def anyParam(self, param1, param2): # Parameter[any] matches all non-self parameters
pass
def anyNamed(self, name1, name2=2): # Parameter[any-named] matches all non-self named parameters
pass

View File

@@ -0,0 +1,127 @@
import python
import semmle.python.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax
import semmle.python.frameworks.data.ModelsAsData
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.DataFlow
private import semmle.python.ApiGraphs
class Steps extends ModelInput::SummaryModelCsv {
override predicate row(string row) {
// package;type;path;input;output;kind
row =
[
"testlib;;Member[Steps].Member[preserveTaint].Call;Argument[0];ReturnValue;taint",
"testlib;;Member[Steps].Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint",
"testlib;;Member[Steps].Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint",
"testlib;;Member[Steps].Member[preserveAllButFirstArgument].Call;Argument[1..];ReturnValue;taint",
]
}
}
class Types extends ModelInput::TypeModelCsv {
override predicate row(string row) {
// package1;type1;package2;type2;path
row =
[
"testlib;Alias;testlib;;Member[alias].ReturnValue",
"testlib;Alias;testlib;Alias;Member[chain].ReturnValue",
]
}
}
class Sinks extends ModelInput::SinkModelCsv {
override predicate row(string row) {
// package;type;path;kind
row =
[
"testlib;;Member[mySink].Argument[0,sinkName:];test-sink",
// testing argument syntax
"testlib;;Member[Args].Member[arg0].Argument[0];test-sink", //
"testlib;;Member[Args].Member[arg1to3].Argument[1..3];test-sink", //
"testlib;;Member[Args].Member[lastarg].Argument[N-1];test-sink", //
"testlib;;Member[Args].Member[nonFist].Argument[1..];test-sink", //
// callsite filter.
"testlib;;Member[CallFilter].Member[arityOne].WithArity[1].Argument[any];test-sink", //
"testlib;;Member[CallFilter].Member[twoOrMore].WithArity[2..].Argument[0..];test-sink", //
// testing non-positional arguments
"testlib;;Member[ArgPos].Instance.Member[self_thing].Argument[self];test-sink", //
// any argument
"testlib;;Member[ArgPos].Member[anyParam].Argument[any];test-sink", //
"testlib;;Member[ArgPos].Member[anyNamed].Argument[any-named];test-sink", //
// testing package syntax
"foo1.bar;;Member[baz1].Argument[any];test-sink", //
"foo2;;Member[bar].Member[baz2].Argument[any];test-sink", //
]
}
}
class Sources extends ModelInput::SourceModelCsv {
// package;type;path;kind
override predicate row(string row) {
row =
[
"testlib;;Member[getSource].ReturnValue;test-source", //
"testlib;Alias;;test-source",
// testing parameter syntax
"testlib;;Member[Callbacks].Member[first].Argument[0].Parameter[0];test-source", //
"testlib;;Member[Callbacks].Member[param1to3].Argument[0].Parameter[1..3];test-source", //
"testlib;;Member[Callbacks].Member[nonFirst].Argument[0].Parameter[1..];test-source", //
// Common tokens.
"testlib;;Member[CommonTokens].Member[makePromise].ReturnValue.Awaited;test-source", //
"testlib;;Member[CommonTokens].Member[Class].Instance;test-source", //
"testlib;;Member[CommonTokens].Member[Super].Subclass.Instance;test-source", //
// method
"testlib;;Member[CommonTokens].Member[Class].Instance.Method[foo];test-source", //
// testing non-positional arguments
"testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[self];test-source", //
"testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[named:];test-source", //
"testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[secondAndAfter].Parameter[1..];test-source", //
"testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[otherSelfTest].Parameter[0];test-source", //
"testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[anyParam].Parameter[any];test-source", //
"testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[anyNamed].Parameter[any-named];test-source", //
]
}
}
class BasicTaintTracking extends TaintTracking::Configuration {
BasicTaintTracking() { this = "BasicTaintTracking" }
override predicate isSource(DataFlow::Node source) {
source = ModelOutput::getASourceNode("test-source").getAnImmediateUse()
}
override predicate isSink(DataFlow::Node sink) {
sink = ModelOutput::getASinkNode("test-sink").getARhs()
}
}
query predicate taintFlow(DataFlow::Node source, DataFlow::Node sink) {
any(BasicTaintTracking tr).hasFlow(source, sink)
}
query predicate isSink(DataFlow::Node node, string kind) {
node = ModelOutput::getASinkNode(kind).getARhs()
}
query predicate isSource(DataFlow::Node node, string kind) {
node = ModelOutput::getASourceNode(kind).getAnImmediateUse()
}
class SyntaxErrorTest extends ModelInput::SinkModelCsv {
override predicate row(string row) {
row =
[
"testlib;;Member[foo],Member[bar];test-sink", "testlib;;Member[foo] Member[bar];test-sink",
"testlib;;Member[foo]. Member[bar];test-sink",
"testlib;;Member[foo], Member[bar];test-sink",
"testlib;;Member[foo]..Member[bar];test-sink",
"testlib;;Member[foo] .Member[bar];test-sink", "testlib;;Member[foo]Member[bar];test-sink",
"testlib;;Member[foo;test-sink", "testlib;;Member[foo]];test-sink",
"testlib;;Member[foo]].Member[bar];test-sink"
]
}
}
query predicate syntaxErrors(AccessPathSyntax::AccessPath path) { path.hasSyntaxError() }
query predicate warning = ModelOutput::getAWarning/0;

View File

@@ -0,0 +1,7 @@
| CSV type row should have 5 columns but has 2: test;TooFewColumns |
| CSV type row should have 5 columns but has 8: test;TooManyColumns;;;Member[Foo].Instance;too;many;columns |
| Invalid argument '0-1' in token 'Argument[0-1]' in access path: Method[foo].Argument[0-1] |
| Invalid argument '*' in token 'Argument[*]' in access path: Method[foo].Argument[*] |
| Invalid token 'Argument' is missing its arguments, in access path: Method[foo].Argument |
| Invalid token 'Member' is missing its arguments, in access path: Method[foo].Member |
| Invalid token name 'Arg' in access path: Method[foo].Arg[0] |

View File

@@ -0,0 +1,25 @@
import python
import semmle.python.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax
import semmle.python.frameworks.data.internal.ApiGraphModels as ApiGraphModels
import semmle.python.frameworks.data.ModelsAsData
private class InvalidTypeModel extends ModelInput::TypeModelCsv {
override predicate row(string row) {
row =
[
"test;TooManyColumns;;;Member[Foo].Instance;too;many;columns", //
"test;TooFewColumns", //
"test;X;test;Y;Method[foo].Arg[0]", //
"test;X;test;Y;Method[foo].Argument[0-1]", //
"test;X;test;Y;Method[foo].Argument[*]", //
"test;X;test;Y;Method[foo].Argument", //
"test;X;test;Y;Method[foo].Member", //
]
}
}
class IsTesting extends ApiGraphModels::TestAllModels {
IsTesting() { this = this }
}
query predicate warning = ModelOutput::getAWarning/0;

View File

@@ -299,7 +299,7 @@ private class AccessPathRange extends AccessPath::Range {
bindingset[token]
API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) {
// API graphs use the same label for arguments and parameters. An edge originating from a
// use-node represents be an argument, and an edge originating from a def-node represents a parameter.
// use-node represents an argument, and an edge originating from a def-node represents a parameter.
// We just map both to the same thing.
token.getName() = ["Argument", "Parameter"] and
result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument()))