From 1c2c9159a9053d49ae0ed491c502b6dd1c1d9deb Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 26 Apr 2022 12:43:34 +0200 Subject: [PATCH 01/51] initial MaD implementation for Python --- config/identical-files.json | 6 +- python/ql/lib/semmle/python/ApiGraphs.qll | 3 + python/ql/lib/semmle/python/Frameworks.qll | 1 + .../python/frameworks/data/ModelsAsData.qll | 46 ++ .../data/internal/AccessPathSyntax.qll | 175 ++++++ .../data/internal/ApiGraphModels.qll | 522 ++++++++++++++++++ .../data/internal/ApiGraphModelsSpecific.qll | 173 ++++++ .../frameworks/data/test.expected | 18 + .../library-tests/frameworks/data/test.py | 4 + .../library-tests/frameworks/data/test.ql | 85 +++ .../frameworks/data/warnings.expected | 12 + .../library-tests/frameworks/data/warnings.ql | 25 + 12 files changed, 1068 insertions(+), 2 deletions(-) create mode 100644 python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll create mode 100644 python/ql/lib/semmle/python/frameworks/data/internal/AccessPathSyntax.qll create mode 100644 python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll create mode 100644 python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll create mode 100644 python/ql/test/library-tests/frameworks/data/test.expected create mode 100644 python/ql/test/library-tests/frameworks/data/test.py create mode 100644 python/ql/test/library-tests/frameworks/data/test.ql create mode 100644 python/ql/test/library-tests/frameworks/data/warnings.expected create mode 100644 python/ql/test/library-tests/frameworks/data/warnings.ql diff --git a/config/identical-files.json b/config/identical-files.json index 2ff65c453f0..4ce332ca7f1 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -515,7 +515,8 @@ "csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll", "java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll", "javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll", - "ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll" + "ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll", + "python/ql/lib/semmle/python/frameworks/data/internal/AccessPathSyntax.qll" ], "IncompleteUrlSubstringSanitization": [ "javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll", @@ -533,7 +534,8 @@ ], "ApiGraphModels": [ "javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll", - "ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll" + "ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll", + "python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll" ], "TaintedFormatStringQuery Ruby/JS": [ "javascript/ql/lib/semmle/javascript/security/dataflow/TaintedFormatStringQuery.qll", diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index 5c848fcb69e..53c78dd3c19 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -339,6 +339,9 @@ module API { result = callee.getReturn() and result.getAnImmediateUse() = this } + + /** Gets the number of arguments of this call. */ + int getNumArgument() { result = count(this.getArg(_)) } } /** diff --git a/python/ql/lib/semmle/python/Frameworks.qll b/python/ql/lib/semmle/python/Frameworks.qll index b94b8aee5d9..9bcbfe13576 100644 --- a/python/ql/lib/semmle/python/Frameworks.qll +++ b/python/ql/lib/semmle/python/Frameworks.qll @@ -12,6 +12,7 @@ private import semmle.python.frameworks.Asyncpg private import semmle.python.frameworks.ClickhouseDriver private import semmle.python.frameworks.Cryptodome private import semmle.python.frameworks.Cryptography +private import semmle.python.frameworks.data.ModelsAsData private import semmle.python.frameworks.Dill private import semmle.python.frameworks.Django private import semmle.python.frameworks.Fabric diff --git a/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll new file mode 100644 index 00000000000..6c45c9a4036 --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll @@ -0,0 +1,46 @@ +/** + * Provides classes for contributing a model, or using the interpreted results + * of a model represented as data. + * + * - Use the `ModelInput` module to contribute new models. + * - Use the `ModelOutput` module to access the model results in terms of API nodes. + * + * The package name refers to a Pip package name. + */ + +private import python +private import internal.ApiGraphModels as Shared +private import internal.ApiGraphModelsSpecific as Specific +import Shared::ModelInput as ModelInput +import Shared::ModelOutput as ModelOutput +private import semmle.python.dataflow.new.RemoteFlowSources +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.ApiGraphs +private import semmle.python.dataflow.new.TaintTracking + +/** + * A remote flow source originating from a CSV source row. + */ +private class RemoteFlowSourceFromCsv extends RemoteFlowSource { + RemoteFlowSourceFromCsv() { this = ModelOutput::getASourceNode("remote").getAnImmediateUse() } + + override string getSourceType() { result = "Remote flow" } +} + +/** + * Like `ModelOutput::summaryStep` but with API nodes mapped to data-flow nodes. + */ +private predicate summaryStepNodes(DataFlow::Node pred, DataFlow::Node succ, string kind) { + exists(API::Node predNode, API::Node succNode | + Specific::summaryStep(predNode, succNode, kind) and + pred = predNode.getARhs() and + succ = succNode.getAnImmediateUse() + ) +} + +/** Taint steps induced by summary models of kind `taint`. */ +private class TaintStepFromSummary extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node pred, DataFlow::Node succ) { + summaryStepNodes(pred, succ, "taint") + } +} diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/AccessPathSyntax.qll b/python/ql/lib/semmle/python/frameworks/data/internal/AccessPathSyntax.qll new file mode 100644 index 00000000000..3d40e04b815 --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/data/internal/AccessPathSyntax.qll @@ -0,0 +1,175 @@ +/** + * Module for parsing access paths from CSV models, both the identifying access path used + * by dynamic languages, and the input/output specifications for summary steps. + * + * This file is used by the shared data flow library and by the JavaScript libraries + * (which does not use the shared data flow libraries). + */ + +/** + * Convenience-predicate for extracting two capture groups at once. + */ +bindingset[input, regexp] +private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) { + capture1 = input.regexpCapture(regexp, 1) and + capture2 = input.regexpCapture(regexp, 2) +} + +/** Companion module to the `AccessPath` class. */ +module AccessPath { + /** A string that should be parsed as an access path. */ + abstract class Range extends string { + bindingset[this] + Range() { any() } + } + + /** + * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value + * of the constant or any value contained in the interval. + */ + bindingset[arg] + int parseInt(string arg) { + result = arg.toInt() + or + // Match "n1..n2" + exists(string lo, string hi | + regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and + result = [lo.toInt() .. hi.toInt()] + ) + } + + /** + * Parses a lower-bounded interval `n..` and gets the lower bound. + */ + bindingset[arg] + int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() } + + /** + * Parses an integer constant or interval (bounded or unbounded) that explicitly + * references the arity, such as `N-1` or `N-3..N-1`. + * + * Note that expressions of form `N-x` will never resolve to a negative index, + * even if `N` is zero (it will have no result in that case). + */ + bindingset[arg, arity] + private int parseIntWithExplicitArity(string arg, int arity) { + result >= 0 and // do not allow N-1 to resolve to a negative index + exists(string lo | + // N-x + lo = arg.regexpCapture("N-(\\d+)", 1) and + result = arity - lo.toInt() + or + // N-x.. + lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and + result = [arity - lo.toInt(), arity - 1] + ) + or + exists(string lo, string hi | + // x..N-y + regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and + result = [lo.toInt() .. arity - hi.toInt()] + or + // N-x..N-y + regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and + result = [arity - lo.toInt() .. arity - hi.toInt()] and + result >= 0 + or + // N-x..y + regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and + result = [arity - lo.toInt() .. hi.toInt()] and + result >= 0 + ) + } + + /** + * Parses an integer constant or interval (bounded or unbounded) and gets any + * of the integers contained within (of which there may be infinitely many). + * + * Has no result for arguments involving an explicit arity, such as `N-1`. + */ + bindingset[arg, result] + int parseIntUnbounded(string arg) { + result = parseInt(arg) + or + result >= parseLowerBound(arg) + } + + /** + * Parses an integer constant or interval (bounded or unbounded) that + * may reference the arity of a call, such as `N-1` or `N-3..N-1`. + * + * Note that expressions of form `N-x` will never resolve to a negative index, + * even if `N` is zero (it will have no result in that case). + */ + bindingset[arg, arity] + int parseIntWithArity(string arg, int arity) { + result = parseInt(arg) + or + result in [parseLowerBound(arg) .. arity - 1] + or + result = parseIntWithExplicitArity(arg, arity) + } +} + +/** Gets the `n`th token on the access path as a string. */ +private string getRawToken(AccessPath path, int n) { + // Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`. + // Instead use regexpFind to match valid tokens, and supplement with a final length + // check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token. + result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _) +} + +/** + * A string that occurs as an access path (either identifying or input/output spec) + * which might be relevant for this database. + */ +class AccessPath extends string instanceof AccessPath::Range { + /** Holds if this string is not a syntactically valid access path. */ + predicate hasSyntaxError() { + // If the lengths match, all characters must haven been included in a token + // or seen by the `.` lookahead pattern. + this != "" and + not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1 + } + + /** Gets the `n`th token on the access path (if there are no syntax errors). */ + AccessPathToken getToken(int n) { + result = getRawToken(this, n) and + not this.hasSyntaxError() + } + + /** Gets the number of tokens on the path (if there are no syntax errors). */ + int getNumToken() { + result = count(int n | exists(getRawToken(this, n))) and + not this.hasSyntaxError() + } +} + +/** + * An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths. + */ +class AccessPathToken extends string { + AccessPathToken() { this = getRawToken(_, _) } + + private string getPart(int part) { + result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) + } + + /** Gets the name of the token, such as `Member` from `Member[x]` */ + string getName() { result = this.getPart(1) } + + /** + * Gets the argument list, such as `1,2` from `Member[1,2]`, + * or has no result if there are no arguments. + */ + string getArgumentList() { result = this.getPart(2) } + + /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */ + string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() } + + /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ + string getAnArgument() { result = this.getArgument(_) } + + /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */ + int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } +} diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll new file mode 100644 index 00000000000..4681c2b91a5 --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll @@ -0,0 +1,522 @@ +/** + * INTERNAL use only. This is an experimental API subject to change without notice. + * + * Provides classes and predicates for dealing with flow models specified in CSV format. + * + * The CSV specification has the following columns: + * - Sources: + * `package; type; path; kind` + * - Sinks: + * `package; type; path; kind` + * - Summaries: + * `package; type; path; input; output; kind` + * - Types: + * `package1; type1; package2; type2; path` + * + * The interpretation of a row is similar to API-graphs with a left-to-right + * reading. + * 1. The `package` column selects a package name, as it would be referenced in the source code, + * such as an NPM package, PIP package, or Ruby gem. (See `ModelsAsData.qll` for language-specific details). + * It may also be a synthetic package used for a type definition (see type definitions below). + * 2. The `type` column selects all instances of a named type originating from that package, + * or the empty string if referring to the package itself. + * It can also be a synthetic type name defined by a type definition (see type definitions below). + * 3. The `path` column is a `.`-separated list of "access path tokens" to resolve, starting at the node selected by `package` and `type`. + * + * Every language supports the following tokens: + * - Argument[n]: the n-th argument to a call. May be a range of form `x..y` (inclusive) and/or a comma-separated list. + * Additionally, `N-1` refers to the last argument, `N-2` refers to the second-last, and so on. + * - Parameter[n]: the n-th parameter of a callback. May be a range of form `x..y` (inclusive) and/or a comma-separated list. + * - ReturnValue: the value returned by a function call + * - WithArity[n]: match a call with the given arity. May be a range of form `x..y` (inclusive) and/or a comma-separated list. + * + * The following tokens are common and should be implemented for languages where it makes sense: + * - Member[x]: a member named `x`; exactly what a "member" is depends on the language. May be a comma-separated list of names. + * - Instance: an instance of a class + * - Subclass: a subclass of a class + * - ArrayElement: an element of array + * - Element: an element of a collection-like object + * - MapKey: a key in map-like object + * - MapValue: a value in a map-like object + * - Awaited: the value from a resolved promise/future-like object + * + * For the time being, please consult `ApiGraphModelsSpecific.qll` to see which language-specific tokens are currently supported. + * + * 4. The `input` and `output` columns specify how data enters and leaves the element selected by the + * first `(package, type, path)` tuple. Both strings are `.`-separated access paths + * of the same syntax as the `path` column. + * 5. The `kind` column is a tag that can be referenced from QL to determine to + * which classes the interpreted elements should be added. For example, for + * sources `"remote"` indicates a default remote flow source, and for summaries + * `"taint"` indicates a default additional taint step and `"value"` indicates a + * globally applicable value-preserving step. + * + * ### Types + * + * A type row of form `package1; type1; package2; type2; path` indicates that `package2; type2; path` + * should be seen as an instance of the type `package1; type1`. + * + * A `(package,type)` pair may refer to a static type or a synthetic type name used internally in the model. + * Synthetic type names can be used to reuse intermediate sub-paths, when there are multiple ways to access the same + * element. + * See `ModelsAsData.qll` for the langauge-specific interpretation of packages and static type names. + * + * By convention, if one wants to avoid clashes with static types from the package, the type name + * should be prefixed with a tilde character (`~`). For example, `(foo, ~Bar)` can be used to indicate that + * the type is related to the `foo` package but is not intended to match a static type. + */ + +private import ApiGraphModelsSpecific as Specific + +private class Unit = Specific::Unit; + +private module API = Specific::API; + +private import Specific::AccessPathSyntax + +/** Module containing hooks for providing input data to be interpreted as a model. */ +module ModelInput { + /** + * A unit class for adding additional source model rows. + * + * Extend this class to add additional source definitions. + */ + class SourceModelCsv extends Unit { + /** + * Holds if `row` specifies a source definition. + * + * A row of form + * ``` + * package;type;path;kind + * ``` + * indicates that the value at `(package, type, path)` should be seen as a flow + * source of the given `kind`. + * + * The kind `remote` represents a general remote flow source. + */ + abstract predicate row(string row); + } + + /** + * A unit class for adding additional sink model rows. + * + * Extend this class to add additional sink definitions. + */ + class SinkModelCsv extends Unit { + /** + * Holds if `row` specifies a sink definition. + * + * A row of form + * ``` + * package;type;path;kind + * ``` + * indicates that the value at `(package, type, path)` should be seen as a sink + * of the given `kind`. + */ + abstract predicate row(string row); + } + + /** + * A unit class for adding additional summary model rows. + * + * Extend this class to add additional flow summary definitions. + */ + class SummaryModelCsv extends Unit { + /** + * Holds if `row` specifies a summary definition. + * + * A row of form + * ``` + * package;type;path;input;output;kind + * ``` + * indicates that for each call to `(package, type, path)`, the value referred to by `input` + * can flow to the value referred to by `output`. + * + * `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps, + * respectively. + */ + abstract predicate row(string row); + } + + /** + * A unit class for adding additional type model rows. + * + * Extend this class to add additional type definitions. + */ + class TypeModelCsv extends Unit { + /** + * Holds if `row` specifies a type definition. + * + * A row of form, + * ``` + * package1;type1;package2;type2;path + * ``` + * indicates that `(package2, type2, path)` should be seen as an instance of `(package1, type1)`. + */ + abstract predicate row(string row); + } +} + +private import ModelInput + +/** + * An empty class, except in specific tests. + * + * If this is non-empty, all models are parsed even if the package is not + * considered relevant for the current database. + */ +abstract class TestAllModels extends Unit { } + +/** + * Append `;dummy` to the value of `s` to work around the fact that `string.split(delim,n)` + * does not preserve empty trailing substrings. + */ +bindingset[result] +private string inversePad(string s) { s = result + ";dummy" } + +private predicate sourceModel(string row) { any(SourceModelCsv s).row(inversePad(row)) } + +private predicate sinkModel(string row) { any(SinkModelCsv s).row(inversePad(row)) } + +private predicate summaryModel(string row) { any(SummaryModelCsv s).row(inversePad(row)) } + +private predicate typeModel(string row) { any(TypeModelCsv s).row(inversePad(row)) } + +/** Holds if a source model exists for the given parameters. */ +predicate sourceModel(string package, string type, string path, string kind) { + exists(string row | + sourceModel(row) and + row.splitAt(";", 0) = package and + row.splitAt(";", 1) = type and + row.splitAt(";", 2) = path and + row.splitAt(";", 3) = kind + ) +} + +/** Holds if a sink model exists for the given parameters. */ +private predicate sinkModel(string package, string type, string path, string kind) { + exists(string row | + sinkModel(row) and + row.splitAt(";", 0) = package and + row.splitAt(";", 1) = type and + row.splitAt(";", 2) = path and + row.splitAt(";", 3) = kind + ) +} + +/** Holds if a summary model `row` exists for the given parameters. */ +private predicate summaryModel( + string package, string type, string path, string input, string output, string kind +) { + exists(string row | + summaryModel(row) and + row.splitAt(";", 0) = package and + row.splitAt(";", 1) = type and + row.splitAt(";", 2) = path and + row.splitAt(";", 3) = input and + row.splitAt(";", 4) = output and + row.splitAt(";", 5) = kind + ) +} + +/** Holds if an type model exists for the given parameters. */ +private predicate typeModel( + string package1, string type1, string package2, string type2, string path +) { + exists(string row | + typeModel(row) and + row.splitAt(";", 0) = package1 and + row.splitAt(";", 1) = type1 and + row.splitAt(";", 2) = package2 and + row.splitAt(";", 3) = type2 and + row.splitAt(";", 4) = path + ) +} + +/** + * Gets a package that should be seen as an alias for the given other `package`, + * or the `package` itself. + */ +bindingset[package] +bindingset[result] +string getAPackageAlias(string package) { + typeModel(package, "", result, "", "") + or + result = package +} + +/** + * Holds if CSV rows involving `package` might be relevant for the analysis of this database. + */ +private predicate isRelevantPackage(string package) { + ( + sourceModel(package, _, _, _) or + sinkModel(package, _, _, _) or + summaryModel(package, _, _, _, _, _) or + typeModel(package, _, _, _, _) + ) and + ( + Specific::isPackageUsed(package) + or + exists(TestAllModels t) + ) + or + exists(string other | + isRelevantPackage(other) and + typeModel(package, _, other, _, _) + ) +} + +/** + * Holds if `package,type,path` is used in some CSV row. + */ +pragma[nomagic] +predicate isRelevantFullPath(string package, string type, string path) { + isRelevantPackage(package) and + ( + sourceModel(package, type, path, _) or + sinkModel(package, type, path, _) or + summaryModel(package, type, path, _, _, _) or + typeModel(_, _, package, type, path) + ) +} + +/** A string from a CSV row that should be parsed as an access path. */ +private class AccessPathRange extends AccessPath::Range { + AccessPathRange() { + isRelevantFullPath(_, _, this) + or + exists(string package | isRelevantPackage(package) | + summaryModel(package, _, _, this, _, _) or + summaryModel(package, _, _, _, this, _) + ) + } +} + +/** + * Gets a successor of `node` in the API graph. + */ +bindingset[token] +API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { + // API graphs use the same label for arguments and parameters. An edge originating from a + // use-node represents be an argument, and an edge originating from a def-node represents a parameter. + // We just map both to the same thing. + token.getName() = ["Argument", "Parameter"] and + result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument())) + or + token.getName() = "ReturnValue" and + result = node.getReturn() + or + // Language-specific tokens + result = Specific::getExtraSuccessorFromNode(node, token) +} + +/** + * Gets an API-graph successor for the given invocation. + */ +bindingset[token] +API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken token) { + token.getName() = "Argument" and + result = + invoke + .getParameter(AccessPath::parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) + or + token.getName() = "ReturnValue" and + result = invoke.getReturn() + or + // Language-specific tokens + result = Specific::getExtraSuccessorFromInvoke(invoke, token) +} + +/** + * Holds if `invoke` invokes a call-site filter given by `token`. + */ +pragma[inline] +private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, AccessPathToken token) { + token.getName() = "WithArity" and + invoke.getNumArgument() = AccessPath::parseIntUnbounded(token.getAnArgument()) + or + Specific::invocationMatchesExtraCallSiteFilter(invoke, token) +} + +/** + * Gets the API node identified by the first `n` tokens of `path` in the given `(package, type, path)` tuple. + */ +pragma[nomagic] +private API::Node getNodeFromPath(string package, string type, AccessPath path, int n) { + isRelevantFullPath(package, type, path) and + ( + n = 0 and + exists(string package2, string type2, AccessPath path2 | + typeModel(package, type, package2, type2, path2) and + result = getNodeFromPath(package2, type2, path2, path2.getNumToken()) + ) + or + // Language-specific cases, such as handling of global variables + result = Specific::getExtraNodeFromPath(package, type, path, n) + ) + or + result = getSuccessorFromNode(getNodeFromPath(package, type, path, n - 1), path.getToken(n - 1)) + or + // Similar to the other recursive case, but where the path may have stepped through one or more call-site filters + result = + getSuccessorFromInvoke(getInvocationFromPath(package, type, path, n - 1), path.getToken(n - 1)) +} + +/** Gets the node identified by the given `(package, type, path)` tuple. */ +API::Node getNodeFromPath(string package, string type, AccessPath path) { + result = getNodeFromPath(package, type, path, path.getNumToken()) +} + +/** + * Gets an invocation identified by the given `(package, type, path)` tuple. + * + * Unlike `getNodeFromPath`, the `path` may end with one or more call-site filters. + */ +Specific::InvokeNode getInvocationFromPath(string package, string type, AccessPath path, int n) { + result = Specific::getAnInvocationOf(getNodeFromPath(package, type, path, n)) + or + result = getInvocationFromPath(package, type, path, n - 1) and + invocationMatchesCallSiteFilter(result, path.getToken(n - 1)) +} + +/** Gets an invocation identified by the given `(package, type, path)` tuple. */ +Specific::InvokeNode getInvocationFromPath(string package, string type, AccessPath path) { + result = getInvocationFromPath(package, type, path, path.getNumToken()) +} + +/** + * Holds if `name` is a valid name for an access path token in the identifying access path. + */ +bindingset[name] +predicate isValidTokenNameInIdentifyingAccessPath(string name) { + name = ["Argument", "Parameter", "ReturnValue", "WithArity"] + or + Specific::isExtraValidTokenNameInIdentifyingAccessPath(name) +} + +/** + * Holds if `name` is a valid name for an access path token with no arguments, occuring + * in an identifying access path. + */ +bindingset[name] +predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) { + name = "ReturnValue" + or + Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name) +} + +/** + * Holds if `argument` is a valid argument to an access path token with the given `name`, occurring + * in an identifying access path. + */ +bindingset[name, argument] +predicate isValidTokenArgumentInIdentifyingAccessPath(string name, string argument) { + name = ["Argument", "Parameter"] and + argument.regexpMatch("(N-|-)?\\d+(\\.\\.((N-|-)?\\d+)?)?") + or + name = "WithArity" and + argument.regexpMatch("\\d+(\\.\\.(\\d+)?)?") + or + Specific::isExtraValidTokenArgumentInIdentifyingAccessPath(name, argument) +} + +/** + * Module providing access to the imported models in terms of API graph nodes. + */ +module ModelOutput { + /** + * Holds if a CSV source model contributed `source` with the given `kind`. + */ + API::Node getASourceNode(string kind) { + exists(string package, string type, string path | + sourceModel(package, type, path, kind) and + result = getNodeFromPath(package, type, path) + ) + } + + /** + * Holds if a CSV sink model contributed `sink` with the given `kind`. + */ + API::Node getASinkNode(string kind) { + exists(string package, string type, string path | + sinkModel(package, type, path, kind) and + result = getNodeFromPath(package, type, path) + ) + } + + /** + * Holds if a relevant CSV summary exists for these parameters. + */ + predicate relevantSummaryModel( + string package, string type, string path, string input, string output, string kind + ) { + isRelevantPackage(package) and + summaryModel(package, type, path, input, output, kind) + } + + /** + * Holds if a `baseNode` is an invocation identified by the `package,type,path` part of a summary row. + */ + predicate resolvedSummaryBase( + string package, string type, string path, Specific::InvokeNode baseNode + ) { + summaryModel(package, type, path, _, _, _) and + baseNode = getInvocationFromPath(package, type, path) + } + + /** + * Holds if `node` is seen as an instance of `(package,type)` due to a type definition + * contributed by a CSV model. + */ + API::Node getATypeNode(string package, string type) { + exists(string package2, string type2, AccessPath path | + typeModel(package, type, package2, type2, path) and + result = getNodeFromPath(package2, type2, path) + ) + } + + /** + * Gets an error message relating to an invalid CSV row in a model. + */ + string getAWarning() { + // Check number of columns + exists(string row, string kind, int expectedArity, int actualArity | + any(SourceModelCsv csv).row(row) and kind = "source" and expectedArity = 4 + or + any(SinkModelCsv csv).row(row) and kind = "sink" and expectedArity = 4 + or + any(SummaryModelCsv csv).row(row) and kind = "summary" and expectedArity = 6 + or + any(TypeModelCsv csv).row(row) and kind = "type" and expectedArity = 5 + | + actualArity = count(row.indexOf(";")) + 1 and + actualArity != expectedArity and + result = + "CSV " + kind + " row should have " + expectedArity + " columns but has " + actualArity + + ": " + row + ) + or + // Check names and arguments of access path tokens + exists(AccessPath path, AccessPathToken token | + isRelevantFullPath(_, _, path) and + token = path.getToken(_) + | + not isValidTokenNameInIdentifyingAccessPath(token.getName()) and + result = "Invalid token name '" + token.getName() + "' in access path: " + path + or + isValidTokenNameInIdentifyingAccessPath(token.getName()) and + exists(string argument | + argument = token.getAnArgument() and + not isValidTokenArgumentInIdentifyingAccessPath(token.getName(), argument) and + result = + "Invalid argument '" + argument + "' in token '" + token + "' in access path: " + path + ) + or + isValidTokenNameInIdentifyingAccessPath(token.getName()) and + token.getNumArgument() = 0 and + not isValidNoArgumentTokenInIdentifyingAccessPath(token.getName()) and + result = "Invalid token '" + token + "' is missing its arguments, in access path: " + path + ) + } +} diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll new file mode 100644 index 00000000000..fc6e69404ee --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -0,0 +1,173 @@ +/** + * Contains the language-specific part of the models-as-data implementation found in `ApiGraphModels.qll`. + * + * It must export the following members: + * ```ql + * class Unit // a unit type + * module AccessPathSyntax // a re-export of the AccessPathSyntax module + * class InvokeNode // a type representing an invocation connected to the API graph + * module API // the API graph module + * predicate isPackageUsed(string package) + * API::Node getExtraNodeFromPath(string package, string type, string path, int n) + * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) + * API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathToken token) + * predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathToken token) + * InvokeNode getAnInvocationOf(API::Node node) + * predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) + * predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) + * predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) + * ``` + */ + +private import python as PY +import semmle.python.dataflow.new.DataFlow +private import ApiGraphModels +import semmle.python.ApiGraphs + +class Unit = PY::Unit; + +// Re-export libraries needed by ApiGraphModels.qll +import semmle.python.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax +private import AccessPathSyntax + +/** + * Holds if models describing `package` may be relevant for the analysis of this database. + */ +predicate isPackageUsed(string package) { exists(API::moduleImport(package)) } + +/** Gets a Python-specific interpretation of the `(package, type, path)` tuple after resolving the first `n` access path tokens. */ +bindingset[package, type, path] +API::Node getExtraNodeFromPath(string package, string type, AccessPath path, int n) { + type = "" and + n = 0 and + result = API::moduleImport(package) and + exists(path) +} + +/** + * Gets a Python-specific API graph successor of `node` reachable by resolving `token`. + */ +bindingset[token] +API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { + token.getName() = "Member" and + result = node.getMember(token.getAnArgument()) + or + token.getName() = "Instance" and + result = node.getReturn() // commonly used Token. In Python `Instance` is just an alias for `ReturnValue` + or + token.getName() = "Awaited" and + result = node.getAwaited() + // Some features don't have MaD tokens yet, they would need to be added to API-graphs first. + // - decorators ("DecoratedClass", "DecoratedMember", "DecoratedParameter") + // - Array/Map elements ("ArrayElement", "Element", "MapKey", "MapValue") +} + +/** + * Gets a Python-specific API graph successor of `node` reachable by resolving `token`. + */ +bindingset[token] +API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathToken token) { + token.getName() = "Instance" and + result = node.getReturn() + or + token.getName() = "Argument" and + token.getAnArgument() = "self" and + result.getARhs() = node.(DataFlow::MethodCallNode).getObject() // TODO: Get proper support for this in API-graphs? + or + token.getName() = "Argument" and + exists(string arg | arg + ":" = token.getAnArgument() | result = node.getKeywordParameter(arg)) +} + +/** + * Holds if `invoke` matches the PY-specific call site filter in `token`. + */ +bindingset[token] +predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathToken token) { + token.getName() = "Call" and exists(invoke) // there is only one kind of call in Python. +} + +/** + * Holds if `path` is an input or output spec for a summary with the given `base` node. + */ +pragma[nomagic] +private predicate relevantInputOutputPath(API::CallNode base, AccessPath inputOrOutput) { + exists(string package, string type, string input, string output, string path | + ModelOutput::relevantSummaryModel(package, type, path, input, output, _) and + ModelOutput::resolvedSummaryBase(package, type, path, base) and + inputOrOutput = [input, output] + ) +} + +/** + * Gets the API node for the first `n` tokens of the given input/output path, evaluated relative to `baseNode`. + */ +private API::Node getNodeFromInputOutputPath(API::CallNode baseNode, AccessPath path, int n) { + relevantInputOutputPath(baseNode, path) and + ( + n = 1 and + result = getSuccessorFromInvoke(baseNode, path.getToken(0)) + or + result = + getSuccessorFromNode(getNodeFromInputOutputPath(baseNode, path, n - 1), path.getToken(n - 1)) + ) +} + +/** + * Gets the API node for the given input/output path, evaluated relative to `baseNode`. + */ +private API::Node getNodeFromInputOutputPath(API::CallNode baseNode, AccessPath path) { + result = getNodeFromInputOutputPath(baseNode, path, path.getNumToken()) +} + +/** + * Holds if a CSV summary contributed the step `pred -> succ` of the given `kind`. + */ +predicate summaryStep(API::Node pred, API::Node succ, string kind) { + exists( + string package, string type, string path, API::CallNode base, AccessPath input, + AccessPath output + | + ModelOutput::relevantSummaryModel(package, type, path, input, output, kind) and + ModelOutput::resolvedSummaryBase(package, type, path, base) and + pred = getNodeFromInputOutputPath(base, input) and + succ = getNodeFromInputOutputPath(base, output) + ) +} + +class InvokeNode = API::CallNode; + +/** Gets an `InvokeNode` corresponding to an invocation of `node`. */ +InvokeNode getAnInvocationOf(API::Node node) { result = node.getACall() } + +/** + * Holds if `name` is a valid name for an access path token in the identifying access path. + */ +bindingset[name] +predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) { + name = ["Member", "Instance", "Awaited", "Call", "Method", "Subclass"] +} + +/** + * Holds if `name` is a valid name for an access path token with no arguments, occuring + * in an identifying access path. + */ +predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) { + name = ["Instance", "Awaited", "Call", "Subclass"] +} + +/** + * Holds if `argument` is a valid argument to an access path token with the given `name`, occurring + * in an identifying access path. + */ +bindingset[name, argument] +predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) { + name = ["Member"] and + exists(argument) + or + name = ["Argument", "Parameter"] and + ( + argument = "self" + or + argument.regexpMatch("\\w+:") // keyword argument + ) +} diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected new file mode 100644 index 00000000000..afdd0b91a9d --- /dev/null +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -0,0 +1,18 @@ +taintFlow +| test.py:3:5:3:15 | ControlFlowNode for getSource() | test.py:4:8:4:8 | ControlFlowNode for x | +isSink +| test.py:4:8:4:8 | ControlFlowNode for x | test-sink | +isSource +| test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | +syntaxErrors +| Member[foo | +| Member[foo] .Member[bar] | +| Member[foo] Member[bar] | +| Member[foo], Member[bar] | +| Member[foo],Member[bar] | +| Member[foo]. Member[bar] | +| Member[foo]..Member[bar] | +| Member[foo]Member[bar] | +| Member[foo]] | +| Member[foo]].Member[bar] | +warning diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py new file mode 100644 index 00000000000..23da10f806d --- /dev/null +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -0,0 +1,4 @@ +from testlib import getSource, mySink + +x = getSource() +mySink(x) \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql new file mode 100644 index 00000000000..da1d80bf276 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -0,0 +1,85 @@ +import python +import semmle.python.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax +import semmle.python.frameworks.data.ModelsAsData +import semmle.python.dataflow.new.TaintTracking +import semmle.python.dataflow.new.DataFlow + +// TODO: +/* + * class Steps extends ModelInput::SummaryModelCsv { + * override predicate row(string row) { + * // package;type;path;input;output;kind + * row = + * [ + * "testlib;;Member[preserveTaint];Argument[0];ReturnValue;taint", + * "testlib;;Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint", + * "testlib;;Member[taintIntoCallbackThis];Argument[0];Argument[1..2].Parameter[this];taint", + * "testlib;;Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint", + * "testlib;;Member[preserveAllButFirstArgument];Argument[1..];ReturnValue;taint", + * "testlib;;Member[preserveAllIfCall].Call;Argument[0..];ReturnValue;taint", + * "testlib;;Member[getSource].ReturnValue.Member[continue];Argument[this];ReturnValue;taint", + * ] + * } + * } + */ + +class Sinks extends ModelInput::SinkModelCsv { + override predicate row(string row) { + // package;type;path;kind + row = ["testlib;;Member[mySink].Argument[0];test-sink"] + } +} + +// TODO: Test taint steps (include that the base path may end with ".Call") +// TODO: Ctrl + f: TODO +// TODO: // There are no API-graph edges for: ArrayElement, Element, MapKey, MapValue (remove from valid tokens list) +// TODO: Verify that the list of valid tokens matches the implementation. +class Sources extends ModelInput::SourceModelCsv { + // package;type;path;kind + override predicate row(string row) { + row = ["testlib;;Member[getSource].ReturnValue;test-source"] + } +} + +class BasicTaintTracking extends TaintTracking::Configuration { + BasicTaintTracking() { this = "BasicTaintTracking" } + + override predicate isSource(DataFlow::Node source) { + source = ModelOutput::getASourceNode("test-source").getAnImmediateUse() + } + + override predicate isSink(DataFlow::Node sink) { + sink = ModelOutput::getASinkNode("test-sink").getARhs() + } +} + +query predicate taintFlow(DataFlow::Node source, DataFlow::Node sink) { + any(BasicTaintTracking tr).hasFlow(source, sink) +} + +query predicate isSink(DataFlow::Node node, string kind) { + node = ModelOutput::getASinkNode(kind).getARhs() +} + +query predicate isSource(DataFlow::Node node, string kind) { + node = ModelOutput::getASourceNode(kind).getAnImmediateUse() +} + +class SyntaxErrorTest extends ModelInput::SinkModelCsv { + override predicate row(string row) { + row = + [ + "testlib;;Member[foo],Member[bar];test-sink", "testlib;;Member[foo] Member[bar];test-sink", + "testlib;;Member[foo]. Member[bar];test-sink", + "testlib;;Member[foo], Member[bar];test-sink", + "testlib;;Member[foo]..Member[bar];test-sink", + "testlib;;Member[foo] .Member[bar];test-sink", "testlib;;Member[foo]Member[bar];test-sink", + "testlib;;Member[foo;test-sink", "testlib;;Member[foo]];test-sink", + "testlib;;Member[foo]].Member[bar];test-sink" + ] + } +} + +query predicate syntaxErrors(AccessPathSyntax::AccessPath path) { path.hasSyntaxError() } + +query predicate warning = ModelOutput::getAWarning/0; diff --git a/python/ql/test/library-tests/frameworks/data/warnings.expected b/python/ql/test/library-tests/frameworks/data/warnings.expected new file mode 100644 index 00000000000..b8242f31a85 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/data/warnings.expected @@ -0,0 +1,12 @@ +| CSV type row should have 5 columns but has 2: test;TooFewColumns | +| CSV type row should have 5 columns but has 8: test;TooManyColumns;;;Member[Foo].Instance;too;many;columns | +| Invalid argument '0-1' in token 'Argument[0-1]' in access path: Method[foo].Argument[0-1] | +| Invalid argument '*' in token 'Argument[*]' in access path: Method[foo].Argument[*] | +| Invalid argument 'foo' in token 'Method[foo]' in access path: Method[foo].Arg[0] | +| Invalid argument 'foo' in token 'Method[foo]' in access path: Method[foo].Argument | +| Invalid argument 'foo' in token 'Method[foo]' in access path: Method[foo].Argument[0-1] | +| Invalid argument 'foo' in token 'Method[foo]' in access path: Method[foo].Argument[*] | +| Invalid argument 'foo' in token 'Method[foo]' in access path: Method[foo].Member | +| Invalid token 'Argument' is missing its arguments, in access path: Method[foo].Argument | +| Invalid token 'Member' is missing its arguments, in access path: Method[foo].Member | +| Invalid token name 'Arg' in access path: Method[foo].Arg[0] | diff --git a/python/ql/test/library-tests/frameworks/data/warnings.ql b/python/ql/test/library-tests/frameworks/data/warnings.ql new file mode 100644 index 00000000000..3443233179e --- /dev/null +++ b/python/ql/test/library-tests/frameworks/data/warnings.ql @@ -0,0 +1,25 @@ +import python +import semmle.python.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax +import semmle.python.frameworks.data.internal.ApiGraphModels as ApiGraphModels +import semmle.python.frameworks.data.ModelsAsData + +private class InvalidTypeModel extends ModelInput::TypeModelCsv { + override predicate row(string row) { + row = + [ + "test;TooManyColumns;;;Member[Foo].Instance;too;many;columns", // + "test;TooFewColumns", // + "test;X;test;Y;Method[foo].Arg[0]", // + "test;X;test;Y;Method[foo].Argument[0-1]", // + "test;X;test;Y;Method[foo].Argument[*]", // + "test;X;test;Y;Method[foo].Argument", // + "test;X;test;Y;Method[foo].Member", // + ] + } +} + +class IsTesting extends ApiGraphModels::TestAllModels { + IsTesting() { this = this } +} + +query predicate warning = ModelOutput::getAWarning/0; From d4b882519af5fa716c3794a638dc0d3af36a8edd Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 26 Apr 2022 14:47:50 +0200 Subject: [PATCH 02/51] convert most of the asyncpg model to MaD --- python/ql/lib/semmle/python/Concepts.qll | 16 +++ .../lib/semmle/python/frameworks/Asyncpg.qll | 120 ++++++------------ .../library-tests/frameworks/asyncpg/test.py | 12 +- 3 files changed, 60 insertions(+), 88 deletions(-) diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index dc461861ace..e734105cd2e 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -62,6 +62,14 @@ module FileSystemAccess { /** Gets an argument to this file system access that is interpreted as a path. */ abstract DataFlow::Node getAPathArgument(); } + + private import semmle.python.frameworks.data.ModelsAsData + + private class DataAsFileAccess extends Range { + DataAsFileAccess() { this = ModelOutput::getASinkNode("file-access").getARhs() } + + override DataFlow::Node getAPathArgument() { result = this } + } } /** @@ -364,6 +372,14 @@ module SqlExecution { /** Gets the argument that specifies the SQL statements to be executed. */ abstract DataFlow::Node getSql(); } + + private import semmle.python.frameworks.data.ModelsAsData + + private class DataAsSqlExecution extends Range { + DataAsSqlExecution() { this = ModelOutput::getASinkNode("sql-injection").getARhs() } + + override DataFlow::Node getSql() { result = this } + } } /** diff --git a/python/ql/lib/semmle/python/frameworks/Asyncpg.qll b/python/ql/lib/semmle/python/frameworks/Asyncpg.qll index 45c274c64ba..2b2b9ed2e1b 100644 --- a/python/ql/lib/semmle/python/frameworks/Asyncpg.qll +++ b/python/ql/lib/semmle/python/frameworks/Asyncpg.qll @@ -7,91 +7,42 @@ private import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts private import semmle.python.ApiGraphs +private import semmle.python.frameworks.data.ModelsAsData /** Provides models for the `asyncpg` PyPI package. */ private module Asyncpg { - private import semmle.python.internal.Awaited - - /** Gets a `ConnectionPool` that is created when the result of `asyncpg.create_pool()` is awaited. */ - API::Node connectionPool() { - result = API::moduleImport("asyncpg").getMember("create_pool").getReturn().getAwaited() - } - - /** - * Gets a `Connection` that is created when - * - the result of `asyncpg.connect()` is awaited. - * - the result of calling `aquire` on a `ConnectionPool` is awaited. - */ - API::Node connection() { - result = API::moduleImport("asyncpg").getMember("connect").getReturn().getAwaited() - or - result = connectionPool().getMember("acquire").getReturn().getAwaited() - } - - /** `Connection`s and `ConnectionPool`s provide some methods that execute SQL. */ - class SqlExecutionOnConnection extends SqlExecution::Range, DataFlow::MethodCallNode { - string methodName; - - SqlExecutionOnConnection() { - this = [connectionPool(), connection()].getMember(methodName).getACall() and - methodName in ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval", "executemany"] - } - - override DataFlow::Node getSql() { - methodName in ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval"] and - result in [this.getArg(0), this.getArgByName("query")] - or - methodName = "executemany" and - result in [this.getArg(0), this.getArgByName("command")] + class AsyncpgModel extends ModelInput::TypeModelCsv { + override predicate row(string row) { + // package1;type1;package2;type2;path + row = + [ + // a `ConnectionPool` that is created when the result of `asyncpg.create_pool()` is awaited. + "asyncpg;ConnectionPool;asyncpg;;Member[create_pool].ReturnValue.Awaited", + // a `Connection` that is created when + // * - the result of `asyncpg.connect()` is awaited. + // * - the result of calling `aquire` on a `ConnectionPool` is awaited. + "asyncpg;Connection;asyncpg;;Member[connect].ReturnValue.Awaited", + "asyncpg;Connection;asyncpg;ConnectionPool;Member[acquire].ReturnValue.Awaited", + // Creating an internal `~Connection` type that contains both `Connection` and `ConnectionPool`. + "asyncpg;~Connection;asyncpg;Connection;", "asyncpg;~Connection;asyncpg;ConnectionPool;" + ] } } - /** A model of `Connection` and `ConnectionPool`, which provide some methods that access the file system. */ - class FileAccessOnConnection extends FileSystemAccess::Range, DataFlow::MethodCallNode { - string methodName; - - FileAccessOnConnection() { - this = [connectionPool(), connection()].getMember(methodName).getACall() and - methodName in ["copy_from_query", "copy_from_table", "copy_to_table"] - } - - // The path argument is keyword only. - override DataFlow::Node getAPathArgument() { - methodName in ["copy_from_query", "copy_from_table"] and - result = this.getArgByName("output") - or - methodName = "copy_to_table" and - result = this.getArgByName("source") - } - } - - /** - * Provides models of the `PreparedStatement` class in `asyncpg`. - * `PreparedStatement`s are created when the result of calling `prepare(query)` on a connection is awaited. - * The result of calling `prepare(query)` is a `PreparedStatementFactory` and the argument, `query` needs to - * be tracked to the place where a `PreparedStatement` is created and then futher to any executing methods. - * Hence the two type trackers. - */ - module PreparedStatement { - class PreparedStatementConstruction extends SqlConstruction::Range, API::CallNode { - PreparedStatementConstruction() { this = connection().getMember("prepare").getACall() } - - override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() } - } - - class PreparedStatementExecution extends SqlExecution::Range, API::CallNode { - PreparedStatementConstruction prepareCall; - - PreparedStatementExecution() { - this = - prepareCall - .getReturn() - .getAwaited() - .getMember(["executemany", "fetch", "fetchrow", "fetchval"]) - .getACall() - } - - override DataFlow::Node getSql() { result = prepareCall.getSql() } + class AsyncpgSink extends ModelInput::SinkModelCsv { + // package;type;path;kind + override predicate row(string row) { + row = + [ + // `Connection`s and `ConnectionPool`s provide some methods that execute SQL. + "asyncpg;~Connection;Member[copy_from_query,execute,fetch,fetchrow,fetchval].Argument[0,query:];sql-injection", + "asyncpg;~Connection;Member[executemany].Argument[0,command:];sql-injection", + // A model of `Connection` and `ConnectionPool`, which provide some methods that access the file system. + "asyncpg;~Connection;Member[copy_from_query,copy_from_table].Argument[output:];file-access", + "asyncpg;~Connection;Member[copy_to_table].Argument[source:];file-access", + // the `PreparedStatement` class in `asyncpg`. + "asyncpg;Connection;Member[prepare].Argument[0,query:];sql-injection", + ] } } @@ -106,7 +57,9 @@ private module Asyncpg { */ module Cursor { class CursorConstruction extends SqlConstruction::Range, API::CallNode { - CursorConstruction() { this = connection().getMember("cursor").getACall() } + CursorConstruction() { + this = ModelOutput::getATypeNode("asyncpg", "Connection").getMember("cursor").getACall() + } override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() } } @@ -121,8 +74,11 @@ private module Asyncpg { this = c.getReturn().getAwaited().getAnImmediateUse() ) or - exists(PreparedStatement::PreparedStatementConstruction prepareCall | - sql = prepareCall.getSql() and + exists(API::CallNode prepareCall | + prepareCall = + ModelOutput::getATypeNode("asyncpg", "Connection").getMember("prepare").getACall() + | + sql = prepareCall.getParameter(0, "query").getARhs() and this = prepareCall .getReturn() diff --git a/python/ql/test/library-tests/frameworks/asyncpg/test.py b/python/ql/test/library-tests/frameworks/asyncpg/test.py index e4b3c895ece..572fc454bed 100644 --- a/python/ql/test/library-tests/frameworks/asyncpg/test.py +++ b/python/ql/test/library-tests/frameworks/asyncpg/test.py @@ -27,11 +27,11 @@ async def test_prepared_statement(): conn = await asyncpg.connect() try: - pstmt = await conn.prepare("psql") # $ constructedSql="psql" - pstmt.executemany() # $ getSql="psql" - pstmt.fetch() # $ getSql="psql" - pstmt.fetchrow() # $ getSql="psql" - pstmt.fetchval() # $ getSql="psql" + pstmt = await conn.prepare("psql") # $ getSql="psql" + pstmt.executemany() + pstmt.fetch() + pstmt.fetchrow() + pstmt.fetchval() finally: await conn.close() @@ -46,7 +46,7 @@ async def test_cursor(): cursor = await conn.cursor("sql") # $ getSql="sql" constructedSql="sql" await cursor.fetch() - pstmt = await conn.prepare("psql") # $ constructedSql="psql" + pstmt = await conn.prepare("psql") # $ getSql="psql" pcursor = await pstmt.cursor() # $ getSql="psql" await pcursor.fetch() From 86a9bc6aca9599c2b7b999d2cfe4fb6b7332564c Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 26 Apr 2022 15:02:56 +0200 Subject: [PATCH 03/51] add test for keyword arguments --- .../frameworks/data/test.expected | 17 ++++++++++++++++ .../library-tests/frameworks/data/test.py | 12 +++++++++-- .../library-tests/frameworks/data/test.ql | 20 +++++++++++++++++-- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index afdd0b91a9d..1181bde6021 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -1,9 +1,26 @@ taintFlow | test.py:3:5:3:15 | ControlFlowNode for getSource() | test.py:4:8:4:8 | ControlFlowNode for x | +| test.py:3:5:3:15 | ControlFlowNode for getSource() | test.py:7:17:7:17 | ControlFlowNode for x | +| test.py:9:8:9:14 | ControlFlowNode for alias() | test.py:9:8:9:14 | ControlFlowNode for alias() | +| test.py:10:8:10:22 | ControlFlowNode for Attribute() | test.py:10:8:10:22 | ControlFlowNode for Attribute() | +| test.py:11:8:11:30 | ControlFlowNode for Attribute() | test.py:11:8:11:30 | ControlFlowNode for Attribute() | isSink | test.py:4:8:4:8 | ControlFlowNode for x | test-sink | +| test.py:7:17:7:17 | ControlFlowNode for x | test-sink | +| test.py:9:8:9:14 | ControlFlowNode for alias() | test-sink | +| test.py:10:8:10:22 | ControlFlowNode for Attribute() | test-sink | +| test.py:11:8:11:30 | ControlFlowNode for Attribute() | test-sink | +| test.py:12:8:12:34 | ControlFlowNode for Attribute() | test-sink | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | +| test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | +| test.py:10:8:10:14 | ControlFlowNode for alias() | test-source | +| test.py:10:8:10:22 | ControlFlowNode for Attribute() | test-source | +| test.py:11:8:11:14 | ControlFlowNode for alias() | test-source | +| test.py:11:8:11:22 | ControlFlowNode for Attribute() | test-source | +| test.py:11:8:11:30 | ControlFlowNode for Attribute() | test-source | +| test.py:12:8:12:14 | ControlFlowNode for alias() | test-source | +| test.py:12:8:12:22 | ControlFlowNode for Attribute() | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 23da10f806d..fd1790ac6e8 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -1,4 +1,12 @@ -from testlib import getSource, mySink +from testlib import getSource, mySink, alias x = getSource() -mySink(x) \ No newline at end of file +mySink(x) + +mySink(foo=x) # OK +mySink(sinkName=x) # NOT OK + +mySink(alias()) # NOT OK +mySink(alias().chain()) # NOT OK +mySink(alias().chain().chain()) # NOT OK +mySink(alias().chain().safeThing()) # OK \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index da1d80bf276..746fee47e22 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -3,6 +3,7 @@ import semmle.python.frameworks.data.internal.AccessPathSyntax as AccessPathSynt import semmle.python.frameworks.data.ModelsAsData import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.DataFlow +private import semmle.python.ApiGraphs // TODO: /* @@ -23,10 +24,21 @@ import semmle.python.dataflow.new.DataFlow * } */ +class Types extends ModelInput::TypeModelCsv { + override predicate row(string row) { + // package1;type1;package2;type2;path + row = + [ + "testlib;Alias;testlib;;Member[alias].ReturnValue", + "testlib;Alias;testlib;Alias;Member[chain].ReturnValue", + ] + } +} + class Sinks extends ModelInput::SinkModelCsv { override predicate row(string row) { // package;type;path;kind - row = ["testlib;;Member[mySink].Argument[0];test-sink"] + row = ["testlib;;Member[mySink].Argument[0,sinkName:];test-sink"] } } @@ -37,7 +49,11 @@ class Sinks extends ModelInput::SinkModelCsv { class Sources extends ModelInput::SourceModelCsv { // package;type;path;kind override predicate row(string row) { - row = ["testlib;;Member[getSource].ReturnValue;test-source"] + row = + [ + "testlib;;Member[getSource].ReturnValue;test-source", // + "testlib;Alias;;test-source" + ] } } From 35b143a1a5ae7989665d7199c46f1b940704f4d1 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 09:26:04 +0200 Subject: [PATCH 04/51] add tests for argument syntax --- .../test/library-tests/frameworks/data/test.expected | 6 ++++++ python/ql/test/library-tests/frameworks/data/test.py | 9 ++++++++- python/ql/test/library-tests/frameworks/data/test.ql | 12 ++++++++++-- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 1181bde6021..045ef365012 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -11,6 +11,12 @@ isSink | test.py:10:8:10:22 | ControlFlowNode for Attribute() | test-sink | | test.py:11:8:11:30 | ControlFlowNode for Attribute() | test-sink | | test.py:12:8:12:34 | ControlFlowNode for Attribute() | test-sink | +| test.py:16:11:16:13 | ControlFlowNode for one | test-source | +| test.py:17:19:17:21 | ControlFlowNode for two | test-source | +| test.py:17:24:17:28 | ControlFlowNode for three | test-source | +| test.py:17:31:17:34 | ControlFlowNode for four | test-source | +| test.py:18:37:18:40 | ControlFlowNode for five | test-source | +| test.py:19:21:19:26 | ControlFlowNode for second | test-source | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | | test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index fd1790ac6e8..5108518ecf3 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -9,4 +9,11 @@ mySink(sinkName=x) # NOT OK mySink(alias()) # NOT OK mySink(alias().chain()) # NOT OK mySink(alias().chain().chain()) # NOT OK -mySink(alias().chain().safeThing()) # OK \ No newline at end of file +mySink(alias().chain().safeThing()) # OK + +from testlib import Args + +Args.arg0(one, two, three, four, five) +Args.arg1to3(one, two, three, four, five) +Args.lastarg(one, two, three, four, five) +Args.nonFist(first, second) \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 746fee47e22..4c8a04bcf19 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -38,7 +38,15 @@ class Types extends ModelInput::TypeModelCsv { class Sinks extends ModelInput::SinkModelCsv { override predicate row(string row) { // package;type;path;kind - row = ["testlib;;Member[mySink].Argument[0,sinkName:];test-sink"] + row = + [ + "testlib;;Member[mySink].Argument[0,sinkName:];test-sink", + // testing argument syntax + "testlib;;Member[Args].Member[arg0].Argument[0];test-source", // + "testlib;;Member[Args].Member[arg1to3].Argument[1..3];test-source", // + "testlib;;Member[Args].Member[lastarg].Argument[N-1];test-source", // + "testlib;;Member[Args].Member[nonFist].Argument[1..];test-source", // + ] } } @@ -52,7 +60,7 @@ class Sources extends ModelInput::SourceModelCsv { row = [ "testlib;;Member[getSource].ReturnValue;test-source", // - "testlib;Alias;;test-source" + "testlib;Alias;;test-source", ] } } From 20992af0377e20eb9d028b0844fa7c3a8e6c1988 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 09:31:40 +0200 Subject: [PATCH 05/51] add test for parameter syntax --- .../ql/test/library-tests/frameworks/data/test.expected | 5 +++++ python/ql/test/library-tests/frameworks/data/test.py | 8 +++++++- python/ql/test/library-tests/frameworks/data/test.ql | 4 ++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 045ef365012..405efa858e3 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -27,6 +27,11 @@ isSource | test.py:11:8:11:30 | ControlFlowNode for Attribute() | test-source | | test.py:12:8:12:14 | ControlFlowNode for alias() | test-source | | test.py:12:8:12:22 | ControlFlowNode for Attribute() | test-source | +| test.py:23:24:23:26 | ControlFlowNode for one | test-source | +| test.py:24:33:24:35 | ControlFlowNode for two | test-source | +| test.py:24:38:24:42 | ControlFlowNode for three | test-source | +| test.py:24:45:24:48 | ControlFlowNode for four | test-source | +| test.py:25:34:25:39 | ControlFlowNode for second | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 5108518ecf3..da34b5ef56a 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -16,4 +16,10 @@ from testlib import Args Args.arg0(one, two, three, four, five) Args.arg1to3(one, two, three, four, five) Args.lastarg(one, two, three, four, five) -Args.nonFist(first, second) \ No newline at end of file +Args.nonFist(first, second) + +from testlib import Callbacks + +Callbacks.first(lambda one, two, three, four, five: 0) +Callbacks.param1to3(lambda one, two, three, four, five: 0) +Callbacks.nonFirst(lambda first, second: 0) \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 4c8a04bcf19..5574f3de199 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -61,6 +61,10 @@ class Sources extends ModelInput::SourceModelCsv { [ "testlib;;Member[getSource].ReturnValue;test-source", // "testlib;Alias;;test-source", + // testing parameter syntax + "testlib;;Member[Callbacks].Member[first].Argument[0].Parameter[0];test-source", // + "testlib;;Member[Callbacks].Member[param1to3].Argument[0].Parameter[1..3];test-source", // + "testlib;;Member[Callbacks].Member[nonFirst].Argument[0].Parameter[1..];test-source", // ] } } From 8d60336396f9c07301099fb7fa41ac55d68bc353 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 09:41:13 +0200 Subject: [PATCH 06/51] add tests for callsite filters --- .../test/library-tests/frameworks/data/test.expected | 6 ++++++ python/ql/test/library-tests/frameworks/data/test.py | 10 +++++++++- python/ql/test/library-tests/frameworks/data/test.ql | 3 +++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 405efa858e3..c32b6f9a56f 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -17,6 +17,12 @@ isSink | test.py:17:31:17:34 | ControlFlowNode for four | test-source | | test.py:18:37:18:40 | ControlFlowNode for five | test-source | | test.py:19:21:19:26 | ControlFlowNode for second | test-source | +| test.py:30:21:30:23 | ControlFlowNode for one | test-source | +| test.py:32:22:32:24 | ControlFlowNode for one | test-source | +| test.py:32:27:32:29 | ControlFlowNode for two | test-source | +| test.py:33:22:33:24 | ControlFlowNode for one | test-source | +| test.py:33:27:33:29 | ControlFlowNode for two | test-source | +| test.py:33:32:33:36 | ControlFlowNode for three | test-source | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | | test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index da34b5ef56a..630621ec226 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -22,4 +22,12 @@ from testlib import Callbacks Callbacks.first(lambda one, two, three, four, five: 0) Callbacks.param1to3(lambda one, two, three, four, five: 0) -Callbacks.nonFirst(lambda first, second: 0) \ No newline at end of file +Callbacks.nonFirst(lambda first, second: 0) + +from testlib import CallFilter + +CallFilter.arityOne(one, two) # NO match +CallFilter.arityOne(one) # Match +CallFilter.twoOrMore(one) # NO match +CallFilter.twoOrMore(one, two) # Match +CallFilter.twoOrMore(one, two, three) # Match \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 5574f3de199..2aca54cd356 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -46,6 +46,9 @@ class Sinks extends ModelInput::SinkModelCsv { "testlib;;Member[Args].Member[arg1to3].Argument[1..3];test-source", // "testlib;;Member[Args].Member[lastarg].Argument[N-1];test-source", // "testlib;;Member[Args].Member[nonFist].Argument[1..];test-source", // + // callsite filter. + "testlib;;Member[CallFilter].Member[arityOne].WithArity[1].Argument[0..];test-source", // + "testlib;;Member[CallFilter].Member[twoOrMore].WithArity[2..].Argument[0..];test-source", // ] } } From 48408ca45d20ad7c0a6fa5a9351c5d9191b06676 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 10:06:32 +0200 Subject: [PATCH 07/51] Add TODO list --- python/ql/test/library-tests/frameworks/data/test.ql | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 2aca54cd356..35db27c557c 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -53,10 +53,12 @@ class Sinks extends ModelInput::SinkModelCsv { } } -// TODO: Test taint steps (include that the base path may end with ".Call") -// TODO: Ctrl + f: TODO -// TODO: // There are no API-graph edges for: ArrayElement, Element, MapKey, MapValue (remove from valid tokens list) -// TODO: Verify that the list of valid tokens matches the implementation. +// TODO: Named parameters? +// TODO: Commonly used tokens +// TODO: Uniform tokens for fields +// TODO: Non-positional arguments +// TODO: Any argument +// TODO: Test taint steps. class Sources extends ModelInput::SourceModelCsv { // package;type;path;kind override predicate row(string row) { From 682cab37378cd763238ed67c9e4f1b42910ee8a7 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 11:01:10 +0200 Subject: [PATCH 08/51] add test for awaited --- .../ql/test/library-tests/frameworks/data/test.expected | 1 + python/ql/test/library-tests/frameworks/data/test.py | 8 +++++++- python/ql/test/library-tests/frameworks/data/test.ql | 7 +++++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index c32b6f9a56f..b521a4a3018 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -38,6 +38,7 @@ isSource | test.py:24:38:24:42 | ControlFlowNode for three | test-source | | test.py:24:45:24:48 | ControlFlowNode for four | test-source | | test.py:25:34:25:39 | ControlFlowNode for second | test-source | +| test.py:39:11:39:20 | ControlFlowNode for Await | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 630621ec226..0dcbd633a2b 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -30,4 +30,10 @@ CallFilter.arityOne(one, two) # NO match CallFilter.arityOne(one) # Match CallFilter.twoOrMore(one) # NO match CallFilter.twoOrMore(one, two) # Match -CallFilter.twoOrMore(one, two, three) # Match \ No newline at end of file +CallFilter.twoOrMore(one, two, three) # Match + +from testlib import CommonTokens + +async def async_func(): + prom = CommonTokens.makePromise(1); + val = await prom \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 35db27c557c..96a4fe38cf2 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -53,12 +53,13 @@ class Sinks extends ModelInput::SinkModelCsv { } } -// TODO: Named parameters? // TODO: Commonly used tokens // TODO: Uniform tokens for fields -// TODO: Non-positional arguments +// TODO: Non-positional arguments (including Named parameters) // TODO: Any argument // TODO: Test taint steps. +// TODO: Should `instance()` be shorthand for `subClass*().getReturn()`? +// TODO: // There are no API-graph edges for: ArrayElement, Element, MapKey, MapValue (remove from valid tokens list) class Sources extends ModelInput::SourceModelCsv { // package;type;path;kind override predicate row(string row) { @@ -70,6 +71,8 @@ class Sources extends ModelInput::SourceModelCsv { "testlib;;Member[Callbacks].Member[first].Argument[0].Parameter[0];test-source", // "testlib;;Member[Callbacks].Member[param1to3].Argument[0].Parameter[1..3];test-source", // "testlib;;Member[Callbacks].Member[nonFirst].Argument[0].Parameter[1..];test-source", // + // Common tokens. + "testlib;;Member[CommonTokens].Member[makePromise].ReturnValue.Awaited;test-source", // ] } } From a02e812de8a05e491f868a372868eb51a0dcd0ba Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 11:10:20 +0200 Subject: [PATCH 09/51] add test for the Instance token --- python/ql/test/library-tests/frameworks/data/test.expected | 1 + python/ql/test/library-tests/frameworks/data/test.py | 4 +++- python/ql/test/library-tests/frameworks/data/test.ql | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index b521a4a3018..c82438b93fc 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -39,6 +39,7 @@ isSource | test.py:24:45:24:48 | ControlFlowNode for four | test-source | | test.py:25:34:25:39 | ControlFlowNode for second | test-source | | test.py:39:11:39:20 | ControlFlowNode for Await | test-source | +| test.py:41:8:41:27 | ControlFlowNode for Attribute() | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 0dcbd633a2b..6860ef1211d 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -36,4 +36,6 @@ from testlib import CommonTokens async def async_func(): prom = CommonTokens.makePromise(1); - val = await prom \ No newline at end of file + val = await prom + +inst = CommonTokens.Class() \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 96a4fe38cf2..38b505cd768 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -73,6 +73,7 @@ class Sources extends ModelInput::SourceModelCsv { "testlib;;Member[Callbacks].Member[nonFirst].Argument[0].Parameter[1..];test-source", // // Common tokens. "testlib;;Member[CommonTokens].Member[makePromise].ReturnValue.Awaited;test-source", // + "testlib;;Member[CommonTokens].Member[Class].Instance;test-source", // ] } } From 46acce0ad42fc104911096f5f7d655eb627a251b Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 11:14:50 +0200 Subject: [PATCH 10/51] add support for the Subclass token --- .../frameworks/data/internal/ApiGraphModelsSpecific.qll | 3 +++ python/ql/test/library-tests/frameworks/data/test.expected | 1 + python/ql/test/library-tests/frameworks/data/test.py | 7 ++++++- python/ql/test/library-tests/frameworks/data/test.ql | 2 ++ 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index fc6e69404ee..d174f9416c7 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -57,6 +57,9 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { or token.getName() = "Awaited" and result = node.getAwaited() + or + token.getName() = "Subclass" and + result = node.getASubclass*() // Some features don't have MaD tokens yet, they would need to be added to API-graphs first. // - decorators ("DecoratedClass", "DecoratedMember", "DecoratedParameter") // - Array/Map elements ("ArrayElement", "Element", "MapKey", "MapValue") diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index c82438b93fc..8f450186dd1 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -40,6 +40,7 @@ isSource | test.py:25:34:25:39 | ControlFlowNode for second | test-source | | test.py:39:11:39:20 | ControlFlowNode for Await | test-source | | test.py:41:8:41:27 | ControlFlowNode for Attribute() | test-source | +| test.py:46:7:46:16 | ControlFlowNode for SubClass() | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 6860ef1211d..707b7714dbc 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -38,4 +38,9 @@ async def async_func(): prom = CommonTokens.makePromise(1); val = await prom -inst = CommonTokens.Class() \ No newline at end of file +inst = CommonTokens.Class() + +class SubClass (CommonTokens.Super): + pass + +sub = SubClass() \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 38b505cd768..0d768404073 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -54,6 +54,7 @@ class Sinks extends ModelInput::SinkModelCsv { } // TODO: Commonly used tokens +// TODO: Should `instance()` be shorthand for `subClass*().getReturn()`? // TODO: Uniform tokens for fields // TODO: Non-positional arguments (including Named parameters) // TODO: Any argument @@ -74,6 +75,7 @@ class Sources extends ModelInput::SourceModelCsv { // Common tokens. "testlib;;Member[CommonTokens].Member[makePromise].ReturnValue.Awaited;test-source", // "testlib;;Member[CommonTokens].Member[Class].Instance;test-source", // + "testlib;;Member[CommonTokens].Member[Super].Subclass.Instance;test-source", // ] } } From ea01bcf5ec9055dbfb26672a30d87882ff36a2ef Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 11:17:15 +0200 Subject: [PATCH 11/51] have the Instance token be an alias for Subclass.ReturnValue --- .../frameworks/data/internal/ApiGraphModelsSpecific.qll | 2 +- python/ql/test/library-tests/frameworks/data/test.expected | 1 + python/ql/test/library-tests/frameworks/data/test.py | 7 ++++++- python/ql/test/library-tests/frameworks/data/test.ql | 2 -- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index d174f9416c7..113bcc71314 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -53,7 +53,7 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { result = node.getMember(token.getAnArgument()) or token.getName() = "Instance" and - result = node.getReturn() // commonly used Token. In Python `Instance` is just an alias for `ReturnValue` + result = node.getASubclass*().getReturn() // In Python `Instance` is just an alias for `Subclass.ReturnValue` or token.getName() = "Awaited" and result = node.getAwaited() diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 8f450186dd1..93697631fb3 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -41,6 +41,7 @@ isSource | test.py:39:11:39:20 | ControlFlowNode for Await | test-source | | test.py:41:8:41:27 | ControlFlowNode for Attribute() | test-source | | test.py:46:7:46:16 | ControlFlowNode for SubClass() | test-source | +| test.py:51:8:51:18 | ControlFlowNode for Sub2Class() | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 707b7714dbc..aa0c9f2a6fc 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -43,4 +43,9 @@ inst = CommonTokens.Class() class SubClass (CommonTokens.Super): pass -sub = SubClass() \ No newline at end of file +sub = SubClass() + +class Sub2Class (CommonTokens.Class): + pass + +sub2 = Sub2Class() \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 0d768404073..bb19389bf9d 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -53,8 +53,6 @@ class Sinks extends ModelInput::SinkModelCsv { } } -// TODO: Commonly used tokens -// TODO: Should `instance()` be shorthand for `subClass*().getReturn()`? // TODO: Uniform tokens for fields // TODO: Non-positional arguments (including Named parameters) // TODO: Any argument From dc38aa8a96fc65097d5f44e2af27de8a161fc216 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 11:35:17 +0200 Subject: [PATCH 12/51] add support for the Method[name] token --- .../frameworks/data/internal/ApiGraphModelsSpecific.qll | 5 ++++- python/ql/test/library-tests/frameworks/data/test.expected | 1 + python/ql/test/library-tests/frameworks/data/test.py | 4 +++- python/ql/test/library-tests/frameworks/data/test.ql | 3 ++- .../ql/test/library-tests/frameworks/data/warnings.expected | 5 ----- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index 113bcc71314..d2de1f21724 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -60,6 +60,9 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { or token.getName() = "Subclass" and result = node.getASubclass*() + or + token.getName() = "Method" and + result = node.getMember(token.getAnArgument()).getReturn() // Some features don't have MaD tokens yet, they would need to be added to API-graphs first. // - decorators ("DecoratedClass", "DecoratedMember", "DecoratedParameter") // - Array/Map elements ("ArrayElement", "Element", "MapKey", "MapValue") @@ -164,7 +167,7 @@ predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) { */ bindingset[name, argument] predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) { - name = ["Member"] and + name = ["Member", "Method"] and exists(argument) or name = ["Argument", "Parameter"] and diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 93697631fb3..72600cd8be5 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -42,6 +42,7 @@ isSource | test.py:41:8:41:27 | ControlFlowNode for Attribute() | test-source | | test.py:46:7:46:16 | ControlFlowNode for SubClass() | test-source | | test.py:51:8:51:18 | ControlFlowNode for Sub2Class() | test-source | +| test.py:53:7:53:16 | ControlFlowNode for Attribute() | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index aa0c9f2a6fc..c53b6c55fbe 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -48,4 +48,6 @@ sub = SubClass() class Sub2Class (CommonTokens.Class): pass -sub2 = Sub2Class() \ No newline at end of file +sub2 = Sub2Class() + +val = inst.foo() \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index bb19389bf9d..43549a36d8e 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -57,7 +57,6 @@ class Sinks extends ModelInput::SinkModelCsv { // TODO: Non-positional arguments (including Named parameters) // TODO: Any argument // TODO: Test taint steps. -// TODO: Should `instance()` be shorthand for `subClass*().getReturn()`? // TODO: // There are no API-graph edges for: ArrayElement, Element, MapKey, MapValue (remove from valid tokens list) class Sources extends ModelInput::SourceModelCsv { // package;type;path;kind @@ -74,6 +73,8 @@ class Sources extends ModelInput::SourceModelCsv { "testlib;;Member[CommonTokens].Member[makePromise].ReturnValue.Awaited;test-source", // "testlib;;Member[CommonTokens].Member[Class].Instance;test-source", // "testlib;;Member[CommonTokens].Member[Super].Subclass.Instance;test-source", // + // method + "testlib;;Member[CommonTokens].Member[Class].Instance.Method[foo];test-source", // ] } } diff --git a/python/ql/test/library-tests/frameworks/data/warnings.expected b/python/ql/test/library-tests/frameworks/data/warnings.expected index b8242f31a85..5cebb548358 100644 --- a/python/ql/test/library-tests/frameworks/data/warnings.expected +++ b/python/ql/test/library-tests/frameworks/data/warnings.expected @@ -2,11 +2,6 @@ | CSV type row should have 5 columns but has 8: test;TooManyColumns;;;Member[Foo].Instance;too;many;columns | | Invalid argument '0-1' in token 'Argument[0-1]' in access path: Method[foo].Argument[0-1] | | Invalid argument '*' in token 'Argument[*]' in access path: Method[foo].Argument[*] | -| Invalid argument 'foo' in token 'Method[foo]' in access path: Method[foo].Arg[0] | -| Invalid argument 'foo' in token 'Method[foo]' in access path: Method[foo].Argument | -| Invalid argument 'foo' in token 'Method[foo]' in access path: Method[foo].Argument[0-1] | -| Invalid argument 'foo' in token 'Method[foo]' in access path: Method[foo].Argument[*] | -| Invalid argument 'foo' in token 'Method[foo]' in access path: Method[foo].Member | | Invalid token 'Argument' is missing its arguments, in access path: Method[foo].Argument | | Invalid token 'Member' is missing its arguments, in access path: Method[foo].Member | | Invalid token name 'Arg' in access path: Method[foo].Arg[0] | From 547047ef19c0c3deea2690a0f16170828d535b47 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 13:48:42 +0200 Subject: [PATCH 13/51] add self parameters to API-graphs, and add support for self parameters in MaD --- python/ql/lib/semmle/python/ApiGraphs.qll | 24 +++++++++++++++++++ .../data/internal/ApiGraphModelsSpecific.qll | 6 ++++- .../frameworks/data/test.expected | 2 ++ .../library-tests/frameworks/data/test.py | 10 +++++++- .../library-tests/frameworks/data/test.ql | 5 +++- 5 files changed, 44 insertions(+), 3 deletions(-) diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index 53c78dd3c19..61c57557bd9 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -136,6 +136,9 @@ module API { result = this.getASuccessor(Label::keywordParameter(name)) } + /** Gets the node representing the self parameter */ + Node getSelfParameter() { result = this.getASuccessor(Label::selfParameter()) } + /** * Gets the number of parameters of the function represented by this node. */ @@ -315,6 +318,12 @@ module API { /** Gets the API node for a parameter of this invocation. */ Node getAParameter() { result = this.getParameter(_) } + /** Gets the object that this method-call is being called on, if this is a method-call */ + Node getSelfParameter() { + result.getARhs() = this.(DataFlow::MethodCallNode).getObject() and + result = callee.getSelfParameter() + } + /** Gets the API node for the keyword parameter `name` of this invocation. */ Node getKeywordParameter(string name) { result = callee.getKeywordParameter(name) and @@ -595,6 +604,9 @@ module API { lbl = Label::keywordParameter(name) and ref.asExpr() = fn.getInnerScope().getArgByName(name) ) + or + lbl = Label::selfParameter() and + ref.asExpr() = any(PY::Parameter p | p = fn.getInnerScope().getAnArg() and p.isSelf()) ) or // Built-ins, treated as members of the module `builtins` @@ -661,6 +673,9 @@ module API { exists(string name | lbl = Label::keywordParameter(name) | arg = pred.getACall().getArgByName(name) ) + or + lbl = Label::selfParameter() and + arg = pred.getACall().(DataFlow::MethodCallNode).getObject() ) } @@ -777,6 +792,7 @@ module API { or exists(any(PY::Function f).getArgByName(name)) } or + MkLabelSelfParameter() or MkLabelReturn() or MkLabelSubclass() or MkLabelAwait() @@ -834,6 +850,11 @@ module API { string getName() { result = name } } + /** A label for the self parameter. */ + class LabelSelfParameter extends ApiLabel, MkLabelSelfParameter { + override string toString() { result = "getSelfParameter()" } + } + /** A label that gets the return value of a function. */ class LabelReturn extends ApiLabel, MkLabelReturn { override string toString() { result = "getReturn()" } @@ -873,6 +894,9 @@ module API { /** Gets the `parameter` edge label for the keyword parameter `name`. */ LabelKeywordParameter keywordParameter(string name) { result.getName() = name } + /** Gets the edge label for the self parameter. */ + LabelSelfParameter selfParameter() { any() } + /** Gets the `return` edge label. */ LabelReturn return() { any() } diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index d2de1f21724..6136aa33bef 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -63,6 +63,10 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { or token.getName() = "Method" and result = node.getMember(token.getAnArgument()).getReturn() + or + token.getName() = ["Argument", "Parameter"] and + token.getAnArgument() = "self" and + result = node.getSelfParameter() // Some features don't have MaD tokens yet, they would need to be added to API-graphs first. // - decorators ("DecoratedClass", "DecoratedMember", "DecoratedParameter") // - Array/Map elements ("ArrayElement", "Element", "MapKey", "MapValue") @@ -78,7 +82,7 @@ API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathToken token) or token.getName() = "Argument" and token.getAnArgument() = "self" and - result.getARhs() = node.(DataFlow::MethodCallNode).getObject() // TODO: Get proper support for this in API-graphs? + result = node.getSelfParameter() or token.getName() = "Argument" and exists(string arg | arg + ":" = token.getAnArgument() | result = node.getKeywordParameter(arg)) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 72600cd8be5..36af5305a63 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -23,6 +23,7 @@ isSink | test.py:33:22:33:24 | ControlFlowNode for one | test-source | | test.py:33:27:33:29 | ControlFlowNode for two | test-source | | test.py:33:32:33:36 | ControlFlowNode for three | test-source | +| test.py:57:7:57:12 | ControlFlowNode for ArgPos | test-source | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | | test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | @@ -43,6 +44,7 @@ isSource | test.py:46:7:46:16 | ControlFlowNode for SubClass() | test-source | | test.py:51:8:51:18 | ControlFlowNode for Sub2Class() | test-source | | test.py:53:7:53:16 | ControlFlowNode for Attribute() | test-source | +| test.py:60:13:60:16 | ControlFlowNode for self | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index c53b6c55fbe..920378d3470 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -50,4 +50,12 @@ class Sub2Class (CommonTokens.Class): sub2 = Sub2Class() -val = inst.foo() \ No newline at end of file +val = inst.foo() + +from testlib import ArgPos + +val = ArgPos.selfThing(arg, named=2) + +class SubClass (ArgPos.MyClass): + def foo(self, arg, named=2): + pass \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 43549a36d8e..81c105aecff 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -49,11 +49,12 @@ class Sinks extends ModelInput::SinkModelCsv { // callsite filter. "testlib;;Member[CallFilter].Member[arityOne].WithArity[1].Argument[0..];test-source", // "testlib;;Member[CallFilter].Member[twoOrMore].WithArity[2..].Argument[0..];test-source", // + // testing non-positional arguments + "testlib;;Member[ArgPos].Member[selfThing].Argument[self];test-source", // ] } } -// TODO: Uniform tokens for fields // TODO: Non-positional arguments (including Named parameters) // TODO: Any argument // TODO: Test taint steps. @@ -75,6 +76,8 @@ class Sources extends ModelInput::SourceModelCsv { "testlib;;Member[CommonTokens].Member[Super].Subclass.Instance;test-source", // // method "testlib;;Member[CommonTokens].Member[Class].Instance.Method[foo];test-source", // + // testing non-positional arguments + "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[self];test-source", // ] } } From c1d3738fb8b61f08436cd6b08620352a36964e76 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Mon, 2 May 2022 12:52:02 +0200 Subject: [PATCH 14/51] fix API-graphs such that the first parameter is the first non-self parameter --- python/ql/lib/semmle/python/ApiGraphs.qll | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index 61c57557bd9..77574ab44ad 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -595,8 +595,12 @@ module API { exists(DataFlow::Node def, PY::CallableExpr fn | rhs(base, def) and fn = trackDefNode(def).asExpr() | - exists(int i | - lbl = Label::parameter(i) and + exists(int i, int offset | + if exists(PY::Parameter p | p = fn.getInnerScope().getAnArg() and p.isSelf()) + then offset = 1 + else offset = 0 + | + lbl = Label::parameter(i - offset) and ref.asExpr() = fn.getInnerScope().getArg(i) ) or From 413d182bcfdf34f6f81237ebb530e6de3df0d3d9 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Mon, 2 May 2022 12:56:44 +0200 Subject: [PATCH 15/51] add support for named parameters --- .../data/internal/ApiGraphModelsSpecific.qll | 10 ++++++++-- python/ql/test/library-tests/frameworks/data/test.py | 7 +++++-- python/ql/test/library-tests/frameworks/data/test.ql | 3 ++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index 6136aa33bef..4e6c3e571a8 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -65,8 +65,14 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { result = node.getMember(token.getAnArgument()).getReturn() or token.getName() = ["Argument", "Parameter"] and - token.getAnArgument() = "self" and - result = node.getSelfParameter() + ( + token.getAnArgument() = "self" and + result = node.getSelfParameter() + or + exists(string name | token.getAnArgument() = name + ":" | + result = node.getKeywordParameter(name) + ) + ) // Some features don't have MaD tokens yet, they would need to be added to API-graphs first. // - decorators ("DecoratedClass", "DecoratedMember", "DecoratedParameter") // - Array/Map elements ("ArrayElement", "Element", "MapKey", "MapValue") diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 920378d3470..67f77a54520 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -57,5 +57,8 @@ from testlib import ArgPos val = ArgPos.selfThing(arg, named=2) class SubClass (ArgPos.MyClass): - def foo(self, arg, named=2): - pass \ No newline at end of file + def foo(self, arg, named=2, otherName=3): + pass + + def secondAndAfter(self, arg1, arg2, arg3, arg4, arg5): + pass diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 81c105aecff..acc78789459 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -55,7 +55,6 @@ class Sinks extends ModelInput::SinkModelCsv { } } -// TODO: Non-positional arguments (including Named parameters) // TODO: Any argument // TODO: Test taint steps. // TODO: // There are no API-graph edges for: ArrayElement, Element, MapKey, MapValue (remove from valid tokens list) @@ -78,6 +77,8 @@ class Sources extends ModelInput::SourceModelCsv { "testlib;;Member[CommonTokens].Member[Class].Instance.Method[foo];test-source", // // testing non-positional arguments "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[self];test-source", // + "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[named:];test-source", // + "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[secondAndAfter].Parameter[1..];test-source", // ] } } From b1fa7f86a837b46b03afd1b9025ae9f13ee3daee Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Mon, 2 May 2022 12:58:15 +0200 Subject: [PATCH 16/51] add support for the any argument tokens --- .../frameworks/data/internal/ApiGraphModelsSpecific.qll | 8 +++++++- .../ql/test/library-tests/frameworks/data/test.expected | 9 +++++++++ python/ql/test/library-tests/frameworks/data/test.py | 3 +++ python/ql/test/library-tests/frameworks/data/test.ql | 3 +++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index 4e6c3e571a8..abe41fd4e66 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -72,6 +72,12 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { exists(string name | token.getAnArgument() = name + ":" | result = node.getKeywordParameter(name) ) + or + token.getAnArgument() = "any" and + result = [node.getParameter(_), node.getKeywordParameter(_)] + or + token.getAnArgument() = "any-named" and + result = node.getKeywordParameter(_) ) // Some features don't have MaD tokens yet, they would need to be added to API-graphs first. // - decorators ("DecoratedClass", "DecoratedMember", "DecoratedParameter") @@ -182,7 +188,7 @@ predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string a or name = ["Argument", "Parameter"] and ( - argument = "self" + argument = ["self", "any", "any-named"] or argument.regexpMatch("\\w+:") // keyword argument ) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 36af5305a63..e31b3a38228 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -24,6 +24,10 @@ isSink | test.py:33:27:33:29 | ControlFlowNode for two | test-source | | test.py:33:32:33:36 | ControlFlowNode for three | test-source | | test.py:57:7:57:12 | ControlFlowNode for ArgPos | test-source | +| test.py:66:17:66:20 | ControlFlowNode for arg1 | test-source | +| test.py:66:23:66:26 | ControlFlowNode for arg2 | test-source | +| test.py:66:34:66:43 | ControlFlowNode for namedThing | test-source | +| test.py:67:34:67:44 | ControlFlowNode for secondNamed | test-source | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | | test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | @@ -45,6 +49,11 @@ isSource | test.py:51:8:51:18 | ControlFlowNode for Sub2Class() | test-source | | test.py:53:7:53:16 | ControlFlowNode for Attribute() | test-source | | test.py:60:13:60:16 | ControlFlowNode for self | test-source | +| test.py:60:24:60:28 | ControlFlowNode for named | test-source | +| test.py:63:36:63:39 | ControlFlowNode for arg2 | test-source | +| test.py:63:42:63:45 | ControlFlowNode for arg3 | test-source | +| test.py:63:48:63:51 | ControlFlowNode for arg4 | test-source | +| test.py:63:54:63:57 | ControlFlowNode for arg5 | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 67f77a54520..dbc241c71e9 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -62,3 +62,6 @@ class SubClass (ArgPos.MyClass): def secondAndAfter(self, arg1, arg2, arg3, arg4, arg5): pass + +ArgPos.anyParam(arg1, arg2, name=namedThing) +ArgPos.anyNamed(arg4, arg5, name=secondNamed) \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index acc78789459..d2068df23e2 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -51,6 +51,9 @@ class Sinks extends ModelInput::SinkModelCsv { "testlib;;Member[CallFilter].Member[twoOrMore].WithArity[2..].Argument[0..];test-source", // // testing non-positional arguments "testlib;;Member[ArgPos].Member[selfThing].Argument[self];test-source", // + // any argument + "testlib;;Member[ArgPos].Member[anyParam].Argument[any];test-source", // + "testlib;;Member[ArgPos].Member[anyNamed].Argument[any-named];test-source", // ] } } From a8790412dda666894fd73cbb913f6186b43762a2 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 22:49:43 +0200 Subject: [PATCH 17/51] add support for the Argument[any] and Argument[any-named] tokens --- .../data/internal/ApiGraphModelsSpecific.qll | 19 +++++++++++++------ .../library-tests/frameworks/data/test.ql | 3 +-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index abe41fd4e66..206a6ff1e52 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -92,12 +92,19 @@ API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathToken token) token.getName() = "Instance" and result = node.getReturn() or - token.getName() = "Argument" and - token.getAnArgument() = "self" and - result = node.getSelfParameter() - or - token.getName() = "Argument" and - exists(string arg | arg + ":" = token.getAnArgument() | result = node.getKeywordParameter(arg)) + token.getName() = ["Argument", "Parameter"] and + ( + token.getAnArgument() = "self" and + result = node.getSelfParameter() + or + token.getAnArgument() = "any" and + result = [node.getParameter(_), node.getKeywordParameter(_)] + or + token.getAnArgument() = "any-named" and + result = node.getKeywordParameter(_) + or + exists(string arg | arg + ":" = token.getAnArgument() | result = node.getKeywordParameter(arg)) + ) } /** diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index d2068df23e2..9986011e89f 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -58,8 +58,7 @@ class Sinks extends ModelInput::SinkModelCsv { } } -// TODO: Any argument -// TODO: Test taint steps. +// TODO: Test taint steps (include that the base path may end with ".Call") // TODO: // There are no API-graph edges for: ArrayElement, Element, MapKey, MapValue (remove from valid tokens list) class Sources extends ModelInput::SourceModelCsv { // package;type;path;kind From 649df1dd317100c78a34c4b712c133422e224a37 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 23:48:39 +0200 Subject: [PATCH 18/51] simple taint-flow test --- .../frameworks/data/test.expected | 5 +++ .../library-tests/frameworks/data/test.py | 7 +++- .../library-tests/frameworks/data/test.ql | 33 +++++++++---------- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index e31b3a38228..eafa55318aa 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -4,6 +4,7 @@ taintFlow | test.py:9:8:9:14 | ControlFlowNode for alias() | test.py:9:8:9:14 | ControlFlowNode for alias() | | test.py:10:8:10:22 | ControlFlowNode for Attribute() | test.py:10:8:10:22 | ControlFlowNode for Attribute() | | test.py:11:8:11:30 | ControlFlowNode for Attribute() | test.py:11:8:11:30 | ControlFlowNode for Attribute() | +| test.py:71:28:71:38 | ControlFlowNode for getSource() | test.py:71:8:71:39 | ControlFlowNode for Attribute() | isSink | test.py:4:8:4:8 | ControlFlowNode for x | test-sink | | test.py:7:17:7:17 | ControlFlowNode for x | test-sink | @@ -28,6 +29,8 @@ isSink | test.py:66:23:66:26 | ControlFlowNode for arg2 | test-source | | test.py:66:34:66:43 | ControlFlowNode for namedThing | test-source | | test.py:67:34:67:44 | ControlFlowNode for secondNamed | test-source | +| test.py:71:8:71:39 | ControlFlowNode for Attribute() | test-sink | +| test.py:72:8:72:47 | ControlFlowNode for Attribute() | test-sink | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | | test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | @@ -54,6 +57,8 @@ isSource | test.py:63:42:63:45 | ControlFlowNode for arg3 | test-source | | test.py:63:48:63:51 | ControlFlowNode for arg4 | test-source | | test.py:63:54:63:57 | ControlFlowNode for arg5 | test-source | +| test.py:71:28:71:38 | ControlFlowNode for getSource() | test-source | +| test.py:72:36:72:46 | ControlFlowNode for getSource() | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index dbc241c71e9..8780838930d 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -64,4 +64,9 @@ class SubClass (ArgPos.MyClass): pass ArgPos.anyParam(arg1, arg2, name=namedThing) -ArgPos.anyNamed(arg4, arg5, name=secondNamed) \ No newline at end of file +ArgPos.anyNamed(arg4, arg5, name=secondNamed) + +from testlib import Steps + +mySink(Steps.preserveTaint(getSource())) # FLOW +mySink(Steps.preserveTaint("safe", getSource())) # NO FLOW \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 9986011e89f..e2fd53b142a 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -5,24 +5,21 @@ import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.DataFlow private import semmle.python.ApiGraphs -// TODO: -/* - * class Steps extends ModelInput::SummaryModelCsv { - * override predicate row(string row) { - * // package;type;path;input;output;kind - * row = - * [ - * "testlib;;Member[preserveTaint];Argument[0];ReturnValue;taint", - * "testlib;;Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint", - * "testlib;;Member[taintIntoCallbackThis];Argument[0];Argument[1..2].Parameter[this];taint", - * "testlib;;Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint", - * "testlib;;Member[preserveAllButFirstArgument];Argument[1..];ReturnValue;taint", - * "testlib;;Member[preserveAllIfCall].Call;Argument[0..];ReturnValue;taint", - * "testlib;;Member[getSource].ReturnValue.Member[continue];Argument[this];ReturnValue;taint", - * ] - * } - * } - */ +class Steps extends ModelInput::SummaryModelCsv { + override predicate row(string row) { + // package;type;path;input;output;kind + row = + [ + "testlib;;Member[Steps].Member[preserveTaint];Argument[0];ReturnValue;taint", + // "testlib;;Member[Steps].Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint", + // "testlib;;Member[Steps].Member[taintIntoCallbackThis];Argument[0];Argument[1..2].Parameter[this];taint", + // "testlib;;Member[Steps].Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint", + // "testlib;;Member[Steps].Member[preserveAllButFirstArgument];Argument[1..];ReturnValue;taint", + // "testlib;;Member[Steps].Member[preserveAllIfCall].Call;Argument[0..];ReturnValue;taint", + // "testlib;;Member[Steps].Member[getSource].ReturnValue.Member[continue];Argument[this];ReturnValue;taint", + ] + } +} class Types extends ModelInput::TypeModelCsv { override predicate row(string row) { From 0f1e070d82409691a1e32abea69efdec06a365bc Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 27 Apr 2022 23:51:28 +0200 Subject: [PATCH 19/51] second test of taint steps --- .../ql/test/library-tests/frameworks/data/test.expected | 6 ++++++ python/ql/test/library-tests/frameworks/data/test.py | 9 ++++++++- python/ql/test/library-tests/frameworks/data/test.ql | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index eafa55318aa..d336ac76b88 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -5,6 +5,8 @@ taintFlow | test.py:10:8:10:22 | ControlFlowNode for Attribute() | test.py:10:8:10:22 | ControlFlowNode for Attribute() | | test.py:11:8:11:30 | ControlFlowNode for Attribute() | test.py:11:8:11:30 | ControlFlowNode for Attribute() | | test.py:71:28:71:38 | ControlFlowNode for getSource() | test.py:71:8:71:39 | ControlFlowNode for Attribute() | +| test.py:75:5:75:15 | ControlFlowNode for getSource() | test.py:76:22:76:22 | ControlFlowNode for x | +| test.py:75:5:75:15 | ControlFlowNode for getSource() | test.py:77:22:77:22 | ControlFlowNode for y | isSink | test.py:4:8:4:8 | ControlFlowNode for x | test-sink | | test.py:7:17:7:17 | ControlFlowNode for x | test-sink | @@ -31,6 +33,9 @@ isSink | test.py:67:34:67:44 | ControlFlowNode for secondNamed | test-source | | test.py:71:8:71:39 | ControlFlowNode for Attribute() | test-sink | | test.py:72:8:72:47 | ControlFlowNode for Attribute() | test-sink | +| test.py:76:22:76:22 | ControlFlowNode for x | test-sink | +| test.py:77:22:77:22 | ControlFlowNode for y | test-sink | +| test.py:78:22:78:22 | ControlFlowNode for z | test-sink | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | | test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | @@ -59,6 +64,7 @@ isSource | test.py:63:54:63:57 | ControlFlowNode for arg5 | test-source | | test.py:71:28:71:38 | ControlFlowNode for getSource() | test-source | | test.py:72:36:72:46 | ControlFlowNode for getSource() | test-source | +| test.py:75:5:75:15 | ControlFlowNode for getSource() | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 8780838930d..d25ceb97fd2 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -69,4 +69,11 @@ ArgPos.anyNamed(arg4, arg5, name=secondNamed) from testlib import Steps mySink(Steps.preserveTaint(getSource())) # FLOW -mySink(Steps.preserveTaint("safe", getSource())) # NO FLOW \ No newline at end of file +mySink(Steps.preserveTaint("safe", getSource())) # NO FLOW + +Steps.taintIntoCallback( + getSource(), + lambda x: mySink(x), # FLOW + lambda y: mySink(y), # FLOW + lambda z: mySink(z) # NO FLOW +) \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index e2fd53b142a..21c246fac4a 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -11,7 +11,7 @@ class Steps extends ModelInput::SummaryModelCsv { row = [ "testlib;;Member[Steps].Member[preserveTaint];Argument[0];ReturnValue;taint", - // "testlib;;Member[Steps].Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint", + "testlib;;Member[Steps].Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint", // "testlib;;Member[Steps].Member[taintIntoCallbackThis];Argument[0];Argument[1..2].Parameter[this];taint", // "testlib;;Member[Steps].Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint", // "testlib;;Member[Steps].Member[preserveAllButFirstArgument];Argument[1..];ReturnValue;taint", From 894252dfa723ebcf43814a26ffe321614e84e90e Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Thu, 28 Apr 2022 09:56:04 +0200 Subject: [PATCH 20/51] third test of taint steps --- .../ql/test/library-tests/frameworks/data/test.expected | 8 ++++++++ python/ql/test/library-tests/frameworks/data/test.py | 6 +++++- python/ql/test/library-tests/frameworks/data/test.ql | 6 +----- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index d336ac76b88..2b40ba5c4fb 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -7,6 +7,8 @@ taintFlow | test.py:71:28:71:38 | ControlFlowNode for getSource() | test.py:71:8:71:39 | ControlFlowNode for Attribute() | | test.py:75:5:75:15 | ControlFlowNode for getSource() | test.py:76:22:76:22 | ControlFlowNode for x | | test.py:75:5:75:15 | ControlFlowNode for getSource() | test.py:77:22:77:22 | ControlFlowNode for y | +| test.py:81:36:81:46 | ControlFlowNode for getSource() | test.py:81:8:81:47 | ControlFlowNode for Attribute() | +| test.py:83:50:83:60 | ControlFlowNode for getSource() | test.py:83:8:83:61 | ControlFlowNode for Attribute() | isSink | test.py:4:8:4:8 | ControlFlowNode for x | test-sink | | test.py:7:17:7:17 | ControlFlowNode for x | test-sink | @@ -36,6 +38,9 @@ isSink | test.py:76:22:76:22 | ControlFlowNode for x | test-sink | | test.py:77:22:77:22 | ControlFlowNode for y | test-sink | | test.py:78:22:78:22 | ControlFlowNode for z | test-sink | +| test.py:81:8:81:47 | ControlFlowNode for Attribute() | test-sink | +| test.py:82:8:82:54 | ControlFlowNode for Attribute() | test-sink | +| test.py:83:8:83:61 | ControlFlowNode for Attribute() | test-sink | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | | test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | @@ -65,6 +70,9 @@ isSource | test.py:71:28:71:38 | ControlFlowNode for getSource() | test-source | | test.py:72:36:72:46 | ControlFlowNode for getSource() | test-source | | test.py:75:5:75:15 | ControlFlowNode for getSource() | test-source | +| test.py:81:36:81:46 | ControlFlowNode for getSource() | test-source | +| test.py:82:43:82:53 | ControlFlowNode for getSource() | test-source | +| test.py:83:50:83:60 | ControlFlowNode for getSource() | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index d25ceb97fd2..a7388f53832 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -76,4 +76,8 @@ Steps.taintIntoCallback( lambda x: mySink(x), # FLOW lambda y: mySink(y), # FLOW lambda z: mySink(z) # NO FLOW -) \ No newline at end of file +) + +mySink(Steps.preserveArgZeroAndTwo(getSource())) # FLOW +mySink(Steps.preserveArgZeroAndTwo("foo", getSource())) # NO FLOW +mySink(Steps.preserveArgZeroAndTwo("foo", "bar", getSource())) # FLOW \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 21c246fac4a..c9cb3033719 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -12,11 +12,8 @@ class Steps extends ModelInput::SummaryModelCsv { [ "testlib;;Member[Steps].Member[preserveTaint];Argument[0];ReturnValue;taint", "testlib;;Member[Steps].Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint", - // "testlib;;Member[Steps].Member[taintIntoCallbackThis];Argument[0];Argument[1..2].Parameter[this];taint", - // "testlib;;Member[Steps].Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint", + "testlib;;Member[Steps].Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint", // "testlib;;Member[Steps].Member[preserveAllButFirstArgument];Argument[1..];ReturnValue;taint", - // "testlib;;Member[Steps].Member[preserveAllIfCall].Call;Argument[0..];ReturnValue;taint", - // "testlib;;Member[Steps].Member[getSource].ReturnValue.Member[continue];Argument[this];ReturnValue;taint", ] } } @@ -56,7 +53,6 @@ class Sinks extends ModelInput::SinkModelCsv { } // TODO: Test taint steps (include that the base path may end with ".Call") -// TODO: // There are no API-graph edges for: ArrayElement, Element, MapKey, MapValue (remove from valid tokens list) class Sources extends ModelInput::SourceModelCsv { // package;type;path;kind override predicate row(string row) { From 9c3d45a16af06a70aa7f29fbf61285ed1afd1e05 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Thu, 28 Apr 2022 09:57:43 +0200 Subject: [PATCH 21/51] last test of taint steps --- .../ql/test/library-tests/frameworks/data/test.expected | 8 ++++++++ python/ql/test/library-tests/frameworks/data/test.py | 6 +++++- python/ql/test/library-tests/frameworks/data/test.ql | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 2b40ba5c4fb..721854dad37 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -9,6 +9,8 @@ taintFlow | test.py:75:5:75:15 | ControlFlowNode for getSource() | test.py:77:22:77:22 | ControlFlowNode for y | | test.py:81:36:81:46 | ControlFlowNode for getSource() | test.py:81:8:81:47 | ControlFlowNode for Attribute() | | test.py:83:50:83:60 | ControlFlowNode for getSource() | test.py:83:8:83:61 | ControlFlowNode for Attribute() | +| test.py:86:49:86:59 | ControlFlowNode for getSource() | test.py:86:8:86:60 | ControlFlowNode for Attribute() | +| test.py:87:56:87:66 | ControlFlowNode for getSource() | test.py:87:8:87:67 | ControlFlowNode for Attribute() | isSink | test.py:4:8:4:8 | ControlFlowNode for x | test-sink | | test.py:7:17:7:17 | ControlFlowNode for x | test-sink | @@ -41,6 +43,9 @@ isSink | test.py:81:8:81:47 | ControlFlowNode for Attribute() | test-sink | | test.py:82:8:82:54 | ControlFlowNode for Attribute() | test-sink | | test.py:83:8:83:61 | ControlFlowNode for Attribute() | test-sink | +| test.py:85:8:85:53 | ControlFlowNode for Attribute() | test-sink | +| test.py:86:8:86:60 | ControlFlowNode for Attribute() | test-sink | +| test.py:87:8:87:67 | ControlFlowNode for Attribute() | test-sink | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | | test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | @@ -73,6 +78,9 @@ isSource | test.py:81:36:81:46 | ControlFlowNode for getSource() | test-source | | test.py:82:43:82:53 | ControlFlowNode for getSource() | test-source | | test.py:83:50:83:60 | ControlFlowNode for getSource() | test-source | +| test.py:85:42:85:52 | ControlFlowNode for getSource() | test-source | +| test.py:86:49:86:59 | ControlFlowNode for getSource() | test-source | +| test.py:87:56:87:66 | ControlFlowNode for getSource() | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index a7388f53832..db8e8c98e91 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -80,4 +80,8 @@ Steps.taintIntoCallback( mySink(Steps.preserveArgZeroAndTwo(getSource())) # FLOW mySink(Steps.preserveArgZeroAndTwo("foo", getSource())) # NO FLOW -mySink(Steps.preserveArgZeroAndTwo("foo", "bar", getSource())) # FLOW \ No newline at end of file +mySink(Steps.preserveArgZeroAndTwo("foo", "bar", getSource())) # FLOW + +mySink(Steps.preserveAllButFirstArgument(getSource())) # NO FLOW +mySink(Steps.preserveAllButFirstArgument("foo", getSource())) # FLOW +mySink(Steps.preserveAllButFirstArgument("foo", "bar", getSource())) # FLOW \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index c9cb3033719..d5a74a93371 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -13,7 +13,7 @@ class Steps extends ModelInput::SummaryModelCsv { "testlib;;Member[Steps].Member[preserveTaint];Argument[0];ReturnValue;taint", "testlib;;Member[Steps].Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint", "testlib;;Member[Steps].Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint", - // "testlib;;Member[Steps].Member[preserveAllButFirstArgument];Argument[1..];ReturnValue;taint", + "testlib;;Member[Steps].Member[preserveAllButFirstArgument];Argument[1..];ReturnValue;taint", ] } } From 6c67e51ec3fa9df0c4bb6577c194bd264a9dff55 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Thu, 28 Apr 2022 10:00:44 +0200 Subject: [PATCH 22/51] add test for the .Call token --- python/ql/test/library-tests/frameworks/data/test.ql | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index d5a74a93371..d694837b253 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -10,10 +10,10 @@ class Steps extends ModelInput::SummaryModelCsv { // package;type;path;input;output;kind row = [ - "testlib;;Member[Steps].Member[preserveTaint];Argument[0];ReturnValue;taint", + "testlib;;Member[Steps].Member[preserveTaint].Call;Argument[0];ReturnValue;taint", "testlib;;Member[Steps].Member[taintIntoCallback];Argument[0];Argument[1..2].Parameter[0];taint", "testlib;;Member[Steps].Member[preserveArgZeroAndTwo];Argument[0,2];ReturnValue;taint", - "testlib;;Member[Steps].Member[preserveAllButFirstArgument];Argument[1..];ReturnValue;taint", + "testlib;;Member[Steps].Member[preserveAllButFirstArgument].Call;Argument[1..];ReturnValue;taint", ] } } @@ -52,7 +52,6 @@ class Sinks extends ModelInput::SinkModelCsv { } } -// TODO: Test taint steps (include that the base path may end with ".Call") class Sources extends ModelInput::SourceModelCsv { // package;type;path;kind override predicate row(string row) { From c0eca0d09a76cef154e14dd75032e7b87f11a10a Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Sun, 1 May 2022 17:57:48 +0200 Subject: [PATCH 23/51] deprecate SqlConstruction --- .../2022-05-02-deprecate-sqlconstruction.md | 4 ++ python/ql/lib/semmle/python/Concepts.qll | 41 ++++--------------- .../lib/semmle/python/frameworks/Aiomysql.qll | 4 +- .../ql/lib/semmle/python/frameworks/Aiopg.qll | 4 +- .../lib/semmle/python/frameworks/Asyncpg.qll | 2 +- .../semmle/python/frameworks/SqlAlchemy.qll | 2 +- .../dataflow/SqlInjectionCustomizations.qll | 3 +- .../test/experimental/meta/ConceptsTest.qll | 18 -------- .../library-tests/frameworks/aiomysql/test.py | 10 ++--- .../library-tests/frameworks/aiopg/test.py | 10 ++--- .../library-tests/frameworks/asyncpg/test.py | 6 +-- .../flask_sqlalchemy/SqlExecution.py | 6 +-- .../frameworks/sqlalchemy/SqlExecution.py | 2 +- .../frameworks/sqlalchemy/new_tests.py | 6 +-- .../frameworks/sqlalchemy/taint_test.py | 16 ++++---- 15 files changed, 49 insertions(+), 85 deletions(-) create mode 100644 python/ql/lib/change-notes/2022-05-02-deprecate-sqlconstruction.md diff --git a/python/ql/lib/change-notes/2022-05-02-deprecate-sqlconstruction.md b/python/ql/lib/change-notes/2022-05-02-deprecate-sqlconstruction.md new file mode 100644 index 00000000000..4219fd12d9c --- /dev/null +++ b/python/ql/lib/change-notes/2022-05-02-deprecate-sqlconstruction.md @@ -0,0 +1,4 @@ +--- +category: deprecated +--- +The `SqlConstruction` class and module from `Concepts.qll` has been deprecated. Use `SqlExecution` from the same file instead. \ No newline at end of file diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index e734105cd2e..6154d24628b 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -308,36 +308,19 @@ module CodeExecution { } } -/** - * A data-flow node that constructs an SQL statement. - * - * Often, it is worthy of an alert if an SQL statement is constructed such that - * executing it would be a security risk. - * - * If it is important that the SQL statement is indeed executed, then use `SQLExecution`. - * - * Extend this class to refine existing API models. If you want to model new APIs, - * extend `SqlConstruction::Range` instead. - */ -class SqlConstruction extends DataFlow::Node instanceof SqlConstruction::Range { +/** DEPRECATED: Use `SqlExecution` instead. */ +deprecated class SqlConstruction extends DataFlow::Node instanceof SqlConstruction::Range { /** Gets the argument that specifies the SQL statements to be constructed. */ DataFlow::Node getSql() { result = super.getSql() } } -/** Provides a class for modeling new SQL execution APIs. */ -module SqlConstruction { - /** - * A data-flow node that constructs an SQL statement. - * - * Often, it is worthy of an alert if an SQL statement is constructed such that - * executing it would be a security risk. - * - * If it is important that the SQL statement is indeed executed, then use `SQLExecution`. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `SqlConstruction` instead. - */ - abstract class Range extends DataFlow::Node { +/** + * DEPRECATED: Use `SqlExecution` instead. + * Provides a class for modeling new SQL execution APIs. + */ +deprecated module SqlConstruction { + /** DEPRECATED: Use `SqlExecution::Range` instead. */ + abstract deprecated class Range extends DataFlow::Node { /** Gets the argument that specifies the SQL statements to be constructed. */ abstract DataFlow::Node getSql(); } @@ -346,9 +329,6 @@ module SqlConstruction { /** * A data-flow node that executes SQL statements. * - * If the context of interest is such that merely constructing an SQL statement - * would be valuabe to report, then consider using `SqlConstruction`. - * * Extend this class to refine existing API models. If you want to model new APIs, * extend `SqlExecution::Range` instead. */ @@ -362,9 +342,6 @@ module SqlExecution { /** * A data-flow node that executes SQL statements. * - * If the context of interest is such that merely constructing an SQL statement - * would be valuabe to report, then consider using `SqlConstruction`. - * * Extend this class to model new APIs. If you want to refine existing API models, * extend `SqlExecution` instead. */ diff --git a/python/ql/lib/semmle/python/frameworks/Aiomysql.qll b/python/ql/lib/semmle/python/frameworks/Aiomysql.qll index 112dc58d061..afff79783c2 100644 --- a/python/ql/lib/semmle/python/frameworks/Aiomysql.qll +++ b/python/ql/lib/semmle/python/frameworks/Aiomysql.qll @@ -50,7 +50,7 @@ private module Aiomysql { * A query. Calling `execute` on a `Cursor` constructs a query. * See https://aiomysql.readthedocs.io/en/stable/cursors.html#Cursor.execute */ - class CursorExecuteCall extends SqlConstruction::Range, API::CallNode { + class CursorExecuteCall extends SqlExecution::Range, API::CallNode { CursorExecuteCall() { this = cursor().getMember("execute").getACall() } override DataFlow::Node getSql() { result = this.getParameter(0, "operation").getARhs() } @@ -91,7 +91,7 @@ private module Aiomysql { * A query. Calling `execute` on a `SAConnection` constructs a query. * See https://aiomysql.readthedocs.io/en/stable/sa.html#aiomysql.sa.SAConnection.execute */ - class SAConnectionExecuteCall extends SqlConstruction::Range, API::CallNode { + class SAConnectionExecuteCall extends SqlExecution::Range, API::CallNode { SAConnectionExecuteCall() { this = saConnection().getMember("execute").getACall() } override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() } diff --git a/python/ql/lib/semmle/python/frameworks/Aiopg.qll b/python/ql/lib/semmle/python/frameworks/Aiopg.qll index 1a60c433150..67ba3c80db0 100644 --- a/python/ql/lib/semmle/python/frameworks/Aiopg.qll +++ b/python/ql/lib/semmle/python/frameworks/Aiopg.qll @@ -50,7 +50,7 @@ private module Aiopg { * A query. Calling `execute` on a `Cursor` constructs a query. * See https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Cursor.execute */ - class CursorExecuteCall extends SqlConstruction::Range, API::CallNode { + class CursorExecuteCall extends SqlExecution::Range, API::CallNode { CursorExecuteCall() { this = cursor().getMember("execute").getACall() } override DataFlow::Node getSql() { result = this.getParameter(0, "operation").getARhs() } @@ -87,7 +87,7 @@ private module Aiopg { * A query. Calling `execute` on a `SAConnection` constructs a query. * See https://aiopg.readthedocs.io/en/stable/sa.html#aiopg.sa.SAConnection.execute */ - class SAConnectionExecuteCall extends SqlConstruction::Range, API::CallNode { + class SAConnectionExecuteCall extends SqlExecution::Range, API::CallNode { SAConnectionExecuteCall() { this = saConnection().getMember("execute").getACall() } override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() } diff --git a/python/ql/lib/semmle/python/frameworks/Asyncpg.qll b/python/ql/lib/semmle/python/frameworks/Asyncpg.qll index 2b2b9ed2e1b..77678c92b51 100644 --- a/python/ql/lib/semmle/python/frameworks/Asyncpg.qll +++ b/python/ql/lib/semmle/python/frameworks/Asyncpg.qll @@ -56,7 +56,7 @@ private module Asyncpg { * The creation of the `Cursor` executes the query. */ module Cursor { - class CursorConstruction extends SqlConstruction::Range, API::CallNode { + class CursorConstruction extends SqlExecution::Range, API::CallNode { CursorConstruction() { this = ModelOutput::getATypeNode("asyncpg", "Connection").getMember("cursor").getACall() } diff --git a/python/ql/lib/semmle/python/frameworks/SqlAlchemy.qll b/python/ql/lib/semmle/python/frameworks/SqlAlchemy.qll index c1712e7e7eb..8a7e2f176e8 100644 --- a/python/ql/lib/semmle/python/frameworks/SqlAlchemy.qll +++ b/python/ql/lib/semmle/python/frameworks/SqlAlchemy.qll @@ -323,7 +323,7 @@ module SqlAlchemy { * A construction of a `sqlalchemy.sql.expression.TextClause`, which represents a * textual SQL string directly. */ - abstract class TextClauseConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode { + abstract class TextClauseConstruction extends SqlExecution::Range, DataFlow::CallCfgNode { /** Gets the argument that specifies the SQL text. */ override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("text")] } } diff --git a/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll index 756a1f6b773..9e3e2749a2b 100644 --- a/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll @@ -43,9 +43,10 @@ module SqlInjection { class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } /** + * DEPRECATED: Use `SqlExecutionAsSink` instead. * A SQL statement of a SQL construction, considered as a flow sink. */ - class SqlConstructionAsSink extends Sink { + deprecated class SqlConstructionAsSink extends Sink { SqlConstructionAsSink() { this = any(SqlConstruction c).getSql() } } diff --git a/python/ql/test/experimental/meta/ConceptsTest.qll b/python/ql/test/experimental/meta/ConceptsTest.qll index 8f9435f633f..bdf71059680 100644 --- a/python/ql/test/experimental/meta/ConceptsTest.qll +++ b/python/ql/test/experimental/meta/ConceptsTest.qll @@ -128,24 +128,6 @@ class CodeExecutionTest extends InlineExpectationsTest { } } -class SqlConstructionTest extends InlineExpectationsTest { - SqlConstructionTest() { this = "SqlConstructionTest" } - - override string getARelevantTag() { result = "constructedSql" } - - override predicate hasActualResult(Location location, string element, string tag, string value) { - exists(location.getFile().getRelativePath()) and - exists(SqlConstruction e, DataFlow::Node sql | - exists(location.getFile().getRelativePath()) and - sql = e.getSql() and - location = e.getLocation() and - element = sql.toString() and - value = prettyNodeForInlineTest(sql) and - tag = "constructedSql" - ) - } -} - class SqlExecutionTest extends InlineExpectationsTest { SqlExecutionTest() { this = "SqlExecutionTest" } diff --git a/python/ql/test/library-tests/frameworks/aiomysql/test.py b/python/ql/test/library-tests/frameworks/aiomysql/test.py index 782fa6cb7bf..5a06b46b9f1 100644 --- a/python/ql/test/library-tests/frameworks/aiomysql/test.py +++ b/python/ql/test/library-tests/frameworks/aiomysql/test.py @@ -5,24 +5,24 @@ async def test_cursor(): # Create connection directly conn = await aiomysql.connect() cur = await conn.cursor() - await cur.execute("sql") # $ getSql="sql" constructedSql="sql" + await cur.execute("sql") # $ getSql="sql" # Create connection via pool async with aiomysql.create_pool() as pool: # Create Cursor via Connection async with pool.acquire() as conn: async with conn.cursor() as cur: - await cur.execute("sql") # $ getSql="sql" constructedSql="sql" + await cur.execute("sql") # $ getSql="sql" # Create Cursor directly async with pool.cursor() as cur: - await cur.execute("sql") # $ getSql="sql" constructedSql="sql" + await cur.execute("sql") # $ getSql="sql" # variants using as few `async with` as possible pool = await aiomysql.create_pool() conn = await pool.acquire() cur = await conn.cursor() - await cur.execute("sql") # $ getSql="sql" constructedSql="sql" + await cur.execute("sql") # $ getSql="sql" # Test SQLAlchemy integration from aiomysql.sa import create_engine @@ -30,4 +30,4 @@ from aiomysql.sa import create_engine async def test_engine(): engine = await create_engine() conn = await engine.acquire() - await conn.execute("sql") # $ getSql="sql" constructedSql="sql" + await conn.execute("sql") # $ getSql="sql" diff --git a/python/ql/test/library-tests/frameworks/aiopg/test.py b/python/ql/test/library-tests/frameworks/aiopg/test.py index 63bf141f52d..1d085066688 100644 --- a/python/ql/test/library-tests/frameworks/aiopg/test.py +++ b/python/ql/test/library-tests/frameworks/aiopg/test.py @@ -5,24 +5,24 @@ async def test_cursor(): # Create connection directly conn = await aiopg.connect() cur = await conn.cursor() - await cur.execute("sql") # $ getSql="sql" constructedSql="sql" + await cur.execute("sql") # $ getSql="sql" # Create connection via pool async with aiopg.create_pool() as pool: # Create Cursor via Connection async with pool.acquire() as conn: async with conn.cursor() as cur: - await cur.execute("sql") # $ getSql="sql" constructedSql="sql" + await cur.execute("sql") # $ getSql="sql" # Create Cursor directly async with pool.cursor() as cur: - await cur.execute("sql") # $ getSql="sql" constructedSql="sql" + await cur.execute("sql") # $ getSql="sql" # variants using as few `async with` as possible pool = await aiopg.create_pool() conn = await pool.acquire() cur = await conn.cursor() - await cur.execute("sql") # $ getSql="sql" constructedSql="sql" + await cur.execute("sql") # $ getSql="sql" # Test SQLAlchemy integration from aiopg.sa import create_engine @@ -30,4 +30,4 @@ from aiopg.sa import create_engine async def test_engine(): engine = await create_engine() conn = await engine.acquire() - await conn.execute("sql") # $ getSql="sql" constructedSql="sql" + await conn.execute("sql") # $ getSql="sql" diff --git a/python/ql/test/library-tests/frameworks/asyncpg/test.py b/python/ql/test/library-tests/frameworks/asyncpg/test.py index 572fc454bed..7e2687e5766 100644 --- a/python/ql/test/library-tests/frameworks/asyncpg/test.py +++ b/python/ql/test/library-tests/frameworks/asyncpg/test.py @@ -43,20 +43,20 @@ async def test_cursor(): try: async with conn.transaction(): - cursor = await conn.cursor("sql") # $ getSql="sql" constructedSql="sql" + cursor = await conn.cursor("sql") # $ getSql="sql" await cursor.fetch() pstmt = await conn.prepare("psql") # $ getSql="psql" pcursor = await pstmt.cursor() # $ getSql="psql" await pcursor.fetch() - async for record in conn.cursor("sql"): # $ getSql="sql" constructedSql="sql" + async for record in conn.cursor("sql"): # $ getSql="sql" pass async for record in pstmt.cursor(): # $ getSql="psql" pass - cursor_factory = conn.cursor("sql") # $ constructedSql="sql" + cursor_factory = conn.cursor("sql") # $ getSql="sql" cursor = await cursor_factory # $ getSql="sql" pcursor_factory = pstmt.cursor() diff --git a/python/ql/test/library-tests/frameworks/flask_sqlalchemy/SqlExecution.py b/python/ql/test/library-tests/frameworks/flask_sqlalchemy/SqlExecution.py index 39cd49195d2..19a1349ffb6 100644 --- a/python/ql/test/library-tests/frameworks/flask_sqlalchemy/SqlExecution.py +++ b/python/ql/test/library-tests/frameworks/flask_sqlalchemy/SqlExecution.py @@ -12,7 +12,7 @@ db = SQLAlchemy(app) # - https://github.com/pallets/flask-sqlalchemy/blob/931ec00d1e27f51508e05706eef41cc4419a0b32/src/flask_sqlalchemy/__init__.py#L765 # - https://github.com/pallets/flask-sqlalchemy/blob/931ec00d1e27f51508e05706eef41cc4419a0b32/src/flask_sqlalchemy/__init__.py#L99-L109 -assert str(type(db.text("Foo"))) == "" # $ constructedSql="Foo" +assert str(type(db.text("Foo"))) == "" # $ getSql="Foo" # also has engine/session instantiated @@ -44,8 +44,8 @@ assert result.fetchall() == [("Foo",)] # text -t = db.text("foo") # $ constructedSql="foo" +t = db.text("foo") # $ getSql="foo" assert isinstance(t, sqlalchemy.sql.expression.TextClause) -t = db.text(text="foo") # $ constructedSql="foo" +t = db.text(text="foo") # $ getSql="foo" assert isinstance(t, sqlalchemy.sql.expression.TextClause) diff --git a/python/ql/test/library-tests/frameworks/sqlalchemy/SqlExecution.py b/python/ql/test/library-tests/frameworks/sqlalchemy/SqlExecution.py index fe75fc17d5c..7d9c478e27b 100644 --- a/python/ql/test/library-tests/frameworks/sqlalchemy/SqlExecution.py +++ b/python/ql/test/library-tests/frameworks/sqlalchemy/SqlExecution.py @@ -46,7 +46,7 @@ with engine.begin() as connection: connection.execute("some sql") # $ getSql="some sql" # Injection requiring the text() taint-step -t = text("some sql") # $ constructedSql="some sql" +t = text("some sql") # $ getSql="some sql" session.query(User).filter(t) session.query(User).group_by(User.id).having(t) session.query(User).group_by(t).first() diff --git a/python/ql/test/library-tests/frameworks/sqlalchemy/new_tests.py b/python/ql/test/library-tests/frameworks/sqlalchemy/new_tests.py index 6b11ee5d070..9cb13d15e05 100644 --- a/python/ql/test/library-tests/frameworks/sqlalchemy/new_tests.py +++ b/python/ql/test/library-tests/frameworks/sqlalchemy/new_tests.py @@ -6,7 +6,7 @@ import sqlalchemy.orm # either v1.4 or v2.0, such that we cover both. raw_sql = "select 'FOO'" -text_sql = sqlalchemy.text(raw_sql) # $ constructedSql=raw_sql +text_sql = sqlalchemy.text(raw_sql) # $ getSql=raw_sql Base = sqlalchemy.orm.declarative_base() @@ -176,7 +176,7 @@ assert session.query(For14).all()[0].id == 14 # and now we can do the actual querying -text_foo = sqlalchemy.text("'FOO'") # $ constructedSql="'FOO'" +text_foo = sqlalchemy.text("'FOO'") # $ getSql="'FOO'" # filter_by is only vulnerable to injection if sqlalchemy.text is used, which is evident # from the logs produced if this file is run @@ -305,7 +305,7 @@ with engine.connect() as conn: assert scalar_result == "FOO" # This is a contrived example - select = sqlalchemy.select(sqlalchemy.text("'BAR'")) # $ constructedSql="'BAR'" + select = sqlalchemy.select(sqlalchemy.text("'BAR'")) # $ getSql="'BAR'" result = conn.execute(select) # $ getSql=select assert result.fetchall() == [("BAR",)] diff --git a/python/ql/test/library-tests/frameworks/sqlalchemy/taint_test.py b/python/ql/test/library-tests/frameworks/sqlalchemy/taint_test.py index 884d30d3672..9d346d41979 100644 --- a/python/ql/test/library-tests/frameworks/sqlalchemy/taint_test.py +++ b/python/ql/test/library-tests/frameworks/sqlalchemy/taint_test.py @@ -8,14 +8,14 @@ def test_taint(): ensure_tainted(ts) # $ tainted - t1 = sqlalchemy.text(ts) # $ constructedSql=ts - t2 = sqlalchemy.text(text=ts) # $ constructedSql=ts - t3 = sqlalchemy.sql.text(ts) # $ constructedSql=ts - t4 = sqlalchemy.sql.text(text=ts) # $ constructedSql=ts - t5 = sqlalchemy.sql.expression.text(ts) # $ constructedSql=ts - t6 = sqlalchemy.sql.expression.text(text=ts) # $ constructedSql=ts - t7 = sqlalchemy.sql.expression.TextClause(ts) # $ constructedSql=ts - t8 = sqlalchemy.sql.expression.TextClause(text=ts) # $ constructedSql=ts + t1 = sqlalchemy.text(ts) # $ getSql=ts + t2 = sqlalchemy.text(text=ts) # $ getSql=ts + t3 = sqlalchemy.sql.text(ts) # $ getSql=ts + t4 = sqlalchemy.sql.text(text=ts) # $ getSql=ts + t5 = sqlalchemy.sql.expression.text(ts) # $ getSql=ts + t6 = sqlalchemy.sql.expression.text(text=ts) # $ getSql=ts + t7 = sqlalchemy.sql.expression.TextClause(ts) # $ getSql=ts + t8 = sqlalchemy.sql.expression.TextClause(text=ts) # $ getSql=ts # Since we flag user-input to a TextClause with its' own query, we don't want to # have a taint-step for it as that would lead to us also giving an alert for normal From 8ffc05c84b33594c61d2dac530ea58e719da8000 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 3 May 2022 21:21:57 +0200 Subject: [PATCH 24/51] count both named and positional arguments in the `WithArity` filter --- python/ql/lib/semmle/python/ApiGraphs.qll | 4 ++-- python/ql/test/library-tests/frameworks/data/test.expected | 2 ++ python/ql/test/library-tests/frameworks/data/test.py | 7 ++++++- python/ql/test/library-tests/frameworks/data/test.ql | 2 +- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index 77574ab44ad..e8fc14d90fd 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -349,8 +349,8 @@ module API { result.getAnImmediateUse() = this } - /** Gets the number of arguments of this call. */ - int getNumArgument() { result = count(this.getArg(_)) } + /** Gets the number of arguments of this call. Both positional and named arguments are counted. */ + int getNumArgument() { result = count([this.getArg(_), this.getArgByName(_)]) } } /** diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 721854dad37..8fb6efc0f95 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -46,6 +46,8 @@ isSink | test.py:85:8:85:53 | ControlFlowNode for Attribute() | test-sink | | test.py:86:8:86:60 | ControlFlowNode for Attribute() | test-sink | | test.py:87:8:87:67 | ControlFlowNode for Attribute() | test-sink | +| test.py:89:21:89:23 | ControlFlowNode for one | test-source | +| test.py:90:25:90:27 | ControlFlowNode for one | test-source | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | | test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index db8e8c98e91..96f09c51598 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -84,4 +84,9 @@ mySink(Steps.preserveArgZeroAndTwo("foo", "bar", getSource())) # FLOW mySink(Steps.preserveAllButFirstArgument(getSource())) # NO FLOW mySink(Steps.preserveAllButFirstArgument("foo", getSource())) # FLOW -mySink(Steps.preserveAllButFirstArgument("foo", "bar", getSource())) # FLOW \ No newline at end of file +mySink(Steps.preserveAllButFirstArgument("foo", "bar", getSource())) # FLOW + +CallFilter.arityOne(one) # match +CallFilter.arityOne(one=one) # match +CallFilter.arityOne(one, two=two) # NO match +CallFilter.arityOne(one=one, two=two) # NO match diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index d694837b253..2558631cec6 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -41,7 +41,7 @@ class Sinks extends ModelInput::SinkModelCsv { "testlib;;Member[Args].Member[lastarg].Argument[N-1];test-source", // "testlib;;Member[Args].Member[nonFist].Argument[1..];test-source", // // callsite filter. - "testlib;;Member[CallFilter].Member[arityOne].WithArity[1].Argument[0..];test-source", // + "testlib;;Member[CallFilter].Member[arityOne].WithArity[1].Argument[any];test-source", // "testlib;;Member[CallFilter].Member[twoOrMore].WithArity[2..].Argument[0..];test-source", // // testing non-positional arguments "testlib;;Member[ArgPos].Member[selfThing].Argument[self];test-source", // From ead978187d01d34c1f6b73f6f988705798fc0d6f Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 3 May 2022 21:25:48 +0200 Subject: [PATCH 25/51] adjust the source-type for remote-flow from MaD --- python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll index 6c45c9a4036..1ced4d15ee9 100644 --- a/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll +++ b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll @@ -24,7 +24,7 @@ private import semmle.python.dataflow.new.TaintTracking private class RemoteFlowSourceFromCsv extends RemoteFlowSource { RemoteFlowSourceFromCsv() { this = ModelOutput::getASourceNode("remote").getAnImmediateUse() } - override string getSourceType() { result = "Remote flow" } + override string getSourceType() { result = "Remote flow (from model)" } } /** From 1062aae21c5ffbcd4147f60a05142cce315ca8ca Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 3 May 2022 21:45:33 +0200 Subject: [PATCH 26/51] add test that the foo.bar package syntax works --- python/ql/test/library-tests/frameworks/data/test.expected | 2 ++ python/ql/test/library-tests/frameworks/data/test.py | 6 ++++++ python/ql/test/library-tests/frameworks/data/test.ql | 3 +++ 3 files changed, 11 insertions(+) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 8fb6efc0f95..7fed228e022 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -48,6 +48,8 @@ isSink | test.py:87:8:87:67 | ControlFlowNode for Attribute() | test-sink | | test.py:89:21:89:23 | ControlFlowNode for one | test-source | | test.py:90:25:90:27 | ControlFlowNode for one | test-source | +| test.py:95:6:95:9 | ControlFlowNode for baz1 | test-source | +| test.py:98:6:98:9 | ControlFlowNode for baz2 | test-source | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | | test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 96f09c51598..67f3ac2fbad 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -90,3 +90,9 @@ CallFilter.arityOne(one) # match CallFilter.arityOne(one=one) # match CallFilter.arityOne(one, two=two) # NO match CallFilter.arityOne(one=one, two=two) # NO match + +from foo1.bar import baz1 +baz1(baz1) # match + +from foo2.bar import baz2 +baz2(baz2) # match \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 2558631cec6..4b585309a6a 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -48,6 +48,9 @@ class Sinks extends ModelInput::SinkModelCsv { // any argument "testlib;;Member[ArgPos].Member[anyParam].Argument[any];test-source", // "testlib;;Member[ArgPos].Member[anyNamed].Argument[any-named];test-source", // + // testing package syntax + "foo1.bar;;Member[baz1].Argument[any];test-source", // + "foo2;;Member[bar].Member[baz2].Argument[any];test-source", // ] } } From 571fc3e73b8430bb7596f2da00dde6081094e1ab Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 3 May 2022 22:49:33 +0200 Subject: [PATCH 27/51] Revert "deprecate SqlConstruction" This reverts commit c0eca0d09a76cef154e14dd75032e7b87f11a10a. --- .../2022-05-02-deprecate-sqlconstruction.md | 4 -- python/ql/lib/semmle/python/Concepts.qll | 41 +++++++++++++++---- .../lib/semmle/python/frameworks/Aiomysql.qll | 4 +- .../ql/lib/semmle/python/frameworks/Aiopg.qll | 4 +- .../lib/semmle/python/frameworks/Asyncpg.qll | 2 +- .../semmle/python/frameworks/SqlAlchemy.qll | 2 +- .../dataflow/SqlInjectionCustomizations.qll | 3 +- .../test/experimental/meta/ConceptsTest.qll | 18 ++++++++ .../library-tests/frameworks/aiomysql/test.py | 10 ++--- .../library-tests/frameworks/aiopg/test.py | 10 ++--- .../library-tests/frameworks/asyncpg/test.py | 6 +-- .../flask_sqlalchemy/SqlExecution.py | 6 +-- .../frameworks/sqlalchemy/SqlExecution.py | 2 +- .../frameworks/sqlalchemy/new_tests.py | 6 +-- .../frameworks/sqlalchemy/taint_test.py | 16 ++++---- 15 files changed, 85 insertions(+), 49 deletions(-) delete mode 100644 python/ql/lib/change-notes/2022-05-02-deprecate-sqlconstruction.md diff --git a/python/ql/lib/change-notes/2022-05-02-deprecate-sqlconstruction.md b/python/ql/lib/change-notes/2022-05-02-deprecate-sqlconstruction.md deleted file mode 100644 index 4219fd12d9c..00000000000 --- a/python/ql/lib/change-notes/2022-05-02-deprecate-sqlconstruction.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: deprecated ---- -The `SqlConstruction` class and module from `Concepts.qll` has been deprecated. Use `SqlExecution` from the same file instead. \ No newline at end of file diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index 6154d24628b..e734105cd2e 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -308,19 +308,36 @@ module CodeExecution { } } -/** DEPRECATED: Use `SqlExecution` instead. */ -deprecated class SqlConstruction extends DataFlow::Node instanceof SqlConstruction::Range { +/** + * A data-flow node that constructs an SQL statement. + * + * Often, it is worthy of an alert if an SQL statement is constructed such that + * executing it would be a security risk. + * + * If it is important that the SQL statement is indeed executed, then use `SQLExecution`. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `SqlConstruction::Range` instead. + */ +class SqlConstruction extends DataFlow::Node instanceof SqlConstruction::Range { /** Gets the argument that specifies the SQL statements to be constructed. */ DataFlow::Node getSql() { result = super.getSql() } } -/** - * DEPRECATED: Use `SqlExecution` instead. - * Provides a class for modeling new SQL execution APIs. - */ -deprecated module SqlConstruction { - /** DEPRECATED: Use `SqlExecution::Range` instead. */ - abstract deprecated class Range extends DataFlow::Node { +/** Provides a class for modeling new SQL execution APIs. */ +module SqlConstruction { + /** + * A data-flow node that constructs an SQL statement. + * + * Often, it is worthy of an alert if an SQL statement is constructed such that + * executing it would be a security risk. + * + * If it is important that the SQL statement is indeed executed, then use `SQLExecution`. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `SqlConstruction` instead. + */ + abstract class Range extends DataFlow::Node { /** Gets the argument that specifies the SQL statements to be constructed. */ abstract DataFlow::Node getSql(); } @@ -329,6 +346,9 @@ deprecated module SqlConstruction { /** * A data-flow node that executes SQL statements. * + * If the context of interest is such that merely constructing an SQL statement + * would be valuabe to report, then consider using `SqlConstruction`. + * * Extend this class to refine existing API models. If you want to model new APIs, * extend `SqlExecution::Range` instead. */ @@ -342,6 +362,9 @@ module SqlExecution { /** * A data-flow node that executes SQL statements. * + * If the context of interest is such that merely constructing an SQL statement + * would be valuabe to report, then consider using `SqlConstruction`. + * * Extend this class to model new APIs. If you want to refine existing API models, * extend `SqlExecution` instead. */ diff --git a/python/ql/lib/semmle/python/frameworks/Aiomysql.qll b/python/ql/lib/semmle/python/frameworks/Aiomysql.qll index afff79783c2..112dc58d061 100644 --- a/python/ql/lib/semmle/python/frameworks/Aiomysql.qll +++ b/python/ql/lib/semmle/python/frameworks/Aiomysql.qll @@ -50,7 +50,7 @@ private module Aiomysql { * A query. Calling `execute` on a `Cursor` constructs a query. * See https://aiomysql.readthedocs.io/en/stable/cursors.html#Cursor.execute */ - class CursorExecuteCall extends SqlExecution::Range, API::CallNode { + class CursorExecuteCall extends SqlConstruction::Range, API::CallNode { CursorExecuteCall() { this = cursor().getMember("execute").getACall() } override DataFlow::Node getSql() { result = this.getParameter(0, "operation").getARhs() } @@ -91,7 +91,7 @@ private module Aiomysql { * A query. Calling `execute` on a `SAConnection` constructs a query. * See https://aiomysql.readthedocs.io/en/stable/sa.html#aiomysql.sa.SAConnection.execute */ - class SAConnectionExecuteCall extends SqlExecution::Range, API::CallNode { + class SAConnectionExecuteCall extends SqlConstruction::Range, API::CallNode { SAConnectionExecuteCall() { this = saConnection().getMember("execute").getACall() } override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() } diff --git a/python/ql/lib/semmle/python/frameworks/Aiopg.qll b/python/ql/lib/semmle/python/frameworks/Aiopg.qll index 67ba3c80db0..1a60c433150 100644 --- a/python/ql/lib/semmle/python/frameworks/Aiopg.qll +++ b/python/ql/lib/semmle/python/frameworks/Aiopg.qll @@ -50,7 +50,7 @@ private module Aiopg { * A query. Calling `execute` on a `Cursor` constructs a query. * See https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Cursor.execute */ - class CursorExecuteCall extends SqlExecution::Range, API::CallNode { + class CursorExecuteCall extends SqlConstruction::Range, API::CallNode { CursorExecuteCall() { this = cursor().getMember("execute").getACall() } override DataFlow::Node getSql() { result = this.getParameter(0, "operation").getARhs() } @@ -87,7 +87,7 @@ private module Aiopg { * A query. Calling `execute` on a `SAConnection` constructs a query. * See https://aiopg.readthedocs.io/en/stable/sa.html#aiopg.sa.SAConnection.execute */ - class SAConnectionExecuteCall extends SqlExecution::Range, API::CallNode { + class SAConnectionExecuteCall extends SqlConstruction::Range, API::CallNode { SAConnectionExecuteCall() { this = saConnection().getMember("execute").getACall() } override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() } diff --git a/python/ql/lib/semmle/python/frameworks/Asyncpg.qll b/python/ql/lib/semmle/python/frameworks/Asyncpg.qll index 77678c92b51..2b2b9ed2e1b 100644 --- a/python/ql/lib/semmle/python/frameworks/Asyncpg.qll +++ b/python/ql/lib/semmle/python/frameworks/Asyncpg.qll @@ -56,7 +56,7 @@ private module Asyncpg { * The creation of the `Cursor` executes the query. */ module Cursor { - class CursorConstruction extends SqlExecution::Range, API::CallNode { + class CursorConstruction extends SqlConstruction::Range, API::CallNode { CursorConstruction() { this = ModelOutput::getATypeNode("asyncpg", "Connection").getMember("cursor").getACall() } diff --git a/python/ql/lib/semmle/python/frameworks/SqlAlchemy.qll b/python/ql/lib/semmle/python/frameworks/SqlAlchemy.qll index 8a7e2f176e8..c1712e7e7eb 100644 --- a/python/ql/lib/semmle/python/frameworks/SqlAlchemy.qll +++ b/python/ql/lib/semmle/python/frameworks/SqlAlchemy.qll @@ -323,7 +323,7 @@ module SqlAlchemy { * A construction of a `sqlalchemy.sql.expression.TextClause`, which represents a * textual SQL string directly. */ - abstract class TextClauseConstruction extends SqlExecution::Range, DataFlow::CallCfgNode { + abstract class TextClauseConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode { /** Gets the argument that specifies the SQL text. */ override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("text")] } } diff --git a/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll index 9e3e2749a2b..756a1f6b773 100644 --- a/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll @@ -43,10 +43,9 @@ module SqlInjection { class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } /** - * DEPRECATED: Use `SqlExecutionAsSink` instead. * A SQL statement of a SQL construction, considered as a flow sink. */ - deprecated class SqlConstructionAsSink extends Sink { + class SqlConstructionAsSink extends Sink { SqlConstructionAsSink() { this = any(SqlConstruction c).getSql() } } diff --git a/python/ql/test/experimental/meta/ConceptsTest.qll b/python/ql/test/experimental/meta/ConceptsTest.qll index bdf71059680..8f9435f633f 100644 --- a/python/ql/test/experimental/meta/ConceptsTest.qll +++ b/python/ql/test/experimental/meta/ConceptsTest.qll @@ -128,6 +128,24 @@ class CodeExecutionTest extends InlineExpectationsTest { } } +class SqlConstructionTest extends InlineExpectationsTest { + SqlConstructionTest() { this = "SqlConstructionTest" } + + override string getARelevantTag() { result = "constructedSql" } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(location.getFile().getRelativePath()) and + exists(SqlConstruction e, DataFlow::Node sql | + exists(location.getFile().getRelativePath()) and + sql = e.getSql() and + location = e.getLocation() and + element = sql.toString() and + value = prettyNodeForInlineTest(sql) and + tag = "constructedSql" + ) + } +} + class SqlExecutionTest extends InlineExpectationsTest { SqlExecutionTest() { this = "SqlExecutionTest" } diff --git a/python/ql/test/library-tests/frameworks/aiomysql/test.py b/python/ql/test/library-tests/frameworks/aiomysql/test.py index 5a06b46b9f1..782fa6cb7bf 100644 --- a/python/ql/test/library-tests/frameworks/aiomysql/test.py +++ b/python/ql/test/library-tests/frameworks/aiomysql/test.py @@ -5,24 +5,24 @@ async def test_cursor(): # Create connection directly conn = await aiomysql.connect() cur = await conn.cursor() - await cur.execute("sql") # $ getSql="sql" + await cur.execute("sql") # $ getSql="sql" constructedSql="sql" # Create connection via pool async with aiomysql.create_pool() as pool: # Create Cursor via Connection async with pool.acquire() as conn: async with conn.cursor() as cur: - await cur.execute("sql") # $ getSql="sql" + await cur.execute("sql") # $ getSql="sql" constructedSql="sql" # Create Cursor directly async with pool.cursor() as cur: - await cur.execute("sql") # $ getSql="sql" + await cur.execute("sql") # $ getSql="sql" constructedSql="sql" # variants using as few `async with` as possible pool = await aiomysql.create_pool() conn = await pool.acquire() cur = await conn.cursor() - await cur.execute("sql") # $ getSql="sql" + await cur.execute("sql") # $ getSql="sql" constructedSql="sql" # Test SQLAlchemy integration from aiomysql.sa import create_engine @@ -30,4 +30,4 @@ from aiomysql.sa import create_engine async def test_engine(): engine = await create_engine() conn = await engine.acquire() - await conn.execute("sql") # $ getSql="sql" + await conn.execute("sql") # $ getSql="sql" constructedSql="sql" diff --git a/python/ql/test/library-tests/frameworks/aiopg/test.py b/python/ql/test/library-tests/frameworks/aiopg/test.py index 1d085066688..63bf141f52d 100644 --- a/python/ql/test/library-tests/frameworks/aiopg/test.py +++ b/python/ql/test/library-tests/frameworks/aiopg/test.py @@ -5,24 +5,24 @@ async def test_cursor(): # Create connection directly conn = await aiopg.connect() cur = await conn.cursor() - await cur.execute("sql") # $ getSql="sql" + await cur.execute("sql") # $ getSql="sql" constructedSql="sql" # Create connection via pool async with aiopg.create_pool() as pool: # Create Cursor via Connection async with pool.acquire() as conn: async with conn.cursor() as cur: - await cur.execute("sql") # $ getSql="sql" + await cur.execute("sql") # $ getSql="sql" constructedSql="sql" # Create Cursor directly async with pool.cursor() as cur: - await cur.execute("sql") # $ getSql="sql" + await cur.execute("sql") # $ getSql="sql" constructedSql="sql" # variants using as few `async with` as possible pool = await aiopg.create_pool() conn = await pool.acquire() cur = await conn.cursor() - await cur.execute("sql") # $ getSql="sql" + await cur.execute("sql") # $ getSql="sql" constructedSql="sql" # Test SQLAlchemy integration from aiopg.sa import create_engine @@ -30,4 +30,4 @@ from aiopg.sa import create_engine async def test_engine(): engine = await create_engine() conn = await engine.acquire() - await conn.execute("sql") # $ getSql="sql" + await conn.execute("sql") # $ getSql="sql" constructedSql="sql" diff --git a/python/ql/test/library-tests/frameworks/asyncpg/test.py b/python/ql/test/library-tests/frameworks/asyncpg/test.py index 7e2687e5766..572fc454bed 100644 --- a/python/ql/test/library-tests/frameworks/asyncpg/test.py +++ b/python/ql/test/library-tests/frameworks/asyncpg/test.py @@ -43,20 +43,20 @@ async def test_cursor(): try: async with conn.transaction(): - cursor = await conn.cursor("sql") # $ getSql="sql" + cursor = await conn.cursor("sql") # $ getSql="sql" constructedSql="sql" await cursor.fetch() pstmt = await conn.prepare("psql") # $ getSql="psql" pcursor = await pstmt.cursor() # $ getSql="psql" await pcursor.fetch() - async for record in conn.cursor("sql"): # $ getSql="sql" + async for record in conn.cursor("sql"): # $ getSql="sql" constructedSql="sql" pass async for record in pstmt.cursor(): # $ getSql="psql" pass - cursor_factory = conn.cursor("sql") # $ getSql="sql" + cursor_factory = conn.cursor("sql") # $ constructedSql="sql" cursor = await cursor_factory # $ getSql="sql" pcursor_factory = pstmt.cursor() diff --git a/python/ql/test/library-tests/frameworks/flask_sqlalchemy/SqlExecution.py b/python/ql/test/library-tests/frameworks/flask_sqlalchemy/SqlExecution.py index 19a1349ffb6..39cd49195d2 100644 --- a/python/ql/test/library-tests/frameworks/flask_sqlalchemy/SqlExecution.py +++ b/python/ql/test/library-tests/frameworks/flask_sqlalchemy/SqlExecution.py @@ -12,7 +12,7 @@ db = SQLAlchemy(app) # - https://github.com/pallets/flask-sqlalchemy/blob/931ec00d1e27f51508e05706eef41cc4419a0b32/src/flask_sqlalchemy/__init__.py#L765 # - https://github.com/pallets/flask-sqlalchemy/blob/931ec00d1e27f51508e05706eef41cc4419a0b32/src/flask_sqlalchemy/__init__.py#L99-L109 -assert str(type(db.text("Foo"))) == "" # $ getSql="Foo" +assert str(type(db.text("Foo"))) == "" # $ constructedSql="Foo" # also has engine/session instantiated @@ -44,8 +44,8 @@ assert result.fetchall() == [("Foo",)] # text -t = db.text("foo") # $ getSql="foo" +t = db.text("foo") # $ constructedSql="foo" assert isinstance(t, sqlalchemy.sql.expression.TextClause) -t = db.text(text="foo") # $ getSql="foo" +t = db.text(text="foo") # $ constructedSql="foo" assert isinstance(t, sqlalchemy.sql.expression.TextClause) diff --git a/python/ql/test/library-tests/frameworks/sqlalchemy/SqlExecution.py b/python/ql/test/library-tests/frameworks/sqlalchemy/SqlExecution.py index 7d9c478e27b..fe75fc17d5c 100644 --- a/python/ql/test/library-tests/frameworks/sqlalchemy/SqlExecution.py +++ b/python/ql/test/library-tests/frameworks/sqlalchemy/SqlExecution.py @@ -46,7 +46,7 @@ with engine.begin() as connection: connection.execute("some sql") # $ getSql="some sql" # Injection requiring the text() taint-step -t = text("some sql") # $ getSql="some sql" +t = text("some sql") # $ constructedSql="some sql" session.query(User).filter(t) session.query(User).group_by(User.id).having(t) session.query(User).group_by(t).first() diff --git a/python/ql/test/library-tests/frameworks/sqlalchemy/new_tests.py b/python/ql/test/library-tests/frameworks/sqlalchemy/new_tests.py index 9cb13d15e05..6b11ee5d070 100644 --- a/python/ql/test/library-tests/frameworks/sqlalchemy/new_tests.py +++ b/python/ql/test/library-tests/frameworks/sqlalchemy/new_tests.py @@ -6,7 +6,7 @@ import sqlalchemy.orm # either v1.4 or v2.0, such that we cover both. raw_sql = "select 'FOO'" -text_sql = sqlalchemy.text(raw_sql) # $ getSql=raw_sql +text_sql = sqlalchemy.text(raw_sql) # $ constructedSql=raw_sql Base = sqlalchemy.orm.declarative_base() @@ -176,7 +176,7 @@ assert session.query(For14).all()[0].id == 14 # and now we can do the actual querying -text_foo = sqlalchemy.text("'FOO'") # $ getSql="'FOO'" +text_foo = sqlalchemy.text("'FOO'") # $ constructedSql="'FOO'" # filter_by is only vulnerable to injection if sqlalchemy.text is used, which is evident # from the logs produced if this file is run @@ -305,7 +305,7 @@ with engine.connect() as conn: assert scalar_result == "FOO" # This is a contrived example - select = sqlalchemy.select(sqlalchemy.text("'BAR'")) # $ getSql="'BAR'" + select = sqlalchemy.select(sqlalchemy.text("'BAR'")) # $ constructedSql="'BAR'" result = conn.execute(select) # $ getSql=select assert result.fetchall() == [("BAR",)] diff --git a/python/ql/test/library-tests/frameworks/sqlalchemy/taint_test.py b/python/ql/test/library-tests/frameworks/sqlalchemy/taint_test.py index 9d346d41979..884d30d3672 100644 --- a/python/ql/test/library-tests/frameworks/sqlalchemy/taint_test.py +++ b/python/ql/test/library-tests/frameworks/sqlalchemy/taint_test.py @@ -8,14 +8,14 @@ def test_taint(): ensure_tainted(ts) # $ tainted - t1 = sqlalchemy.text(ts) # $ getSql=ts - t2 = sqlalchemy.text(text=ts) # $ getSql=ts - t3 = sqlalchemy.sql.text(ts) # $ getSql=ts - t4 = sqlalchemy.sql.text(text=ts) # $ getSql=ts - t5 = sqlalchemy.sql.expression.text(ts) # $ getSql=ts - t6 = sqlalchemy.sql.expression.text(text=ts) # $ getSql=ts - t7 = sqlalchemy.sql.expression.TextClause(ts) # $ getSql=ts - t8 = sqlalchemy.sql.expression.TextClause(text=ts) # $ getSql=ts + t1 = sqlalchemy.text(ts) # $ constructedSql=ts + t2 = sqlalchemy.text(text=ts) # $ constructedSql=ts + t3 = sqlalchemy.sql.text(ts) # $ constructedSql=ts + t4 = sqlalchemy.sql.text(text=ts) # $ constructedSql=ts + t5 = sqlalchemy.sql.expression.text(ts) # $ constructedSql=ts + t6 = sqlalchemy.sql.expression.text(text=ts) # $ constructedSql=ts + t7 = sqlalchemy.sql.expression.TextClause(ts) # $ constructedSql=ts + t8 = sqlalchemy.sql.expression.TextClause(text=ts) # $ constructedSql=ts # Since we flag user-input to a TextClause with its' own query, we don't want to # have a taint-step for it as that would lead to us also giving an alert for normal From a812d4dd34e82ca168e9037da2d9f993a5963f58 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 3 May 2022 22:51:54 +0200 Subject: [PATCH 28/51] move the MaD sql-injection sink to SqlInjectionCustomizations.qll --- python/ql/lib/semmle/python/Concepts.qll | 8 -------- .../security/dataflow/SqlInjectionCustomizations.qll | 7 +++++++ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index e734105cd2e..296b36417f3 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -372,14 +372,6 @@ module SqlExecution { /** Gets the argument that specifies the SQL statements to be executed. */ abstract DataFlow::Node getSql(); } - - private import semmle.python.frameworks.data.ModelsAsData - - private class DataAsSqlExecution extends Range { - DataAsSqlExecution() { this = ModelOutput::getASinkNode("sql-injection").getARhs() } - - override DataFlow::Node getSql() { result = this } - } } /** diff --git a/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll index 756a1f6b773..cf21a5c0e94 100644 --- a/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll @@ -60,4 +60,11 @@ module SqlInjection { * A comparison with a constant string, considered as a sanitizer-guard. */ class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { } + + private import semmle.python.frameworks.data.ModelsAsData + + /** A sink for sql-injection from model data. */ + private class DataAsSqlSink extends Sink { + DataAsSqlSink() { this = ModelOutput::getASinkNode("sql-injection").getARhs() } + } } From 4b9c9b0c8d71d8ed444ceff248443a38f32cbc37 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Wed, 4 May 2022 10:58:29 +0200 Subject: [PATCH 29/51] move most of asyncpg test into SqlInjection after moving MaD sql-injection sink --- .../library-tests/frameworks/asyncpg/test.py | 44 +++++----- .../SqlInjection.expected | 73 ++++++++++++++++ .../Security/CWE-089-SqlInjection/asyncpg.py | 87 +++++++++++++++++++ 3 files changed, 182 insertions(+), 22 deletions(-) create mode 100644 python/ql/test/query-tests/Security/CWE-089-SqlInjection/asyncpg.py diff --git a/python/ql/test/library-tests/frameworks/asyncpg/test.py b/python/ql/test/library-tests/frameworks/asyncpg/test.py index 572fc454bed..b9d6aee5b18 100644 --- a/python/ql/test/library-tests/frameworks/asyncpg/test.py +++ b/python/ql/test/library-tests/frameworks/asyncpg/test.py @@ -7,17 +7,17 @@ async def test_connection(): try: # The file-like object is passed in as a keyword-only argument. # See https://magicstack.github.io/asyncpg/current/api/index.html#asyncpg.connection.Connection.copy_from_query - await conn.copy_from_query("sql", output="filepath") # $ getSql="sql" getAPathArgument="filepath" - await conn.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ getSql="sql" getAPathArgument="filepath" + await conn.copy_from_query("sql", output="filepath") # $ getAPathArgument="filepath" + await conn.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ getAPathArgument="filepath" await conn.copy_from_table("table", output="filepath") # $ getAPathArgument="filepath" await conn.copy_to_table("table", source="filepath") # $ getAPathArgument="filepath" - await conn.execute("sql") # $ getSql="sql" - await conn.executemany("sql") # $ getSql="sql" - await conn.fetch("sql") # $ getSql="sql" - await conn.fetchrow("sql") # $ getSql="sql" - await conn.fetchval("sql") # $ getSql="sql" + await conn.execute("sql") # $ + await conn.executemany("sql") # $ + await conn.fetch("sql") # $ + await conn.fetchrow("sql") # $ + await conn.fetchval("sql") # $ finally: await conn.close() @@ -27,7 +27,7 @@ async def test_prepared_statement(): conn = await asyncpg.connect() try: - pstmt = await conn.prepare("psql") # $ getSql="psql" + pstmt = await conn.prepare("psql") # $ pstmt.executemany() pstmt.fetch() pstmt.fetchrow() @@ -43,10 +43,10 @@ async def test_cursor(): try: async with conn.transaction(): - cursor = await conn.cursor("sql") # $ getSql="sql" constructedSql="sql" + cursor = await conn.cursor("sql") # $ constructedSql="sql" getSql="sql" await cursor.fetch() - pstmt = await conn.prepare("psql") # $ getSql="psql" + pstmt = await conn.prepare("psql") # pcursor = await pstmt.cursor() # $ getSql="psql" await pcursor.fetch() @@ -69,23 +69,23 @@ async def test_connection_pool(): pool = await asyncpg.create_pool() try: - await pool.copy_from_query("sql", output="filepath") # $ getSql="sql" getAPathArgument="filepath" - await pool.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ getSql="sql" getAPathArgument="filepath" + await pool.copy_from_query("sql", output="filepath") # $ getAPathArgument="filepath" + await pool.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ getAPathArgument="filepath" await pool.copy_from_table("table", output="filepath") # $ getAPathArgument="filepath" await pool.copy_to_table("table", source="filepath") # $ getAPathArgument="filepath" - await pool.execute("sql") # $ getSql="sql" - await pool.executemany("sql") # $ getSql="sql" - await pool.fetch("sql") # $ getSql="sql" - await pool.fetchrow("sql") # $ getSql="sql" - await pool.fetchval("sql") # $ getSql="sql" + await pool.execute("sql") # $ + await pool.executemany("sql") # $ + await pool.fetch("sql") # $ + await pool.fetchrow("sql") # $ + await pool.fetchval("sql") # $ async with pool.acquire() as conn: - await conn.execute("sql") # $ getSql="sql" + await conn.execute("sql") # $ conn = await pool.acquire() try: - await conn.fetch("sql") # $ getSql="sql" + await conn.fetch("sql") # $ finally: await pool.release(conn) @@ -93,13 +93,13 @@ async def test_connection_pool(): await pool.close() async with asyncpg.create_pool() as pool: - await pool.execute("sql") # $ getSql="sql" + await pool.execute("sql") # $ async with pool.acquire() as conn: - await conn.execute("sql") # $ getSql="sql" + await conn.execute("sql") # $ conn = await pool.acquire() try: - await conn.fetch("sql") # $ getSql="sql" + await conn.fetch("sql") # $ finally: await pool.release(conn) diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected index ed4c3e3d313..fe01f3b548d 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected @@ -1,4 +1,28 @@ edges +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:19:36:19:43 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:20:36:20:43 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:22:28:22:35 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:23:32:23:39 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:24:26:24:33 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:25:29:25:36 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:26:29:26:36 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:28:36:28:43 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:37:40:37:47 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:40:40:40:47 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:44:45:44:52 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:47:42:47:49 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:56:36:56:43 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:57:36:57:43 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:59:28:59:35 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:60:32:60:39 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:61:26:61:33 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:62:29:62:36 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:63:29:63:36 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:66:32:66:39 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:70:30:70:37 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:78:28:78:35 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:81:32:81:39 | ControlFlowNode for username | +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:85:30:85:37 | ControlFlowNode for username | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | @@ -16,6 +40,31 @@ edges | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | nodes +| asyncpg.py:13:21:13:28 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:19:36:19:43 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:20:36:20:43 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:22:28:22:35 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:23:32:23:39 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:24:26:24:33 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:25:29:25:36 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:26:29:26:36 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:28:36:28:43 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:37:40:37:47 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:40:40:40:47 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:44:45:44:52 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:47:42:47:49 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:56:36:56:43 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:57:36:57:43 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:59:28:59:35 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:60:32:60:39 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:61:26:61:33 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:62:29:62:36 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:63:29:63:36 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:66:32:66:39 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:70:30:70:37 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:78:28:78:35 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:81:32:81:39 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | +| asyncpg.py:85:30:85:37 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | @@ -36,6 +85,30 @@ nodes | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | subpaths #select +| asyncpg.py:19:36:19:43 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:19:36:19:43 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:20:36:20:43 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:20:36:20:43 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:22:28:22:35 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:22:28:22:35 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:23:32:23:39 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:23:32:23:39 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:24:26:24:33 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:24:26:24:33 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:25:29:25:36 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:25:29:25:36 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:26:29:26:36 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:26:29:26:36 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:28:36:28:43 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:28:36:28:43 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:37:40:37:47 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:37:40:37:47 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:40:40:40:47 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:40:40:40:47 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:44:45:44:52 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:44:45:44:52 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:47:42:47:49 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:47:42:47:49 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:56:36:56:43 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:56:36:56:43 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:57:36:57:43 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:57:36:57:43 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:59:28:59:35 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:59:28:59:35 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:60:32:60:39 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:60:32:60:39 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:61:26:61:33 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:61:26:61:33 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:62:29:62:36 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:62:29:62:36 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:63:29:63:36 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:63:29:63:36 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:66:32:66:39 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:66:32:66:39 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:70:30:70:37 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:70:30:70:37 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:78:28:78:35 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:78:28:78:35 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:81:32:81:39 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:81:32:81:39 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | +| asyncpg.py:85:30:85:37 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:85:30:85:37 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | a user-provided value | | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | This SQL query depends on $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | a user-provided value | | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | This SQL query depends on $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | a user-provided value | diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/asyncpg.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/asyncpg.py new file mode 100644 index 00000000000..fd9dfed5877 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/asyncpg.py @@ -0,0 +1,87 @@ +import asyncio +import asyncpg + +"""This is adapted from ql/python/ql/test/query-tests\Security\CWE-089 +we now prefer to setup routing by flask +""" + +from django.db import connection, models +from flask import Flask +app = Flask(__name__) + +@app.route("/users/") +async def show_user(username): + conn = await asyncpg.connect() + + try: + # The file-like object is passed in as a keyword-only argument. + # See https://magicstack.github.io/asyncpg/current/api/index.html#asyncpg.connection.Connection.copy_from_query + await conn.copy_from_query(username, output="filepath") # BAD + await conn.copy_from_query(username, "arg1", "arg2", output="filepath") # BAD + + await conn.execute(username) # BAD + await conn.executemany(username) # BAD + await conn.fetch(username) # BAD + await conn.fetchrow(username) # BAD + await conn.fetchval(username) # BAD + + pstmt = await conn.prepare(username) # BAD + pstmt.executemany() + pstmt.fetch() + pstmt.fetchrow() + pstmt.fetchval() + + # The sql statement is executed when the `CursorFactory` (obtained by e.g. `conn.cursor()`) is awaited. + # See https://magicstack.github.io/asyncpg/current/api/index.html#asyncpg.cursor.CursorFactory + async with conn.transaction(): + cursor = await conn.cursor(username) # BAD + await cursor.fetch() + + pstmt = await conn.prepare(username) # BAD + pcursor = await pstmt.cursor() + await pcursor.fetch() + + async for record in conn.cursor(username): # BAD + pass + + cursor_factory = conn.cursor(username) # BAD + cursor = await cursor_factory + + finally: + await conn.close() + + pool = await asyncpg.create_pool() + + try: + await pool.copy_from_query(username, output="filepath") # BAD + await pool.copy_from_query(username, "arg1", "arg2", output="filepath") # BAD + + await pool.execute(username) # BAD + await pool.executemany(username) # BAD + await pool.fetch(username) # BAD + await pool.fetchrow(username) # BAD + await pool.fetchval(username) # BAD + + async with pool.acquire() as conn: + await conn.execute(username) # BAD + + conn = await pool.acquire() + try: + await conn.fetch(username) # BAD + finally: + await pool.release(conn) + + finally: + await pool.close() + + async with asyncpg.create_pool() as pool: + await pool.execute(username) # BAD + + async with pool.acquire() as conn: + await conn.execute(username) # BAD + + conn = await pool.acquire() + try: + await conn.fetch(username) # BAD + finally: + await pool.release(conn) From 6ae5ef9f3b3bc4ce0d8576a31b91714e49a4f50c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 5 May 2022 10:20:41 +0200 Subject: [PATCH 30/51] Revert "move most of asyncpg test into SqlInjection after moving MaD sql-injection sink" This reverts commit 4b9c9b0c8d71d8ed444ceff248443a38f32cbc37. --- .../library-tests/frameworks/asyncpg/test.py | 44 +++++----- .../SqlInjection.expected | 73 ---------------- .../Security/CWE-089-SqlInjection/asyncpg.py | 87 ------------------- 3 files changed, 22 insertions(+), 182 deletions(-) delete mode 100644 python/ql/test/query-tests/Security/CWE-089-SqlInjection/asyncpg.py diff --git a/python/ql/test/library-tests/frameworks/asyncpg/test.py b/python/ql/test/library-tests/frameworks/asyncpg/test.py index b9d6aee5b18..572fc454bed 100644 --- a/python/ql/test/library-tests/frameworks/asyncpg/test.py +++ b/python/ql/test/library-tests/frameworks/asyncpg/test.py @@ -7,17 +7,17 @@ async def test_connection(): try: # The file-like object is passed in as a keyword-only argument. # See https://magicstack.github.io/asyncpg/current/api/index.html#asyncpg.connection.Connection.copy_from_query - await conn.copy_from_query("sql", output="filepath") # $ getAPathArgument="filepath" - await conn.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ getAPathArgument="filepath" + await conn.copy_from_query("sql", output="filepath") # $ getSql="sql" getAPathArgument="filepath" + await conn.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ getSql="sql" getAPathArgument="filepath" await conn.copy_from_table("table", output="filepath") # $ getAPathArgument="filepath" await conn.copy_to_table("table", source="filepath") # $ getAPathArgument="filepath" - await conn.execute("sql") # $ - await conn.executemany("sql") # $ - await conn.fetch("sql") # $ - await conn.fetchrow("sql") # $ - await conn.fetchval("sql") # $ + await conn.execute("sql") # $ getSql="sql" + await conn.executemany("sql") # $ getSql="sql" + await conn.fetch("sql") # $ getSql="sql" + await conn.fetchrow("sql") # $ getSql="sql" + await conn.fetchval("sql") # $ getSql="sql" finally: await conn.close() @@ -27,7 +27,7 @@ async def test_prepared_statement(): conn = await asyncpg.connect() try: - pstmt = await conn.prepare("psql") # $ + pstmt = await conn.prepare("psql") # $ getSql="psql" pstmt.executemany() pstmt.fetch() pstmt.fetchrow() @@ -43,10 +43,10 @@ async def test_cursor(): try: async with conn.transaction(): - cursor = await conn.cursor("sql") # $ constructedSql="sql" getSql="sql" + cursor = await conn.cursor("sql") # $ getSql="sql" constructedSql="sql" await cursor.fetch() - pstmt = await conn.prepare("psql") # + pstmt = await conn.prepare("psql") # $ getSql="psql" pcursor = await pstmt.cursor() # $ getSql="psql" await pcursor.fetch() @@ -69,23 +69,23 @@ async def test_connection_pool(): pool = await asyncpg.create_pool() try: - await pool.copy_from_query("sql", output="filepath") # $ getAPathArgument="filepath" - await pool.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ getAPathArgument="filepath" + await pool.copy_from_query("sql", output="filepath") # $ getSql="sql" getAPathArgument="filepath" + await pool.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ getSql="sql" getAPathArgument="filepath" await pool.copy_from_table("table", output="filepath") # $ getAPathArgument="filepath" await pool.copy_to_table("table", source="filepath") # $ getAPathArgument="filepath" - await pool.execute("sql") # $ - await pool.executemany("sql") # $ - await pool.fetch("sql") # $ - await pool.fetchrow("sql") # $ - await pool.fetchval("sql") # $ + await pool.execute("sql") # $ getSql="sql" + await pool.executemany("sql") # $ getSql="sql" + await pool.fetch("sql") # $ getSql="sql" + await pool.fetchrow("sql") # $ getSql="sql" + await pool.fetchval("sql") # $ getSql="sql" async with pool.acquire() as conn: - await conn.execute("sql") # $ + await conn.execute("sql") # $ getSql="sql" conn = await pool.acquire() try: - await conn.fetch("sql") # $ + await conn.fetch("sql") # $ getSql="sql" finally: await pool.release(conn) @@ -93,13 +93,13 @@ async def test_connection_pool(): await pool.close() async with asyncpg.create_pool() as pool: - await pool.execute("sql") # $ + await pool.execute("sql") # $ getSql="sql" async with pool.acquire() as conn: - await conn.execute("sql") # $ + await conn.execute("sql") # $ getSql="sql" conn = await pool.acquire() try: - await conn.fetch("sql") # $ + await conn.fetch("sql") # $ getSql="sql" finally: await pool.release(conn) diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected index fe01f3b548d..ed4c3e3d313 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected @@ -1,28 +1,4 @@ edges -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:19:36:19:43 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:20:36:20:43 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:22:28:22:35 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:23:32:23:39 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:24:26:24:33 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:25:29:25:36 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:26:29:26:36 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:28:36:28:43 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:37:40:37:47 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:40:40:40:47 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:44:45:44:52 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:47:42:47:49 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:56:36:56:43 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:57:36:57:43 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:59:28:59:35 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:60:32:60:39 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:61:26:61:33 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:62:29:62:36 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:63:29:63:36 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:66:32:66:39 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:70:30:70:37 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:78:28:78:35 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:81:32:81:39 | ControlFlowNode for username | -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:85:30:85:37 | ControlFlowNode for username | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | @@ -40,31 +16,6 @@ edges | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | nodes -| asyncpg.py:13:21:13:28 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:19:36:19:43 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:20:36:20:43 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:22:28:22:35 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:23:32:23:39 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:24:26:24:33 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:25:29:25:36 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:26:29:26:36 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:28:36:28:43 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:37:40:37:47 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:40:40:40:47 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:44:45:44:52 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:47:42:47:49 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:56:36:56:43 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:57:36:57:43 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:59:28:59:35 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:60:32:60:39 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:61:26:61:33 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:62:29:62:36 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:63:29:63:36 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:66:32:66:39 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:70:30:70:37 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:78:28:78:35 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:81:32:81:39 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | -| asyncpg.py:85:30:85:37 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | @@ -85,30 +36,6 @@ nodes | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | subpaths #select -| asyncpg.py:19:36:19:43 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:19:36:19:43 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:20:36:20:43 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:20:36:20:43 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:22:28:22:35 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:22:28:22:35 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:23:32:23:39 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:23:32:23:39 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:24:26:24:33 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:24:26:24:33 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:25:29:25:36 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:25:29:25:36 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:26:29:26:36 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:26:29:26:36 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:28:36:28:43 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:28:36:28:43 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:37:40:37:47 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:37:40:37:47 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:40:40:40:47 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:40:40:40:47 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:44:45:44:52 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:44:45:44:52 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:47:42:47:49 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:47:42:47:49 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:56:36:56:43 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:56:36:56:43 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:57:36:57:43 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:57:36:57:43 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:59:28:59:35 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:59:28:59:35 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:60:32:60:39 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:60:32:60:39 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:61:26:61:33 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:61:26:61:33 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:62:29:62:36 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:62:29:62:36 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:63:29:63:36 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:63:29:63:36 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:66:32:66:39 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:66:32:66:39 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:70:30:70:37 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:70:30:70:37 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:78:28:78:35 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:78:28:78:35 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:81:32:81:39 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:81:32:81:39 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | -| asyncpg.py:85:30:85:37 | ControlFlowNode for username | asyncpg.py:13:21:13:28 | ControlFlowNode for username | asyncpg.py:85:30:85:37 | ControlFlowNode for username | This SQL query depends on $@. | asyncpg.py:13:21:13:28 | ControlFlowNode for username | a user-provided value | | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | a user-provided value | | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | This SQL query depends on $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | a user-provided value | | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | This SQL query depends on $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | a user-provided value | diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/asyncpg.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/asyncpg.py deleted file mode 100644 index fd9dfed5877..00000000000 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/asyncpg.py +++ /dev/null @@ -1,87 +0,0 @@ -import asyncio -import asyncpg - -"""This is adapted from ql/python/ql/test/query-tests\Security\CWE-089 -we now prefer to setup routing by flask -""" - -from django.db import connection, models -from flask import Flask -app = Flask(__name__) - -@app.route("/users/") -async def show_user(username): - conn = await asyncpg.connect() - - try: - # The file-like object is passed in as a keyword-only argument. - # See https://magicstack.github.io/asyncpg/current/api/index.html#asyncpg.connection.Connection.copy_from_query - await conn.copy_from_query(username, output="filepath") # BAD - await conn.copy_from_query(username, "arg1", "arg2", output="filepath") # BAD - - await conn.execute(username) # BAD - await conn.executemany(username) # BAD - await conn.fetch(username) # BAD - await conn.fetchrow(username) # BAD - await conn.fetchval(username) # BAD - - pstmt = await conn.prepare(username) # BAD - pstmt.executemany() - pstmt.fetch() - pstmt.fetchrow() - pstmt.fetchval() - - # The sql statement is executed when the `CursorFactory` (obtained by e.g. `conn.cursor()`) is awaited. - # See https://magicstack.github.io/asyncpg/current/api/index.html#asyncpg.cursor.CursorFactory - async with conn.transaction(): - cursor = await conn.cursor(username) # BAD - await cursor.fetch() - - pstmt = await conn.prepare(username) # BAD - pcursor = await pstmt.cursor() - await pcursor.fetch() - - async for record in conn.cursor(username): # BAD - pass - - cursor_factory = conn.cursor(username) # BAD - cursor = await cursor_factory - - finally: - await conn.close() - - pool = await asyncpg.create_pool() - - try: - await pool.copy_from_query(username, output="filepath") # BAD - await pool.copy_from_query(username, "arg1", "arg2", output="filepath") # BAD - - await pool.execute(username) # BAD - await pool.executemany(username) # BAD - await pool.fetch(username) # BAD - await pool.fetchrow(username) # BAD - await pool.fetchval(username) # BAD - - async with pool.acquire() as conn: - await conn.execute(username) # BAD - - conn = await pool.acquire() - try: - await conn.fetch(username) # BAD - finally: - await pool.release(conn) - - finally: - await pool.close() - - async with asyncpg.create_pool() as pool: - await pool.execute(username) # BAD - - async with pool.acquire() as conn: - await conn.execute(username) # BAD - - conn = await pool.acquire() - try: - await conn.fetch(username) # BAD - finally: - await pool.release(conn) From 0a589bed4e20cf4c0e7ee7f152c32245447f8a67 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 5 May 2022 13:11:43 +0200 Subject: [PATCH 31/51] Python: Add inline test of MaD sinks This enables us to keep the framework modeling tests under `/frameworks` folder I had hoped to use `mad-sink[]` syntax, but that was not allowed :( Maybe it oculd be allowed in the future, but for now I'll stick with the more ugly solution of `mad-sink__` --- python/ql/test/experimental/meta/MaDTest.qll | 50 +++++++++++++++++ .../frameworks/asyncpg/MaDTest.expected | 0 .../frameworks/asyncpg/MaDTest.ql | 2 + .../library-tests/frameworks/asyncpg/test.py | 54 +++++++++---------- 4 files changed, 79 insertions(+), 27 deletions(-) create mode 100644 python/ql/test/experimental/meta/MaDTest.qll create mode 100644 python/ql/test/library-tests/frameworks/asyncpg/MaDTest.expected create mode 100644 python/ql/test/library-tests/frameworks/asyncpg/MaDTest.ql diff --git a/python/ql/test/experimental/meta/MaDTest.qll b/python/ql/test/experimental/meta/MaDTest.qll new file mode 100644 index 00000000000..f3f701b6494 --- /dev/null +++ b/python/ql/test/experimental/meta/MaDTest.qll @@ -0,0 +1,50 @@ +import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.internal.PrintNode +private import semmle.python.frameworks.data.ModelsAsData +// need to import Frameworks to get the actual modeling imported +private import semmle.python.Frameworks +// this improt needs to be public to get the query predicates propagated to the actual test files +import TestUtilities.InlineExpectationsTest + +class MadSinkTest extends InlineExpectationsTest { + MadSinkTest() { this = "MadSinkTest" } + + override string getARelevantTag() { + exists(string kind | exists(ModelOutput::getASinkNode(kind)) | + result = "mad-sink__" + kind + ) + } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(location.getFile().getRelativePath()) and + exists(DataFlow::Node sink, string kind | + sink = ModelOutput::getASinkNode(kind).getARhs() and + location = sink.getLocation() and + element = sink.toString() and + value = prettyNodeForInlineTest(sink) and + tag = "mad-sink__" + kind + ) + } +} + +class MadSourceTest extends InlineExpectationsTest { + MadSourceTest() { this = "MadSourceTest" } + + override string getARelevantTag() { + exists(string kind | exists(ModelOutput::getASourceNode(kind)) | + result = "mad-source__" + kind + ) + } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(location.getFile().getRelativePath()) and + exists(DataFlow::Node source, string kind | + source = ModelOutput::getASourceNode(kind).getAnImmediateUse() and + location = source.getLocation() and + element = source.toString() and + value = prettyNodeForInlineTest(source) and + tag = "mad-source__" + kind + ) + } +} diff --git a/python/ql/test/library-tests/frameworks/asyncpg/MaDTest.expected b/python/ql/test/library-tests/frameworks/asyncpg/MaDTest.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/frameworks/asyncpg/MaDTest.ql b/python/ql/test/library-tests/frameworks/asyncpg/MaDTest.ql new file mode 100644 index 00000000000..fef4356ab35 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/asyncpg/MaDTest.ql @@ -0,0 +1,2 @@ +import python +import experimental.meta.MaDTest diff --git a/python/ql/test/library-tests/frameworks/asyncpg/test.py b/python/ql/test/library-tests/frameworks/asyncpg/test.py index 572fc454bed..d739ee88770 100644 --- a/python/ql/test/library-tests/frameworks/asyncpg/test.py +++ b/python/ql/test/library-tests/frameworks/asyncpg/test.py @@ -7,17 +7,17 @@ async def test_connection(): try: # The file-like object is passed in as a keyword-only argument. # See https://magicstack.github.io/asyncpg/current/api/index.html#asyncpg.connection.Connection.copy_from_query - await conn.copy_from_query("sql", output="filepath") # $ getSql="sql" getAPathArgument="filepath" - await conn.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ getSql="sql" getAPathArgument="filepath" + await conn.copy_from_query("sql", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__file-access="filepath" getAPathArgument="filepath" + await conn.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__file-access="filepath" getAPathArgument="filepath" - await conn.copy_from_table("table", output="filepath") # $ getAPathArgument="filepath" - await conn.copy_to_table("table", source="filepath") # $ getAPathArgument="filepath" + await conn.copy_from_table("table", output="filepath") # $ mad-sink__file-access="filepath" getAPathArgument="filepath" + await conn.copy_to_table("table", source="filepath") # $ mad-sink__file-access="filepath" getAPathArgument="filepath" - await conn.execute("sql") # $ getSql="sql" - await conn.executemany("sql") # $ getSql="sql" - await conn.fetch("sql") # $ getSql="sql" - await conn.fetchrow("sql") # $ getSql="sql" - await conn.fetchval("sql") # $ getSql="sql" + await conn.execute("sql") # $ mad-sink__sql-injection="sql" + await conn.executemany("sql") # $ mad-sink__sql-injection="sql" + await conn.fetch("sql") # $ mad-sink__sql-injection="sql" + await conn.fetchrow("sql") # $ mad-sink__sql-injection="sql" + await conn.fetchval("sql") # $ mad-sink__sql-injection="sql" finally: await conn.close() @@ -27,9 +27,9 @@ async def test_prepared_statement(): conn = await asyncpg.connect() try: - pstmt = await conn.prepare("psql") # $ getSql="psql" - pstmt.executemany() - pstmt.fetch() + pstmt = await conn.prepare("psql") # $ mad-sink__sql-injection="psql" + pstmt.executemany() + pstmt.fetch() pstmt.fetchrow() pstmt.fetchval() @@ -46,7 +46,7 @@ async def test_cursor(): cursor = await conn.cursor("sql") # $ getSql="sql" constructedSql="sql" await cursor.fetch() - pstmt = await conn.prepare("psql") # $ getSql="psql" + pstmt = await conn.prepare("psql") # $ mad-sink__sql-injection="psql" pcursor = await pstmt.cursor() # $ getSql="psql" await pcursor.fetch() @@ -69,23 +69,23 @@ async def test_connection_pool(): pool = await asyncpg.create_pool() try: - await pool.copy_from_query("sql", output="filepath") # $ getSql="sql" getAPathArgument="filepath" - await pool.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ getSql="sql" getAPathArgument="filepath" - await pool.copy_from_table("table", output="filepath") # $ getAPathArgument="filepath" - await pool.copy_to_table("table", source="filepath") # $ getAPathArgument="filepath" + await pool.copy_from_query("sql", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__file-access="filepath" getAPathArgument="filepath" + await pool.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__file-access="filepath" getAPathArgument="filepath" + await pool.copy_from_table("table", output="filepath") # $ mad-sink__file-access="filepath" getAPathArgument="filepath" + await pool.copy_to_table("table", source="filepath") # $ mad-sink__file-access="filepath" getAPathArgument="filepath" - await pool.execute("sql") # $ getSql="sql" - await pool.executemany("sql") # $ getSql="sql" - await pool.fetch("sql") # $ getSql="sql" - await pool.fetchrow("sql") # $ getSql="sql" - await pool.fetchval("sql") # $ getSql="sql" + await pool.execute("sql") # $ mad-sink__sql-injection="sql" + await pool.executemany("sql") # $ mad-sink__sql-injection="sql" + await pool.fetch("sql") # $ mad-sink__sql-injection="sql" + await pool.fetchrow("sql") # $ mad-sink__sql-injection="sql" + await pool.fetchval("sql") # $ mad-sink__sql-injection="sql" async with pool.acquire() as conn: - await conn.execute("sql") # $ getSql="sql" + await conn.execute("sql") # $ mad-sink__sql-injection="sql" conn = await pool.acquire() try: - await conn.fetch("sql") # $ getSql="sql" + await conn.fetch("sql") # $ mad-sink__sql-injection="sql" finally: await pool.release(conn) @@ -93,13 +93,13 @@ async def test_connection_pool(): await pool.close() async with asyncpg.create_pool() as pool: - await pool.execute("sql") # $ getSql="sql" + await pool.execute("sql") # $ mad-sink__sql-injection="sql" async with pool.acquire() as conn: - await conn.execute("sql") # $ getSql="sql" + await conn.execute("sql") # $ mad-sink__sql-injection="sql" conn = await pool.acquire() try: - await conn.fetch("sql") # $ getSql="sql" + await conn.fetch("sql") # $ mad-sink__sql-injection="sql" finally: await pool.release(conn) From dfe99b0b51b5f716cb4723f3eea54d08962d6f28 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 5 May 2022 14:14:44 +0200 Subject: [PATCH 32/51] Python: Apply suggestions from code review Co-authored-by: Erik Krogh Kristensen --- python/ql/test/experimental/meta/MaDTest.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/test/experimental/meta/MaDTest.qll b/python/ql/test/experimental/meta/MaDTest.qll index f3f701b6494..03358fe5414 100644 --- a/python/ql/test/experimental/meta/MaDTest.qll +++ b/python/ql/test/experimental/meta/MaDTest.qll @@ -4,7 +4,7 @@ private import semmle.python.dataflow.new.internal.PrintNode private import semmle.python.frameworks.data.ModelsAsData // need to import Frameworks to get the actual modeling imported private import semmle.python.Frameworks -// this improt needs to be public to get the query predicates propagated to the actual test files +// this import needs to be public to get the query predicates propagated to the actual test files import TestUtilities.InlineExpectationsTest class MadSinkTest extends InlineExpectationsTest { From efe306733e680771066a825b9977271b7db36781 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Thu, 5 May 2022 16:51:39 +0200 Subject: [PATCH 33/51] move path-injection MaD to PathInjectionCustomizations.qll --- python/ql/lib/semmle/python/Concepts.qll | 8 -------- .../ql/lib/semmle/python/frameworks/Asyncpg.qll | 4 ++-- .../dataflow/PathInjectionCustomizations.qll | 6 ++++++ .../library-tests/frameworks/asyncpg/test.py | 16 ++++++++-------- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index 296b36417f3..dc461861ace 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -62,14 +62,6 @@ module FileSystemAccess { /** Gets an argument to this file system access that is interpreted as a path. */ abstract DataFlow::Node getAPathArgument(); } - - private import semmle.python.frameworks.data.ModelsAsData - - private class DataAsFileAccess extends Range { - DataAsFileAccess() { this = ModelOutput::getASinkNode("file-access").getARhs() } - - override DataFlow::Node getAPathArgument() { result = this } - } } /** diff --git a/python/ql/lib/semmle/python/frameworks/Asyncpg.qll b/python/ql/lib/semmle/python/frameworks/Asyncpg.qll index 2b2b9ed2e1b..48d3e1534ae 100644 --- a/python/ql/lib/semmle/python/frameworks/Asyncpg.qll +++ b/python/ql/lib/semmle/python/frameworks/Asyncpg.qll @@ -38,8 +38,8 @@ private module Asyncpg { "asyncpg;~Connection;Member[copy_from_query,execute,fetch,fetchrow,fetchval].Argument[0,query:];sql-injection", "asyncpg;~Connection;Member[executemany].Argument[0,command:];sql-injection", // A model of `Connection` and `ConnectionPool`, which provide some methods that access the file system. - "asyncpg;~Connection;Member[copy_from_query,copy_from_table].Argument[output:];file-access", - "asyncpg;~Connection;Member[copy_to_table].Argument[source:];file-access", + "asyncpg;~Connection;Member[copy_from_query,copy_from_table].Argument[output:];path-injection", + "asyncpg;~Connection;Member[copy_to_table].Argument[source:];path-injection", // the `PreparedStatement` class in `asyncpg`. "asyncpg;Connection;Member[prepare].Argument[0,query:];sql-injection", ] diff --git a/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll index 410eee50b29..5a033664823 100644 --- a/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll @@ -59,6 +59,12 @@ module PathInjection { FileSystemAccessAsSink() { this = any(FileSystemAccess e).getAPathArgument() } } + private import semmle.python.frameworks.data.ModelsAsData + + private class DataAsFileSink extends Sink { + DataAsFileSink() { this = ModelOutput::getASinkNode("path-injection").getARhs() } + } + /** * A comparison with a constant string, considered as a sanitizer-guard. */ diff --git a/python/ql/test/library-tests/frameworks/asyncpg/test.py b/python/ql/test/library-tests/frameworks/asyncpg/test.py index d739ee88770..e2e5e1c5826 100644 --- a/python/ql/test/library-tests/frameworks/asyncpg/test.py +++ b/python/ql/test/library-tests/frameworks/asyncpg/test.py @@ -7,11 +7,11 @@ async def test_connection(): try: # The file-like object is passed in as a keyword-only argument. # See https://magicstack.github.io/asyncpg/current/api/index.html#asyncpg.connection.Connection.copy_from_query - await conn.copy_from_query("sql", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__file-access="filepath" getAPathArgument="filepath" - await conn.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__file-access="filepath" getAPathArgument="filepath" + await conn.copy_from_query("sql", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__path-injection="filepath" + await conn.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__path-injection="filepath" - await conn.copy_from_table("table", output="filepath") # $ mad-sink__file-access="filepath" getAPathArgument="filepath" - await conn.copy_to_table("table", source="filepath") # $ mad-sink__file-access="filepath" getAPathArgument="filepath" + await conn.copy_from_table("table", output="filepath") # $ mad-sink__path-injection="filepath" + await conn.copy_to_table("table", source="filepath") # $ mad-sink__path-injection="filepath" await conn.execute("sql") # $ mad-sink__sql-injection="sql" await conn.executemany("sql") # $ mad-sink__sql-injection="sql" @@ -69,10 +69,10 @@ async def test_connection_pool(): pool = await asyncpg.create_pool() try: - await pool.copy_from_query("sql", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__file-access="filepath" getAPathArgument="filepath" - await pool.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__file-access="filepath" getAPathArgument="filepath" - await pool.copy_from_table("table", output="filepath") # $ mad-sink__file-access="filepath" getAPathArgument="filepath" - await pool.copy_to_table("table", source="filepath") # $ mad-sink__file-access="filepath" getAPathArgument="filepath" + await pool.copy_from_query("sql", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__path-injection="filepath" + await pool.copy_from_query("sql", "arg1", "arg2", output="filepath") # $ mad-sink__sql-injection="sql" mad-sink__path-injection="filepath" + await pool.copy_from_table("table", output="filepath") # $ mad-sink__path-injection="filepath" + await pool.copy_to_table("table", source="filepath") # $ mad-sink__path-injection="filepath" await pool.execute("sql") # $ mad-sink__sql-injection="sql" await pool.executemany("sql") # $ mad-sink__sql-injection="sql" From fc1ab06c1c2ac6d03a7058ce0bb3ab739255e570 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Mon, 9 May 2022 12:39:38 +0200 Subject: [PATCH 34/51] autoformat --- python/ql/test/experimental/meta/MaDTest.qll | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/python/ql/test/experimental/meta/MaDTest.qll b/python/ql/test/experimental/meta/MaDTest.qll index 03358fe5414..345fc973284 100644 --- a/python/ql/test/experimental/meta/MaDTest.qll +++ b/python/ql/test/experimental/meta/MaDTest.qll @@ -11,9 +11,7 @@ class MadSinkTest extends InlineExpectationsTest { MadSinkTest() { this = "MadSinkTest" } override string getARelevantTag() { - exists(string kind | exists(ModelOutput::getASinkNode(kind)) | - result = "mad-sink__" + kind - ) + exists(string kind | exists(ModelOutput::getASinkNode(kind)) | result = "mad-sink__" + kind) } override predicate hasActualResult(Location location, string element, string tag, string value) { @@ -32,9 +30,7 @@ class MadSourceTest extends InlineExpectationsTest { MadSourceTest() { this = "MadSourceTest" } override string getARelevantTag() { - exists(string kind | exists(ModelOutput::getASourceNode(kind)) | - result = "mad-source__" + kind - ) + exists(string kind | exists(ModelOutput::getASourceNode(kind)) | result = "mad-source__" + kind) } override predicate hasActualResult(Location location, string element, string tag, string value) { From dea55962893c52dbd9964ed07e0cfdc23745ff01 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Thu, 12 May 2022 14:45:29 +0200 Subject: [PATCH 35/51] update MaD test to reflect that dotted module names don't work --- python/ql/test/library-tests/frameworks/data/test.expected | 1 - python/ql/test/library-tests/frameworks/data/test.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 7fed228e022..591b5286b63 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -48,7 +48,6 @@ isSink | test.py:87:8:87:67 | ControlFlowNode for Attribute() | test-sink | | test.py:89:21:89:23 | ControlFlowNode for one | test-source | | test.py:90:25:90:27 | ControlFlowNode for one | test-source | -| test.py:95:6:95:9 | ControlFlowNode for baz1 | test-source | | test.py:98:6:98:9 | ControlFlowNode for baz2 | test-source | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 67f3ac2fbad..4064d1a808f 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -92,7 +92,7 @@ CallFilter.arityOne(one, two=two) # NO match CallFilter.arityOne(one=one, two=two) # NO match from foo1.bar import baz1 -baz1(baz1) # match +baz1(baz1) # no match, and that's the point. from foo2.bar import baz2 baz2(baz2) # match \ No newline at end of file From fb077bec661fda388ef7f0c91ee4cd1eb35f9e3a Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Thu, 12 May 2022 14:46:54 +0200 Subject: [PATCH 36/51] sync AccessPathSyntax changes --- .../python/frameworks/data/internal/AccessPathSyntax.qll | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/AccessPathSyntax.qll b/python/ql/lib/semmle/python/frameworks/data/internal/AccessPathSyntax.qll index 3d40e04b815..076e12f2671 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/AccessPathSyntax.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/AccessPathSyntax.qll @@ -167,9 +167,16 @@ class AccessPathToken extends string { /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */ string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() } + /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ + pragma[nomagic] + string getArgument(string name, int n) { name = this.getName() and result = this.getArgument(n) } + /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ string getAnArgument() { result = this.getArgument(_) } + /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ + string getAnArgument(string name) { result = this.getArgument(name, _) } + /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */ int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } } From 1f8e7c39f4e874cc3d37d0dcb9f5d9aefe6c327b Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 17 May 2022 10:32:31 +0200 Subject: [PATCH 37/51] fix typo in comment Co-authored-by: Rasmus Wriedt Larsen --- .../semmle/python/frameworks/data/internal/ApiGraphModels.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll index 4681c2b91a5..0a81edf221d 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll @@ -299,7 +299,7 @@ private class AccessPathRange extends AccessPath::Range { bindingset[token] API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { // API graphs use the same label for arguments and parameters. An edge originating from a - // use-node represents be an argument, and an edge originating from a def-node represents a parameter. + // use-node represents an argument, and an edge originating from a def-node represents a parameter. // We just map both to the same thing. token.getName() = ["Argument", "Parameter"] and result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument())) From 55ffdb4aa144efc2f4cfd36f6680444fbfb6bdee Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 17 May 2022 10:34:17 +0200 Subject: [PATCH 38/51] make most imports in ApiGraphModelsSpecific.qll private --- .../frameworks/data/internal/ApiGraphModelsSpecific.qll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index 206a6ff1e52..ae1c59a30d0 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -20,9 +20,9 @@ */ private import python as PY -import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.DataFlow private import ApiGraphModels -import semmle.python.ApiGraphs +import semmle.python.ApiGraphs::API as API class Unit = PY::Unit; From aef592fec843914e8e1a4a3e4006d90dde461ad7 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 17 May 2022 11:10:31 +0200 Subject: [PATCH 39/51] make a more realistic test for self-parameter --- python/ql/test/library-tests/frameworks/data/test.expected | 2 +- python/ql/test/library-tests/frameworks/data/test.py | 4 ++-- python/ql/test/library-tests/frameworks/data/test.ql | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 591b5286b63..6c23dc276de 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -30,7 +30,7 @@ isSink | test.py:33:22:33:24 | ControlFlowNode for one | test-source | | test.py:33:27:33:29 | ControlFlowNode for two | test-source | | test.py:33:32:33:36 | ControlFlowNode for three | test-source | -| test.py:57:7:57:12 | ControlFlowNode for ArgPos | test-source | +| test.py:57:27:57:33 | ControlFlowNode for arg_pos | test-source | | test.py:66:17:66:20 | ControlFlowNode for arg1 | test-source | | test.py:66:23:66:26 | ControlFlowNode for arg2 | test-source | | test.py:66:34:66:43 | ControlFlowNode for namedThing | test-source | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 4064d1a808f..a037ddba9a2 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -54,7 +54,7 @@ val = inst.foo() from testlib import ArgPos -val = ArgPos.selfThing(arg, named=2) +arg_pos = ArgPos(); val = arg_pos.self_thing(arg, named=2); class SubClass (ArgPos.MyClass): def foo(self, arg, named=2, otherName=3): @@ -95,4 +95,4 @@ from foo1.bar import baz1 baz1(baz1) # no match, and that's the point. from foo2.bar import baz2 -baz2(baz2) # match \ No newline at end of file +baz2(baz2) # match diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 4b585309a6a..cc056058514 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -44,7 +44,7 @@ class Sinks extends ModelInput::SinkModelCsv { "testlib;;Member[CallFilter].Member[arityOne].WithArity[1].Argument[any];test-source", // "testlib;;Member[CallFilter].Member[twoOrMore].WithArity[2..].Argument[0..];test-source", // // testing non-positional arguments - "testlib;;Member[ArgPos].Member[selfThing].Argument[self];test-source", // + "testlib;;Member[ArgPos].Instance.Member[self_thing].Argument[self];test-source", // // any argument "testlib;;Member[ArgPos].Member[anyParam].Argument[any];test-source", // "testlib;;Member[ArgPos].Member[anyNamed].Argument[any-named];test-source", // From ce21d7e5a8bcdbaf3b6adbbf4d072d7e90041c98 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 17 May 2022 11:13:59 +0200 Subject: [PATCH 40/51] use `test-sink` for sinks in the MaD test --- .../frameworks/data/test.expected | 40 +++++++++---------- .../library-tests/frameworks/data/test.ql | 22 +++++----- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 6c23dc276de..1a95bc0d359 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -18,23 +18,23 @@ isSink | test.py:10:8:10:22 | ControlFlowNode for Attribute() | test-sink | | test.py:11:8:11:30 | ControlFlowNode for Attribute() | test-sink | | test.py:12:8:12:34 | ControlFlowNode for Attribute() | test-sink | -| test.py:16:11:16:13 | ControlFlowNode for one | test-source | -| test.py:17:19:17:21 | ControlFlowNode for two | test-source | -| test.py:17:24:17:28 | ControlFlowNode for three | test-source | -| test.py:17:31:17:34 | ControlFlowNode for four | test-source | -| test.py:18:37:18:40 | ControlFlowNode for five | test-source | -| test.py:19:21:19:26 | ControlFlowNode for second | test-source | -| test.py:30:21:30:23 | ControlFlowNode for one | test-source | -| test.py:32:22:32:24 | ControlFlowNode for one | test-source | -| test.py:32:27:32:29 | ControlFlowNode for two | test-source | -| test.py:33:22:33:24 | ControlFlowNode for one | test-source | -| test.py:33:27:33:29 | ControlFlowNode for two | test-source | -| test.py:33:32:33:36 | ControlFlowNode for three | test-source | -| test.py:57:27:57:33 | ControlFlowNode for arg_pos | test-source | -| test.py:66:17:66:20 | ControlFlowNode for arg1 | test-source | -| test.py:66:23:66:26 | ControlFlowNode for arg2 | test-source | -| test.py:66:34:66:43 | ControlFlowNode for namedThing | test-source | -| test.py:67:34:67:44 | ControlFlowNode for secondNamed | test-source | +| test.py:16:11:16:13 | ControlFlowNode for one | test-sink | +| test.py:17:19:17:21 | ControlFlowNode for two | test-sink | +| test.py:17:24:17:28 | ControlFlowNode for three | test-sink | +| test.py:17:31:17:34 | ControlFlowNode for four | test-sink | +| test.py:18:37:18:40 | ControlFlowNode for five | test-sink | +| test.py:19:21:19:26 | ControlFlowNode for second | test-sink | +| test.py:30:21:30:23 | ControlFlowNode for one | test-sink | +| test.py:32:22:32:24 | ControlFlowNode for one | test-sink | +| test.py:32:27:32:29 | ControlFlowNode for two | test-sink | +| test.py:33:22:33:24 | ControlFlowNode for one | test-sink | +| test.py:33:27:33:29 | ControlFlowNode for two | test-sink | +| test.py:33:32:33:36 | ControlFlowNode for three | test-sink | +| test.py:57:27:57:33 | ControlFlowNode for arg_pos | test-sink | +| test.py:66:17:66:20 | ControlFlowNode for arg1 | test-sink | +| test.py:66:23:66:26 | ControlFlowNode for arg2 | test-sink | +| test.py:66:34:66:43 | ControlFlowNode for namedThing | test-sink | +| test.py:67:34:67:44 | ControlFlowNode for secondNamed | test-sink | | test.py:71:8:71:39 | ControlFlowNode for Attribute() | test-sink | | test.py:72:8:72:47 | ControlFlowNode for Attribute() | test-sink | | test.py:76:22:76:22 | ControlFlowNode for x | test-sink | @@ -46,9 +46,9 @@ isSink | test.py:85:8:85:53 | ControlFlowNode for Attribute() | test-sink | | test.py:86:8:86:60 | ControlFlowNode for Attribute() | test-sink | | test.py:87:8:87:67 | ControlFlowNode for Attribute() | test-sink | -| test.py:89:21:89:23 | ControlFlowNode for one | test-source | -| test.py:90:25:90:27 | ControlFlowNode for one | test-source | -| test.py:98:6:98:9 | ControlFlowNode for baz2 | test-source | +| test.py:89:21:89:23 | ControlFlowNode for one | test-sink | +| test.py:90:25:90:27 | ControlFlowNode for one | test-sink | +| test.py:98:6:98:9 | ControlFlowNode for baz2 | test-sink | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | | test.py:9:8:9:14 | ControlFlowNode for alias() | test-source | diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index cc056058514..bff07833a2a 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -36,21 +36,21 @@ class Sinks extends ModelInput::SinkModelCsv { [ "testlib;;Member[mySink].Argument[0,sinkName:];test-sink", // testing argument syntax - "testlib;;Member[Args].Member[arg0].Argument[0];test-source", // - "testlib;;Member[Args].Member[arg1to3].Argument[1..3];test-source", // - "testlib;;Member[Args].Member[lastarg].Argument[N-1];test-source", // - "testlib;;Member[Args].Member[nonFist].Argument[1..];test-source", // + "testlib;;Member[Args].Member[arg0].Argument[0];test-sink", // + "testlib;;Member[Args].Member[arg1to3].Argument[1..3];test-sink", // + "testlib;;Member[Args].Member[lastarg].Argument[N-1];test-sink", // + "testlib;;Member[Args].Member[nonFist].Argument[1..];test-sink", // // callsite filter. - "testlib;;Member[CallFilter].Member[arityOne].WithArity[1].Argument[any];test-source", // - "testlib;;Member[CallFilter].Member[twoOrMore].WithArity[2..].Argument[0..];test-source", // + "testlib;;Member[CallFilter].Member[arityOne].WithArity[1].Argument[any];test-sink", // + "testlib;;Member[CallFilter].Member[twoOrMore].WithArity[2..].Argument[0..];test-sink", // // testing non-positional arguments - "testlib;;Member[ArgPos].Instance.Member[self_thing].Argument[self];test-source", // + "testlib;;Member[ArgPos].Instance.Member[self_thing].Argument[self];test-sink", // // any argument - "testlib;;Member[ArgPos].Member[anyParam].Argument[any];test-source", // - "testlib;;Member[ArgPos].Member[anyNamed].Argument[any-named];test-source", // + "testlib;;Member[ArgPos].Member[anyParam].Argument[any];test-sink", // + "testlib;;Member[ArgPos].Member[anyNamed].Argument[any-named];test-sink", // // testing package syntax - "foo1.bar;;Member[baz1].Argument[any];test-source", // - "foo2;;Member[bar].Member[baz2].Argument[any];test-source", // + "foo1.bar;;Member[baz1].Argument[any];test-sink", // + "foo2;;Member[bar].Member[baz2].Argument[any];test-sink", // ] } } From f273ccf73bc5d9b97972ff4b18d21a07e2e62cc8 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 17 May 2022 11:17:15 +0200 Subject: [PATCH 41/51] add explicit test of what Parameter[0] matches --- python/ql/test/library-tests/frameworks/data/test.expected | 1 + python/ql/test/library-tests/frameworks/data/test.py | 4 ++++ python/ql/test/library-tests/frameworks/data/test.ql | 1 + 3 files changed, 6 insertions(+) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 1a95bc0d359..0b962e9be63 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -84,6 +84,7 @@ isSource | test.py:85:42:85:52 | ControlFlowNode for getSource() | test-source | | test.py:86:49:86:59 | ControlFlowNode for getSource() | test-source | | test.py:87:56:87:66 | ControlFlowNode for getSource() | test-source | +| test.py:101:29:101:31 | ControlFlowNode for arg | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index a037ddba9a2..7b060946e57 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -96,3 +96,7 @@ baz1(baz1) # no match, and that's the point. from foo2.bar import baz2 baz2(baz2) # match + +class OtherSubClass (ArgPos.MyClass): + def otherSelfTest(self, arg, named=2, otherName=3): # test that Parameter[0] hits `arg`. + pass diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index bff07833a2a..e42fd024e98 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -76,6 +76,7 @@ class Sources extends ModelInput::SourceModelCsv { "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[self];test-source", // "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[named:];test-source", // "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[secondAndAfter].Parameter[1..];test-source", // + "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[otherSelfTest].Parameter[0];test-source", // ] } } From 2868eb61eab77b9fa038a91a75c68f375d2516d0 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 17 May 2022 12:08:53 +0200 Subject: [PATCH 42/51] add test for Parameter[any] and Parameter[any-named] --- python/ql/test/library-tests/frameworks/data/test.expected | 6 ++++++ python/ql/test/library-tests/frameworks/data/test.py | 6 ++++++ python/ql/test/library-tests/frameworks/data/test.ql | 2 ++ 3 files changed, 14 insertions(+) diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 0b962e9be63..16ec37811a0 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -85,6 +85,12 @@ isSource | test.py:86:49:86:59 | ControlFlowNode for getSource() | test-source | | test.py:87:56:87:66 | ControlFlowNode for getSource() | test-source | | test.py:101:29:101:31 | ControlFlowNode for arg | test-source | +| test.py:104:18:104:21 | ControlFlowNode for self | test-source | +| test.py:104:24:104:29 | ControlFlowNode for param1 | test-source | +| test.py:104:32:104:37 | ControlFlowNode for param2 | test-source | +| test.py:107:18:107:21 | ControlFlowNode for self | test-source | +| test.py:107:24:107:28 | ControlFlowNode for name1 | test-source | +| test.py:107:31:107:35 | ControlFlowNode for name2 | test-source | syntaxErrors | Member[foo | | Member[foo] .Member[bar] | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 7b060946e57..abb4af545fa 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -100,3 +100,9 @@ baz2(baz2) # match class OtherSubClass (ArgPos.MyClass): def otherSelfTest(self, arg, named=2, otherName=3): # test that Parameter[0] hits `arg`. pass + + def anyParam(self, param1, param2): # Parameter[any] matches all 3. + pass + + def anyNamed(self, name1, name2=2): # Parameter[any-named] matches all 3. + pass diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index e42fd024e98..86f960b1adf 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -77,6 +77,8 @@ class Sources extends ModelInput::SourceModelCsv { "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[foo].Parameter[named:];test-source", // "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[secondAndAfter].Parameter[1..];test-source", // "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[otherSelfTest].Parameter[0];test-source", // + "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[anyParam].Parameter[any];test-source", // + "testlib;;Member[ArgPos].Member[MyClass].Subclass.Member[anyNamed].Parameter[any-named];test-source", // ] } } From 818975dc569cc96b07930dfc86880fc5acdfcd58 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 17 May 2022 12:25:52 +0200 Subject: [PATCH 43/51] sync upstream typo fixes --- .../semmle/python/frameworks/data/internal/ApiGraphModels.qll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll index 0a81edf221d..69563a3eab4 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll @@ -59,7 +59,7 @@ * A `(package,type)` pair may refer to a static type or a synthetic type name used internally in the model. * Synthetic type names can be used to reuse intermediate sub-paths, when there are multiple ways to access the same * element. - * See `ModelsAsData.qll` for the langauge-specific interpretation of packages and static type names. + * See `ModelsAsData.qll` for the language-specific interpretation of packages and static type names. * * By convention, if one wants to avoid clashes with static types from the package, the type name * should be prefixed with a tilde character (`~`). For example, `(foo, ~Bar)` can be used to indicate that @@ -396,7 +396,7 @@ predicate isValidTokenNameInIdentifyingAccessPath(string name) { } /** - * Holds if `name` is a valid name for an access path token with no arguments, occuring + * Holds if `name` is a valid name for an access path token with no arguments, occurring * in an identifying access path. */ bindingset[name] From bb289e29b90b6afdee91e0b1d045ee555cb4522a Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 17 May 2022 12:26:31 +0200 Subject: [PATCH 44/51] sync typo fix to JS/RB --- .../javascript/frameworks/data/internal/ApiGraphModels.qll | 2 +- .../lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll index 127d9ca5122..69563a3eab4 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll @@ -299,7 +299,7 @@ private class AccessPathRange extends AccessPath::Range { bindingset[token] API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { // API graphs use the same label for arguments and parameters. An edge originating from a - // use-node represents be an argument, and an edge originating from a def-node represents a parameter. + // use-node represents an argument, and an edge originating from a def-node represents a parameter. // We just map both to the same thing. token.getName() = ["Argument", "Parameter"] and result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument())) diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll index 127d9ca5122..69563a3eab4 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll @@ -299,7 +299,7 @@ private class AccessPathRange extends AccessPath::Range { bindingset[token] API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { // API graphs use the same label for arguments and parameters. An edge originating from a - // use-node represents be an argument, and an edge originating from a def-node represents a parameter. + // use-node represents an argument, and an edge originating from a def-node represents a parameter. // We just map both to the same thing. token.getName() = ["Argument", "Parameter"] and result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument())) From 03da62713cd0560e1581503e4cb282d1fa0b5536 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 17 May 2022 12:32:40 +0200 Subject: [PATCH 45/51] fix typo identified by QL-for-QL --- .../python/frameworks/data/internal/ApiGraphModelsSpecific.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index ae1c59a30d0..20d99eb0335 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -177,7 +177,7 @@ predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) { } /** - * Holds if `name` is a valid name for an access path token with no arguments, occuring + * Holds if `name` is a valid name for an access path token with no arguments, occurring * in an identifying access path. */ predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) { From d5f0446940b7430aca96aa9fba5d6adb217dc50f Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 17 May 2022 22:34:38 +0200 Subject: [PATCH 46/51] exclude self parameter from the API-graph edge for keywordParameter --- python/ql/lib/semmle/python/ApiGraphs.qll | 6 ++++-- python/ql/test/library-tests/frameworks/data/test.expected | 2 -- python/ql/test/library-tests/frameworks/data/test.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index eff5b6ab64e..33aabe35383 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -610,9 +610,11 @@ module API { ref.asExpr() = fn.getInnerScope().getArg(i) ) or - exists(string name | + exists(string name, PY::Parameter param | lbl = Label::keywordParameter(name) and - ref.asExpr() = fn.getInnerScope().getArgByName(name) + param = fn.getInnerScope().getArgByName(name) and + not param.isSelf() and + ref.asExpr() = param ) or lbl = Label::selfParameter() and diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 16ec37811a0..08300f4d905 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -85,10 +85,8 @@ isSource | test.py:86:49:86:59 | ControlFlowNode for getSource() | test-source | | test.py:87:56:87:66 | ControlFlowNode for getSource() | test-source | | test.py:101:29:101:31 | ControlFlowNode for arg | test-source | -| test.py:104:18:104:21 | ControlFlowNode for self | test-source | | test.py:104:24:104:29 | ControlFlowNode for param1 | test-source | | test.py:104:32:104:37 | ControlFlowNode for param2 | test-source | -| test.py:107:18:107:21 | ControlFlowNode for self | test-source | | test.py:107:24:107:28 | ControlFlowNode for name1 | test-source | | test.py:107:31:107:35 | ControlFlowNode for name2 | test-source | syntaxErrors diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index abb4af545fa..f01b613ce40 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -101,8 +101,8 @@ class OtherSubClass (ArgPos.MyClass): def otherSelfTest(self, arg, named=2, otherName=3): # test that Parameter[0] hits `arg`. pass - def anyParam(self, param1, param2): # Parameter[any] matches all 3. + def anyParam(self, param1, param2): # Parameter[any] matches all non-self parameters pass - def anyNamed(self, name1, name2=2): # Parameter[any-named] matches all 3. + def anyNamed(self, name1, name2=2): # Parameter[any-named] matches all non-self named parameters pass From a5b11e88b42d3b46ac84709a3bc31423176ee428 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Thu, 19 May 2022 20:00:43 +0200 Subject: [PATCH 47/51] update doc to make it clear that moduleImport(..) does not refer to PyPI names Co-authored-by: Rasmus Wriedt Larsen --- python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll index 1ced4d15ee9..2af91a69432 100644 --- a/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll +++ b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll @@ -5,7 +5,8 @@ * - Use the `ModelInput` module to contribute new models. * - Use the `ModelOutput` module to access the model results in terms of API nodes. * - * The package name refers to a Pip package name. + * The package name refers to the top-level module the import comes from, and not a PyPI package. + * So for `from foo.bar import baz`, the package will be `foo`. */ private import python From 204e01fc2438854ff44691eb461108ad364d25e8 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Thu, 19 May 2022 21:03:59 +0200 Subject: [PATCH 48/51] change getNumArgument to only count positional arguments --- python/ql/lib/semmle/python/ApiGraphs.qll | 2 +- python/ql/test/library-tests/frameworks/data/test.expected | 3 ++- python/ql/test/library-tests/frameworks/data/test.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index 33aabe35383..43bba85ba79 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -356,7 +356,7 @@ module API { } /** Gets the number of arguments of this call. Both positional and named arguments are counted. */ - int getNumArgument() { result = count([this.getArg(_), this.getArgByName(_)]) } + int getNumArgument() { result = count(this.getArg(_)) } } /** diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index 08300f4d905..fcf94bb784e 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -47,7 +47,8 @@ isSink | test.py:86:8:86:60 | ControlFlowNode for Attribute() | test-sink | | test.py:87:8:87:67 | ControlFlowNode for Attribute() | test-sink | | test.py:89:21:89:23 | ControlFlowNode for one | test-sink | -| test.py:90:25:90:27 | ControlFlowNode for one | test-sink | +| test.py:91:21:91:23 | ControlFlowNode for one | test-sink | +| test.py:91:30:91:32 | ControlFlowNode for two | test-sink | | test.py:98:6:98:9 | ControlFlowNode for baz2 | test-sink | isSource | test.py:3:5:3:15 | ControlFlowNode for getSource() | test-source | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index f01b613ce40..6ff4271dca2 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -87,8 +87,8 @@ mySink(Steps.preserveAllButFirstArgument("foo", getSource())) # FLOW mySink(Steps.preserveAllButFirstArgument("foo", "bar", getSource())) # FLOW CallFilter.arityOne(one) # match -CallFilter.arityOne(one=one) # match -CallFilter.arityOne(one, two=two) # NO match +CallFilter.arityOne(one=one) # NO match +CallFilter.arityOne(one, two=two) # match - on both the named and positional arguments CallFilter.arityOne(one=one, two=two) # NO match from foo1.bar import baz1 From b6a4f4373730722f6f23cbd77cfb9478439b05bb Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Mon, 23 May 2022 18:51:33 +0200 Subject: [PATCH 49/51] expand qldoc for `getNumArgument` Co-authored-by: Rasmus Wriedt Larsen --- python/ql/lib/semmle/python/ApiGraphs.qll | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index 43bba85ba79..11ad08f1341 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -355,7 +355,12 @@ module API { result.getAnImmediateUse() = this } - /** Gets the number of arguments of this call. Both positional and named arguments are counted. */ + /** + * Gets the number of positional arguments of this call. + * + * Note: This is used for `WithArity[]` in modeling-as-data, where we thought + * including keyword arguments didn't make much sense. + */ int getNumArgument() { result = count(this.getArg(_)) } } From f8281b43b16fd47909e1c6b66cda463b4b3ab28d Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Mon, 23 May 2022 19:58:48 +0200 Subject: [PATCH 50/51] autoformat --- python/ql/lib/semmle/python/ApiGraphs.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index 11ad08f1341..fcb89e5f866 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -358,7 +358,7 @@ module API { /** * Gets the number of positional arguments of this call. * - * Note: This is used for `WithArity[]` in modeling-as-data, where we thought + * Note: This is used for `WithArity[]` in modeling-as-data, where we thought * including keyword arguments didn't make much sense. */ int getNumArgument() { result = count(this.getArg(_)) } From e557d8839bae0dbc2dbb5d611da747f4afbeaf69 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Mon, 30 May 2022 12:21:42 +0200 Subject: [PATCH 51/51] have the Instance token just be an alias for ReturnValue --- .../python/frameworks/data/internal/ApiGraphModelsSpecific.qll | 2 +- python/ql/test/library-tests/frameworks/data/test.expected | 1 - python/ql/test/library-tests/frameworks/data/test.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index 20d99eb0335..92f7dcbd50b 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -53,7 +53,7 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { result = node.getMember(token.getAnArgument()) or token.getName() = "Instance" and - result = node.getASubclass*().getReturn() // In Python `Instance` is just an alias for `Subclass.ReturnValue` + result = node.getReturn() // In Python `Instance` is just an alias for `ReturnValue` or token.getName() = "Awaited" and result = node.getAwaited() diff --git a/python/ql/test/library-tests/frameworks/data/test.expected b/python/ql/test/library-tests/frameworks/data/test.expected index fcf94bb784e..68de6ecd878 100644 --- a/python/ql/test/library-tests/frameworks/data/test.expected +++ b/python/ql/test/library-tests/frameworks/data/test.expected @@ -68,7 +68,6 @@ isSource | test.py:39:11:39:20 | ControlFlowNode for Await | test-source | | test.py:41:8:41:27 | ControlFlowNode for Attribute() | test-source | | test.py:46:7:46:16 | ControlFlowNode for SubClass() | test-source | -| test.py:51:8:51:18 | ControlFlowNode for Sub2Class() | test-source | | test.py:53:7:53:16 | ControlFlowNode for Attribute() | test-source | | test.py:60:13:60:16 | ControlFlowNode for self | test-source | | test.py:60:24:60:28 | ControlFlowNode for named | test-source | diff --git a/python/ql/test/library-tests/frameworks/data/test.py b/python/ql/test/library-tests/frameworks/data/test.py index 6ff4271dca2..ea1a6e0d4d4 100644 --- a/python/ql/test/library-tests/frameworks/data/test.py +++ b/python/ql/test/library-tests/frameworks/data/test.py @@ -48,7 +48,7 @@ sub = SubClass() class Sub2Class (CommonTokens.Class): pass -sub2 = Sub2Class() +sub2 = Sub2Class() # TODO: Currently not recognized as an instance of CommonTokens.Class val = inst.foo()