Merge branch 'main' into jorgectf/python/deserialization

This commit is contained in:
Rasmus Wriedt Larsen
2021-09-28 16:49:33 +02:00
7130 changed files with 597514 additions and 224228 deletions

View File

@@ -106,7 +106,7 @@
"prefix": "type tracking",
"body": [
"/** Gets a reference to ${3:a thing}. */",
"private DataFlow::LocalSourceNode ${1:myType}(DataFlow::TypeTracker t) {",
"private DataFlow::TypeTrackingNode ${1:myType}(DataFlow::TypeTracker t) {",
" t.start() and",
" result = ${2:value}",
" or",
@@ -152,4 +152,102 @@
]
},
"Type tracking class": {
"scope": "ql",
"prefix": "type tracking class",
"body": [
"/**",
" * Provides models for the `${TM_SELECTED_TEXT}` class",
" *",
" * See ${1:https://apiref (TODO)}.",
" */",
"module ${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/} {",
" /** Gets a reference to the `${TM_SELECTED_TEXT}` class. */",
" private API::Node classRef() {",
" result = API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")",
" }",
"",
" /**",
" * A source of instances of `${TM_SELECTED_TEXT}`, extend this class to model new instances.",
" *",
" * This can include instantiations of the class, return values from function",
" * calls, or a special parameter that will be set when functions are called by an external",
" * library.",
" *",
" * Use the predicate `${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/}::instance()` to get references to instances of `${TM_SELECTED_TEXT}`.",
" */",
" abstract class InstanceSource extends DataFlow::LocalSourceNode { }",
"",
" /** A direct instantiation of `${TM_SELECTED_TEXT}`. */",
" private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {",
" ClassInstantiation() { this = classRef().getACall() }",
" }",
"",
" /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
" private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {",
" t.start() and",
" result instanceof InstanceSource",
" or",
" exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))",
" }",
"",
" /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
" DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }",
"",
" /**",
" * Taint propagation for `${TM_SELECTED_TEXT}`.",
" */",
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
" InstanceTaintSteps() { this = \"${TM_SELECTED_TEXT}\" }",
" ",
" override DataFlow::Node getInstance() { result = instance() }",
" ",
" override string getAttributeName() { none() }",
" ",
" override string getMethodName() { none() }",
" ",
" override string getAsyncMethodName() { none() }",
" }",
"",
" /**",
" * Extra taint propagation for `${TM_SELECTED_TEXT}`, not covered by `InstanceTaintSteps`.",
" */",
" private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {",
" override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {",
" // TODO",
" none()",
" }",
" }",
"}",
],
"description": "Type tracking class (select full class path before inserting)",
},
"foo": {
"scope": "ql",
"prefix": "foo",
"body": [
" /**",
" * Taint propagation for `$1`.",
" */",
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
" InstanceTaintSteps() { this = \"$1\" }",
"",
" override DataFlow::Node getInstance() { result = instance() }",
"",
" override string getAttributeName() { none() }",
"",
" override string getMethodName() { none() }",
"",
" override string getAsyncMethodName() { none() }",
" }",
],
},
"API graph .getMember chain": {
"scope": "ql",
"prefix": "api graph .getMember chain",
"body": [
"API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")"
],
"description": "API graph .getMember chain (select full path before inserting)",
},
}

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Updated the _Use of a broken or weak cryptographic algorithm_ (`py/weak-cryptographic-algorithm`) query, so it alerts on any use of a weak cryptographic non-hashing algorithm. Introduced a new query _Use of a broken or weak cryptographic hashing algorithm on sensitive data_ (`py/weak-sensitive-data-hashing`) to handle weak cryptographic hashing algorithms, which only alerts when used on sensitive data.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `idna`, for encoding/decoding Internationalised Domain Names in Applications.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `simplejson`.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `ujson`.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* API graph nodes now contain a `getAwaited()` member predicate, for getting the result of awaiting an item, such as `await foo`.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added model of SQL execution in `clickhouse-driver` and `aioch` PyPI packages, resulting in additional sinks for the SQL Injection query (`py/sql-injection`). This modeling was originally [submitted as a contribution by @japroc](https://github.com/github/codeql/pull/5889).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of sources/sinks when using the `aiohttp.web` web framework.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Expanded modeling of sensitive data sources to include: subscripting with a key that indicates sensitive data (`obj["password"]`), parameters whose names indicate sensitive data (`def func(password):`), and assignments to variables whose names indicate sensitive data (`password = ...`).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of sources/sinks when using `twisted` to create web servers.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `jmespath`.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `rsa`.

View File

@@ -0,0 +1,5 @@
lgtm,codescanning
* A new class `DataFlow::MethodCallNode` extends `DataFlow::CallCfgNode` with convenient methods for
accessing the receiver and method name of a method call.
* The `LocalSourceNode` class now has a `getAMethodCall` method, with which one can easily access
method calls with the given node as a receiver.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `MarkupSafe`.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added `HTTP::Server::CookieWrite` concept for statements that sets a cookie in an HTTP response, along with modeling of this in supported web frameworks (aiohttp/flask/django/tornado/twisted).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* The DataFlow libraries have been augmented with support for `Configuration`-specific in-place read steps at, for example, sinks and custom taint steps. This means that it is now possible to specify sinks that accept flow with non-empty access paths.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of raw SQL execution from the PyPI package `peewee`.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* The `track` and `backtrack` methods on `LocalSourceNode` are in the process of being deprecated. When using type trackers, the corresponding methods on `TypeTrackingNode` should be used instead.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Changed the way to provide extra sources/sinks for `@kind path-problem` queries, to avoid a potential performance problem due to re-evaluation of data-flow configurations. Please use the new `<query>Customization.qll` files and extend their classes instead (such as extending the `Sink` class from `python/ql/src/semmle/python/security/dataflow/SqlInjectionCustomizations.qll`). This is relevant for the queries: `py/sql-injection`, `py/code-injection`, `py/command-line-injection`, `py/reflective-xss`, `py/url-redirection`, `py/unsafe-deserialization`, `py/stack-trace-exposure`, `py/path-injection`.

View File

@@ -0,0 +1,4 @@
lgtm,codescanning
* The `importNode` predicate from the data-flow library has been deprecated. In its place, we
recommend using the API graphs library, accessible via `import semmle.python.ApiGraphs`.

View File

@@ -0,0 +1,3 @@
lgtm,codescanning
* Added an experimental _Inefficient regular expression_ (`py/redos`) query, which is already available in JavaScript.
* Added an experimental _Polynomial regular expression used on uncontrolled data_ (`py/polynomial-redos`), which is already available in JavaScript.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Updated _Modification of parameter with default_ (`py/modification-of-default-value`) query to use the new data flow library instead of the old taint tracking library and to remove the use of points-to analysis. You may see differences in the results found by the query, but overall this change should result in a more robust and accurate analysis.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of SQL execution in the `Flask-SQLAlchemy` PyPI package, resulting in additional sinks for the SQL Injection query (`py/sql-injection`).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of SQL execution in the `SQLAlchemy` PyPI package, resulting in additional sinks for the SQL Injection query (`py/sql-injection`). This modeling was originally [submitted as a contribution by @mrthankyou](https://github.com/github/codeql/pull/5680).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Expanded the query _SQL query built from user-controlled sources_ (`py/sql-injection`) to alert if user-input is added to a TextClause from SQLAlchemy, since that can lead to SQL injection.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Function parameters with default values will now see flow from those values.

View File

@@ -0,0 +1,4 @@
---
dependencies: {}
compiled: false
lockVersion: 1.0.0

View File

@@ -1,3 +1,4 @@
name: codeql-python-examples
version: 0.0.0
libraryPathDependencies: codeql-python
name: codeql/python-examples
version: 0.0.2
dependencies:
codeql/python-all: "*"

View File

@@ -0,0 +1,4 @@
---
dependencies: {}
compiled: false
lockVersion: 1.0.0

7
python/ql/lib/qlpack.yml Normal file
View File

@@ -0,0 +1,7 @@
name: codeql/python-all
version: 0.0.2
dbscheme: semmlecode.python.dbscheme
extractor: python
library: true
dependencies:
codeql/python-upgrades: 0.0.2

View File

@@ -0,0 +1,3 @@
/** DEPRECATED: Use `semmle.python.concepts.CryptoAlgorithms` instead. */
import semmle.python.concepts.CryptoAlgorithms

View File

@@ -97,6 +97,11 @@ module API {
*/
Node getASubclass() { result = getASuccessor(Label::subclass()) }
/**
* Gets a node representing the result from awaiting this node.
*/
Node getAwaited() { result = getASuccessor(Label::await()) }
/**
* Gets a string representation of the lexicographically least among all shortest access paths
* from the root to this node.
@@ -349,22 +354,135 @@ module API {
)
}
private import semmle.python.types.Builtins as Builtins
/** Gets the name of a known built-in. */
private string getBuiltInName() {
// These lists were created by inspecting the `builtins` and `__builtin__` modules in
// Python 3 and 2 respectively, using the `dir` built-in.
// Built-in functions and exceptions shared between Python 2 and 3
result in [
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
// Exceptions
"ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
"BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
"FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
"ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
"LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
"OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
"RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
"SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
// Added for compatibility
"exec"
]
or
// Built-in constants shared between Python 2 and 3
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
or
// Python 3 only
result in [
"ascii", "breakpoint", "bytes", "exec",
// Exceptions
"BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
"ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
"FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
"NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
"ResourceWarning", "StopAsyncIteration", "TimeoutError"
]
or
// Python 2 only
result in [
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
"unichr", "unicode", "xrange"
]
}
/**
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
*
* Currently this is an over-approximation, and does not account for things like overwriting a
* Currently this is an over-approximation, and may not account for things like overwriting a
* built-in with a different value.
*/
private DataFlow::Node likely_builtin(string name) {
result.asCfgNode() =
any(NameNode n |
n.isGlobal() and
n.isLoad() and
name = n.getId() and
name in [any(Builtins::Builtin b).getName(), "None", "True", "False"]
)
exists(Module m |
result.asCfgNode() =
any(NameNode n |
possible_builtin_accessed_in_module(n, name, m) and
not possible_builtin_defined_in_module(name, m)
)
)
}
/**
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
* a value in the module `m`.
*/
private predicate possible_builtin_defined_in_module(string name, Module m) {
global_name_defined_in_module(name, m) and
name = getBuiltInName()
}
/**
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
* built-in) inside the module `m`.
*/
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
n.isGlobal() and
n.isLoad() and
name = n.getId() and
name = getBuiltInName() and
m = n.getEnclosingModule()
}
/**
* Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
* built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
*/
private predicate name_possibly_defined_in_import_star(NameNode n, string name, Scope s) {
n.isLoad() and
name = n.getId() and
// Not already defined in an enclosing scope.
not exists(LocalVariable v |
v.getId() = name and v.getScope() = n.getScope().getEnclosingScope*()
) and
not name = getBuiltInName() and
s = n.getScope().getEnclosingScope*() and
exists(potential_import_star_base(s)) and
not global_name_defined_in_module(name, n.getEnclosingModule())
}
/** Holds if a global variable called `name` is assigned a value in the module `m`. */
private predicate global_name_defined_in_module(string name, Module m) {
exists(NameNode n |
not exists(LocalVariable v | n.defines(v)) and
n.isStore() and
name = n.getId() and
m = n.getEnclosingModule()
)
}
/**
* Gets the API graph node for all modules imported with `from ... import *` inside the scope `s`.
*
* For example, given
*
* `from foo.bar import *`
*
* this would be the API graph node with the path
*
* `moduleImport("foo").getMember("bar")`
*/
private TApiNode potential_import_star_base(Scope s) {
exists(DataFlow::Node ref |
ref.asCfgNode() = any(ImportStarNode n | n.getScope() = s).getModule() and
use(result, ref)
)
}
/**
@@ -396,11 +514,28 @@ module API {
exists(DataFlow::Node superclass | pred.flowsTo(superclass) |
ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
)
or
// awaiting
exists(Await await, DataFlow::Node awaitedValue |
lbl = Label::await() and
ref.asExpr() = await and
await.getValue() = awaitedValue.asExpr() and
pred.flowsTo(awaitedValue)
)
)
or
// Built-ins, treated as members of the module `builtins`
base = MkModuleImport("builtins") and
lbl = Label::member(any(string name | ref = likely_builtin(name)))
or
// Unknown variables that may belong to a module imported with `import *`
exists(Scope s |
base = potential_import_star_base(s) and
lbl =
Label::member(any(string name |
name_possibly_defined_in_import_star(ref.asCfgNode(), name, s)
))
)
}
/**
@@ -426,7 +561,7 @@ module API {
*
* The flow from `src` to that node may be inter-procedural.
*/
private DataFlow::LocalSourceNode trackUseNode(
private DataFlow::TypeTrackingNode trackUseNode(
DataFlow::LocalSourceNode src, DataFlow::TypeTracker t
) {
t.start() and
@@ -444,7 +579,6 @@ module API {
cached
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
// We exclude module variable nodes, as these do not correspond to real uses.
not result instanceof DataFlow::ModuleVariableNode
}
@@ -512,5 +646,9 @@ private module Label {
/** Gets the `return` edge label. */
string return() { result = "getReturn()" }
/** Gets the `subclass` edge label. */
string subclass() { result = "getASubclass()" }
/** Gets the `await` edge label. */
string await() { result = "getAwaited()" }
}

View File

@@ -49,12 +49,15 @@ abstract class AstNode extends AstNode_ {
/** Whether this contains `inner` syntactically */
predicate contains(AstNode inner) { this.getAChildNode+() = inner }
/** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
predicate containsInScope(AstNode inner) {
pragma[noinline]
private predicate containsInScope(AstNode inner, Scope scope) {
this.contains(inner) and
this.getScope() = inner.getScope() and
not inner instanceof Scope
not inner instanceof Scope and
scope = this.getScope()
}
/** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
predicate containsInScope(AstNode inner) { this.containsInScope(inner, inner.getScope()) }
}
/* Parents */

View File

@@ -4,7 +4,7 @@
* provide concrete subclasses.
*/
import python
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
@@ -72,6 +72,39 @@ module FileSystemAccess {
}
}
/**
* A data flow node that writes data to the file system access.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `FileSystemWriteAccess::Range` instead.
*/
class FileSystemWriteAccess extends FileSystemAccess {
override FileSystemWriteAccess::Range range;
/**
* Gets a node that represents data to be written to the file system (possibly with
* some transformation happening before it is written, like JSON encoding).
*/
DataFlow::Node getADataNode() { result = range.getADataNode() }
}
/** Provides a class for modeling new file system writes. */
module FileSystemWriteAccess {
/**
* A data flow node that writes data to the file system access.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `FileSystemWriteAccess` instead.
*/
abstract class Range extends FileSystemAccess::Range {
/**
* Gets a node that represents data to be written to the file system (possibly with
* some transformation happening before it is written, like JSON encoding).
*/
abstract DataFlow::Node getADataNode();
}
}
/** Provides classes for modeling path-related APIs. */
module Path {
/**
@@ -235,6 +268,35 @@ private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaint
}
}
/**
* A data-flow node that logs data.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Logging::Range` instead.
*/
class Logging extends DataFlow::Node {
Logging::Range range;
Logging() { this = range }
/** Gets an input that is logged. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides a class for modeling new logging mechanisms. */
module Logging {
/**
* A data-flow node that logs data.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Logging` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets an input that is logged. */
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that dynamically executes Python code.
*
@@ -293,6 +355,78 @@ module SqlExecution {
}
}
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Escaping::Range` instead.
*/
class Escaping extends DataFlow::Node {
Escaping::Range range;
Escaping() {
this = range and
// escapes that don't have _both_ input/output defined are not valid
exists(range.getAnInput()) and
exists(range.getOutput())
}
/** Gets an input that will be escaped. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
/** Gets the output that contains the escaped data. */
DataFlow::Node getOutput() { result = range.getOutput() }
/**
* Gets the context that this function escapes for, such as `html`, or `url`.
*/
string getKind() { result = range.getKind() }
}
/** Provides a class for modeling new escaping APIs. */
module Escaping {
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Escaping` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets an input that will be escaped. */
abstract DataFlow::Node getAnInput();
/** Gets the output that contains the escaped data. */
abstract DataFlow::Node getOutput();
/**
* Gets the context that this function escapes for.
*
* While kinds are represented as strings, this should not be relied upon. Use the
* predicates in the `Escaping` module, such as `getHtmlKind`.
*/
abstract string getKind();
}
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
string getHtmlKind() { result = "html" }
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
//
// Technically it claims to escape for both HTML and XML, but for now we don't have
// anything that relies on XML escaping, so I'm going to defer deciding whether they
// should be the same kind, or whether they deserve to be treated differently.
}
/**
* An escape of a string so it can be safely included in
* the body of an HTML element, for example, replacing `{}` in
* `<p>{}</p>`.
*/
class HtmlEscaping extends Escaping {
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
import semmle.python.web.HttpConstants
@@ -345,7 +479,7 @@ module HTTP {
/** Gets the URL pattern for this route, if it can be statically determined. */
string getUrlPattern() {
exists(StrConst str |
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getUrlPatternArg()) and
this.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(str) and
result = str.getText()
)
}
@@ -478,9 +612,7 @@ module HTTP {
/** Gets the mimetype of this HTTP response, if it can be statically determined. */
string getMimetype() {
exists(StrConst str |
DataFlow::exprNode(str)
.(DataFlow::LocalSourceNode)
.flowsTo(this.getMimetypeOrContentTypeArg()) and
this.getMimetypeOrContentTypeArg().getALocalSource() = DataFlow::exprNode(str) and
result = str.getText().splitAt(";", 0)
)
or
@@ -524,10 +656,73 @@ module HTTP {
abstract DataFlow::Node getRedirectLocation();
}
}
/**
* A data-flow node that sets a cookie in an HTTP response.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `HTTP::CookieWrite::Range` instead.
*/
class CookieWrite extends DataFlow::Node {
CookieWrite::Range range;
CookieWrite() { this = range }
/**
* Gets the argument, if any, specifying the raw cookie header.
*/
DataFlow::Node getHeaderArg() { result = range.getHeaderArg() }
/**
* Gets the argument, if any, specifying the cookie name.
*/
DataFlow::Node getNameArg() { result = range.getNameArg() }
/**
* Gets the argument, if any, specifying the cookie value.
*/
DataFlow::Node getValueArg() { result = range.getValueArg() }
}
/** Provides a class for modeling new cookie writes on HTTP responses. */
module CookieWrite {
/**
* A data-flow node that sets a cookie in an HTTP response.
*
* Note: we don't require that this redirect must be sent to a client (a kind of
* "if a tree falls in a forest and nobody hears it" situation).
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `HttpResponse` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument, if any, specifying the raw cookie header.
*/
abstract DataFlow::Node getHeaderArg();
/**
* Gets the argument, if any, specifying the cookie name.
*/
abstract DataFlow::Node getNameArg();
/**
* Gets the argument, if any, specifying the cookie value.
*/
abstract DataFlow::Node getValueArg();
}
}
}
}
/** Provides models for cryptographic things. */
/**
* Provides models for cryptographic things.
*
* Note: The `CryptographicAlgorithm` class currently doesn't take weak keys into
* consideration for the `isWeak` member predicate. So RSA is always considered
* secure, although using a low number of bits will actually make it insecure. We plan
* to improve our libraries in the future to more precisely capture this aspect.
*/
module Cryptography {
/** Provides models for public-key cryptography, also called asymmetric cryptography. */
module PublicKey {
@@ -563,7 +758,7 @@ module Cryptography {
/** Provides classes for modeling new key-pair generation APIs. */
module KeyGeneration {
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
private DataFlow::LocalSourceNode keysizeBacktracker(
private DataFlow::TypeTrackingNode keysizeBacktracker(
DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
t.start() and
@@ -626,4 +821,43 @@ module Cryptography {
}
}
}
import semmle.python.concepts.CryptoAlgorithms
/**
* A data-flow node that is an application of a cryptographic algorithm. For example,
* encryption, decryption, signature-validation.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CryptographicOperation::Range` instead.
*/
class CryptographicOperation extends DataFlow::Node {
CryptographicOperation::Range range;
CryptographicOperation() { this = range }
/** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
CryptographicAlgorithm getAlgorithm() { result = range.getAlgorithm() }
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides classes for modeling new applications of a cryptographic algorithms. */
module CryptographicOperation {
/**
* A data-flow node that is an application of a cryptographic algorithm. For example,
* encryption, decryption, signature-validation.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CryptographicOperation` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
abstract CryptographicAlgorithm getAlgorithm();
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
abstract DataFlow::Node getAnInput();
}
}
}

View File

@@ -1,9 +1,7 @@
import python
/** A file */
class File extends Container {
File() { files(this, _, _, _, _) }
class File extends Container, @file {
/** DEPRECATED: Use `getAbsolutePath` instead. */
deprecated override string getName() { result = this.getAbsolutePath() }
@@ -34,9 +32,7 @@ class File extends Container {
}
/** Gets a short name for this file (just the file name) */
string getShortName() {
exists(string simple, string ext | files(this, _, simple, ext, _) | result = simple + ext)
}
string getShortName() { result = this.getBaseName() }
private int lastLine() {
result = max(int i | exists(Location l | l.getFile() = this and l.getEndLine() = i))
@@ -55,7 +51,7 @@ class File extends Container {
)
}
override string getAbsolutePath() { files(this, result, _, _, _) }
override string getAbsolutePath() { files(this, result) }
/** Gets the URL of this file. */
override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" }
@@ -89,7 +85,15 @@ class File extends Container {
i.getTest().(Compare).compares(name, op, main) and
name.getId() = "__name__" and
main.getText() = "__main__"
)
) and
// Exclude files named `__main__.py`. These are often _not_ meant to be run directly, but
// contain this construct anyway.
//
// Their presence in a package (say, `foo`) means one can execute the package directly using
// `python -m foo` (which will run the `foo/__main__.py` file). Since being an entry point for
// execution means treating imports as absolute, this causes trouble, since when run with
// `python -m`, the interpreter uses the usual package semantics.
not this.getShortName() = "__main__.py"
or
// The file contains a `#!` line referencing the python interpreter
exists(Comment c |
@@ -110,15 +114,10 @@ private predicate occupied_line(File f, int n) {
}
/** A folder (directory) */
class Folder extends Container {
Folder() { folders(this, _, _) }
class Folder extends Container, @folder {
/** DEPRECATED: Use `getAbsolutePath` instead. */
deprecated override string getName() { result = this.getAbsolutePath() }
/** DEPRECATED: Use `getBaseName` instead. */
deprecated string getSimple() { folders(this, _, result) }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
@@ -136,7 +135,7 @@ class Folder extends Container {
endcolumn = 0
}
override string getAbsolutePath() { folders(this, result, _) }
override string getAbsolutePath() { folders(this, result) }
/** Gets the URL of this folder. */
override string getURL() { result = "folder://" + this.getAbsolutePath() }

View File

@@ -653,6 +653,8 @@ class DefinitionNode extends ControlFlowNode {
DefinitionNode() {
exists(Assign a | a.getATarget().getAFlowNode() = this)
or
exists(AnnAssign a | a.getTarget().getAFlowNode() = this and exists(a.getValue()))
or
exists(Alias a | a.getAsname().getAFlowNode() = this)
or
augstore(_, this)
@@ -795,6 +797,9 @@ private AstNode assigned_value(Expr lhs) {
/* lhs = result */
exists(Assign a | a.getATarget() = lhs and result = a.getValue())
or
/* lhs : annotation = result */
exists(AnnAssign a | a.getTarget() = lhs and result = a.getValue())
or
/* import result as lhs */
exists(Alias a | a.getAsname() = lhs and result = a.getValue())
or

View File

@@ -0,0 +1,35 @@
/**
* Helper file that imports all framework modeling.
*/
// If you add modeling of a new framework/library, remember to add it it to the docs in
// `docs/codeql/support/reusables/frameworks.rst`
private import semmle.python.frameworks.Aioch
private import semmle.python.frameworks.Aiohttp
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Cryptodome
private import semmle.python.frameworks.Cryptography
private import semmle.python.frameworks.Dill
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.FlaskSqlAlchemy
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.Jmespath
private import semmle.python.frameworks.MarkupSafe
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Twisted
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Yaml
private import semmle.python.frameworks.Yarl

View File

@@ -7,6 +7,7 @@
*/
import python
import semmle.python.RegexTreeView
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()
@@ -53,6 +54,9 @@ private newtype TPrintAstNode =
not list = any(Module mod).getBody() and
not forall(AstNode child | child = list.getAnItem() | isNotNeeded(child)) and
exists(list.getAnItem())
} or
TRegExpTermNode(RegExpTerm term) {
exists(StrConst str | term.getRootTerm() = getParsedRegExp(str) and shouldPrint(str, _))
}
/**
@@ -419,6 +423,42 @@ class ParameterNode extends AstElementNode {
}
}
/**
* A print node for a `StrConst`.
*
* The string has a child, if the child is used as a regular expression,
* which is the root of the regular expression.
*/
class StrConstNode extends AstElementNode {
override StrConst element;
override PrintAstNode getChild(int childIndex) {
childIndex = 0 and result.(RegExpTermNode).getTerm() = getParsedRegExp(element)
}
}
/**
* A print node for a regular expression term.
*/
class RegExpTermNode extends TRegExpTermNode, PrintAstNode {
RegExpTerm term;
RegExpTermNode() { this = TRegExpTermNode(term) }
/** Gets the `RegExpTerm` for this node. */
RegExpTerm getTerm() { result = term }
override PrintAstNode getChild(int childIndex) {
result.(RegExpTermNode).getTerm() = term.getChild(childIndex)
}
override string toString() {
result = "[" + strictconcat(term.getPrimaryQLClass(), " | ") + "] " + term.toString()
}
override Location getLocation() { result = term.getLocation() }
}
/**
* Gets the `i`th child from `node` ordered by location.
*/
@@ -447,7 +487,7 @@ private module PrettyPrinting {
string getQlClass(AstNode a) {
shouldPrint(a, _) and
(
not exists(getQlCustomClass(a)) and result = a.toString()
not exists(getQlCustomClass(a)) and result = strictconcat(a.toString(), " | ")
or
result = strictconcat(getQlCustomClass(a), " | ")
)

View File

@@ -0,0 +1,986 @@
/** Provides a class hierarchy corresponding to a parse tree of regular expressions. */
import python
private import semmle.python.regex
/**
* An element containing a regular expression term, that is, either
* a string literal (parsed as a regular expression)
* or another regular expression term.
*
* For sequences and alternations, we require at least one child.
* Otherwise, we wish to represent the term differently.
* This avoids multiple representations of the same term.
*/
newtype TRegExpParent =
/** A string literal used as a regular expression */
TRegExpLiteral(Regex re) or
/** A quantified term */
TRegExpQuantifier(Regex re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
/** A sequence term */
TRegExpSequence(Regex re, int start, int end) {
re.sequence(start, end) and
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
} or
/** An alternation term */
TRegExpAlt(Regex re, int start, int end) {
re.alternation(start, end) and
exists(int part_end |
re.alternationOption(start, end, start, part_end) and
part_end < end
) // if an alternation does not have more than one element, it should be treated as that element instead.
} or
/** A character class term */
TRegExpCharacterClass(Regex re, int start, int end) { re.charSet(start, end) } or
/** A character range term */
TRegExpCharacterRange(Regex re, int start, int end) { re.charRange(_, start, _, _, end) } or
/** A group term */
TRegExpGroup(Regex re, int start, int end) { re.group(start, end) } or
/** A special character */
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
/** A normal character */
TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
/** A back reference */
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
/**
* An element containing a regular expression term, that is, either
* a string literal (parsed as a regular expression)
* or another regular expression term.
*/
class RegExpParent extends TRegExpParent {
string toString() { result = "RegExpParent" }
/** Gets the `i`th child term. */
abstract RegExpTerm getChild(int i);
/** Gets a child term . */
RegExpTerm getAChild() { result = getChild(_) }
/** Gets the number of child terms. */
int getNumChild() { result = count(getAChild()) }
/** Gets the associated regex. */
abstract Regex getRegex();
}
/** A string literal used as a regular expression */
class RegExpLiteral extends TRegExpLiteral, RegExpParent {
Regex re;
RegExpLiteral() { this = TRegExpLiteral(re) }
override RegExpTerm getChild(int i) { i = 0 and result.getRegex() = re and result.isRootTerm() }
predicate isDotAll() { re.getAMode() = "DOTALL" }
predicate isIgnoreCase() { re.getAMode() = "IGNORECASE" }
string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }
override Regex getRegex() { result = re }
string getPrimaryQLClass() { result = "RegExpLiteral" }
}
/**
* A regular expression term, that is, a syntactic part of a regular expression.
*/
class RegExpTerm extends RegExpParent {
Regex re;
int start;
int end;
RegExpTerm() {
this = TRegExpAlt(re, start, end)
or
this = TRegExpBackRef(re, start, end)
or
this = TRegExpCharacterClass(re, start, end)
or
this = TRegExpCharacterRange(re, start, end)
or
this = TRegExpNormalChar(re, start, end)
or
this = TRegExpGroup(re, start, end)
or
this = TRegExpQuantifier(re, start, end)
or
this = TRegExpSequence(re, start, end)
or
this = TRegExpSpecialChar(re, start, end)
}
/**
* Gets the outermost term of this regular expression.
*/
RegExpTerm getRootTerm() {
this.isRootTerm() and result = this
or
result = getParent().(RegExpTerm).getRootTerm()
}
/**
* Holds if this term is part of a string literal
* that is interpreted as a regular expression.
*/
predicate isUsedAsRegExp() { any() }
/**
* Holds if this is the root term of a regular expression.
*/
predicate isRootTerm() { start = 0 and end = re.getText().length() }
override RegExpTerm getChild(int i) {
result = this.(RegExpAlt).getChild(i)
or
result = this.(RegExpBackRef).getChild(i)
or
result = this.(RegExpCharacterClass).getChild(i)
or
result = this.(RegExpCharacterRange).getChild(i)
or
result = this.(RegExpNormalChar).getChild(i)
or
result = this.(RegExpGroup).getChild(i)
or
result = this.(RegExpQuantifier).getChild(i)
or
result = this.(RegExpSequence).getChild(i)
or
result = this.(RegExpSpecialChar).getChild(i)
}
/**
* Gets the parent term of this regular expression term, or the
* regular expression literal if this is the root term.
*/
RegExpParent getParent() { result.getAChild() = this }
override Regex getRegex() { result = re }
/** Gets the offset at which this term starts. */
int getStart() { result = start }
/** Gets the offset at which this term ends. */
int getEnd() { result = end }
override string toString() { result = re.getText().substring(start, end) }
/**
* Gets the location of the surrounding regex, as locations inside the regex do not exist.
* To get location information corresponding to the term inside the regex,
* use `hasLocationInfo`.
*/
Location getLocation() { result = re.getLocation() }
/** Holds if this term is found at the specified location offsets. */
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
exists(int re_start, int re_end |
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, re_end) and
startcolumn = re_start + start + 4 and
endcolumn = re_start + end + 3
)
}
/** Gets the file in which this term is found. */
File getFile() { result = this.getLocation().getFile() }
/** Gets the raw source text of this term. */
string getRawValue() { result = this.toString() }
/** Gets the string literal in which this term is found. */
RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }
/** Gets the regular expression term that is matched (textually) before this one, if any. */
RegExpTerm getPredecessor() {
exists(RegExpTerm parent | parent = getParent() |
result = parent.(RegExpSequence).previousElement(this)
or
not exists(parent.(RegExpSequence).previousElement(this)) and
not parent instanceof RegExpSubPattern and
result = parent.getPredecessor()
)
}
/** Gets the regular expression term that is matched (textually) after this one, if any. */
RegExpTerm getSuccessor() {
exists(RegExpTerm parent | parent = getParent() |
result = parent.(RegExpSequence).nextElement(this)
or
not exists(parent.(RegExpSequence).nextElement(this)) and
not parent instanceof RegExpSubPattern and
result = parent.getSuccessor()
)
}
/** Gets the primary QL class for this term. */
string getPrimaryQLClass() { result = "RegExpTerm" }
}
/**
* A quantified regular expression term.
*
* Example:
*
* ```
* ((ECMA|Java)[sS]cript)*
* ```
*/
class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
int part_end;
boolean maybe_empty;
boolean may_repeat_forever;
RegExpQuantifier() {
this = TRegExpQuantifier(re, start, end) and
re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever)
}
override RegExpTerm getChild(int i) {
i = 0 and
result.getRegex() = re and
result.getStart() = start and
result.getEnd() = part_end
}
predicate mayRepeatForever() { may_repeat_forever = true }
string getQualifier() { result = re.getText().substring(part_end, end) }
override string getPrimaryQLClass() { result = "RegExpQuantifier" }
}
/**
* A regular expression term that permits unlimited repetitions.
*/
class InfiniteRepetitionQuantifier extends RegExpQuantifier {
InfiniteRepetitionQuantifier() { this.mayRepeatForever() }
}
/**
* A star-quantified term.
*
* Example:
*
* ```
* \w*
* ```
*/
class RegExpStar extends InfiniteRepetitionQuantifier {
RegExpStar() { this.getQualifier().charAt(0) = "*" }
override string getPrimaryQLClass() { result = "RegExpStar" }
}
/**
* A plus-quantified term.
*
* Example:
*
* ```
* \w+
* ```
*/
class RegExpPlus extends InfiniteRepetitionQuantifier {
RegExpPlus() { this.getQualifier().charAt(0) = "+" }
override string getPrimaryQLClass() { result = "RegExpPlus" }
}
/**
* An optional term.
*
* Example:
*
* ```
* ;?
* ```
*/
class RegExpOpt extends RegExpQuantifier {
RegExpOpt() { this.getQualifier().charAt(0) = "?" }
override string getPrimaryQLClass() { result = "RegExpOpt" }
}
/**
* A range-quantified term
*
* Examples:
*
* ```
* \w{2,4}
* \w{2,}
* \w{2}
* ```
*/
class RegExpRange extends RegExpQuantifier {
string upper;
string lower;
RegExpRange() { re.multiples(part_end, end, lower, upper) }
string getUpper() { result = upper }
string getLower() { result = lower }
/**
* Gets the upper bound of the range, if any.
*
* If there is no upper bound, any number of repetitions is allowed.
* For a term of the form `r{lo}`, both the lower and the upper bound
* are `lo`.
*/
int getUpperBound() { result = this.getUpper().toInt() }
/** Gets the lower bound of the range. */
int getLowerBound() { result = this.getLower().toInt() }
override string getPrimaryQLClass() { result = "RegExpRange" }
}
/**
* A sequence term.
*
* Example:
*
* ```
* (ECMA|Java)Script
* ```
*
* This is a sequence with the elements `(ECMA|Java)` and `Script`.
*/
class RegExpSequence extends RegExpTerm, TRegExpSequence {
RegExpSequence() { this = TRegExpSequence(re, start, end) }
override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
/** Gets the element preceding `element` in this sequence. */
RegExpTerm previousElement(RegExpTerm element) { element = nextElement(result) }
/** Gets the element following `element` in this sequence. */
RegExpTerm nextElement(RegExpTerm element) {
exists(int i |
element = this.getChild(i) and
result = this.getChild(i + 1)
)
}
override string getPrimaryQLClass() { result = "RegExpSequence" }
}
pragma[nomagic]
private int seqChildEnd(Regex re, int start, int end, int i) {
result = seqChild(re, start, end, i).getEnd()
}
// moved out so we can use it in the charpred
private RegExpTerm seqChild(Regex re, int start, int end, int i) {
re.sequence(start, end) and
(
i = 0 and
result.getRegex() = re and
result.getStart() = start and
exists(int itemEnd |
re.item(start, itemEnd) and
result.getEnd() = itemEnd
)
or
i > 0 and
result.getRegex() = re and
exists(int itemStart | itemStart = seqChildEnd(re, start, end, i - 1) |
result.getStart() = itemStart and
re.item(itemStart, result.getEnd())
)
)
}
/**
* An alternative term, that is, a term of the form `a|b`.
*
* Example:
*
* ```
* ECMA|Java
* ```
*/
class RegExpAlt extends RegExpTerm, TRegExpAlt {
RegExpAlt() { this = TRegExpAlt(re, start, end) }
override RegExpTerm getChild(int i) {
i = 0 and
result.getRegex() = re and
result.getStart() = start and
exists(int part_end |
re.alternationOption(start, end, start, part_end) and
result.getEnd() = part_end
)
or
i > 0 and
result.getRegex() = re and
exists(int part_start |
part_start = this.getChild(i - 1).getEnd() + 1 // allow for the |
|
result.getStart() = part_start and
re.alternationOption(start, end, part_start, result.getEnd())
)
}
override string getPrimaryQLClass() { result = "RegExpAlt" }
}
/**
* An escaped regular expression term, that is, a regular expression
* term starting with a backslash, which is not a backreference.
*
* Example:
*
* ```
* \.
* \w
* ```
*/
class RegExpEscape extends RegExpNormalChar {
RegExpEscape() { re.escapedCharacter(start, end) }
/**
* Gets the name of the escaped; for example, `w` for `\w`.
* TODO: Handle named escapes.
*/
override string getValue() {
this.isIdentityEscape() and result = this.getUnescaped()
or
this.getUnescaped() = "n" and result = "\n"
or
this.getUnescaped() = "r" and result = "\r"
or
this.getUnescaped() = "t" and result = "\t"
or
// TODO: Find a way to include a formfeed character
// this.getUnescaped() = "f" and result = " "
// or
isUnicode() and
result = getUnicode()
}
predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t", "f"] }
override string getPrimaryQLClass() { result = "RegExpEscape" }
string getUnescaped() { result = this.getText().suffix(1) }
/**
* Gets the text for this escape. That is e.g. "\w".
*/
private string getText() { result = re.getText().substring(start, end) }
/**
* Holds if this is a unicode escape.
*/
private predicate isUnicode() { getText().prefix(2) = ["\\u", "\\U"] }
/**
* Gets the unicode char for this escape.
* E.g. for `\u0061` this returns "a".
*/
private string getUnicode() {
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
result = codepoint.toUnicode()
)
}
/**
* Gets int value for the `index`th char in the hex number of the unicode escape.
* E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
*/
private int getHexValueFromUnicode(int index) {
this.isUnicode() and
exists(string hex, string char | hex = this.getText().suffix(2) |
char = hex.charAt(index) and
result = 16.pow(hex.length() - index - 1) * toHex(char)
)
}
}
/**
* Gets the hex number for the `hex` char.
*/
private int toHex(string hex) {
hex = [0 .. 9].toString() and
result = hex.toInt()
or
result = 10 and hex = ["a", "A"]
or
result = 11 and hex = ["b", "B"]
or
result = 12 and hex = ["c", "C"]
or
result = 13 and hex = ["d", "D"]
or
result = 14 and hex = ["e", "E"]
or
result = 15 and hex = ["f", "F"]
}
/**
* A character class escape in a regular expression.
* That is, an escaped charachter that denotes multiple characters.
*
* Examples:
*
* ```
* \w
* \S
* ```
*/
class RegExpCharacterClassEscape extends RegExpEscape {
// string value;
RegExpCharacterClassEscape() {
// value = re.getText().substring(start + 1, end) and
// value in ["d", "D", "s", "S", "w", "W"]
this.getValue() in ["d", "D", "s", "S", "w", "W"]
}
/** Gets the name of the character class; for example, `w` for `\w`. */
// override string getValue() { result = value }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
}
/**
* A character class in a regular expression.
*
* Examples:
*
* ```
* [a-z_]
* [^<>&]
* ```
*/
class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }
predicate isInverted() { re.getChar(start + 1) = "^" }
string getCharThing(int i) { result = re.getChar(i + start) }
predicate isUniversalClass() {
// [^]
isInverted() and not exists(getAChild())
or
// [\w\W] and similar
not isInverted() and
exists(string cce1, string cce2 |
cce1 = getAChild().(RegExpCharacterClassEscape).getValue() and
cce2 = getAChild().(RegExpCharacterClassEscape).getValue()
|
cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
)
}
override RegExpTerm getChild(int i) {
i = 0 and
result.getRegex() = re and
exists(int itemStart, int itemEnd |
result.getStart() = itemStart and
re.char_set_start(start, itemStart) and
re.char_set_child(start, itemStart, itemEnd) and
result.getEnd() = itemEnd
)
or
i > 0 and
result.getRegex() = re and
exists(int itemStart | itemStart = this.getChild(i - 1).getEnd() |
result.getStart() = itemStart and
re.char_set_child(start, itemStart, result.getEnd())
)
}
override string getPrimaryQLClass() { result = "RegExpCharacterClass" }
}
/**
* A character range in a character class in a regular expression.
*
* Example:
*
* ```
* a-z
* ```
*/
class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
int lower_end;
int upper_start;
RegExpCharacterRange() {
this = TRegExpCharacterRange(re, start, end) and
re.charRange(_, start, lower_end, upper_start, end)
}
predicate isRange(string lo, string hi) {
lo = re.getText().substring(start, lower_end) and
hi = re.getText().substring(upper_start, end)
}
override RegExpTerm getChild(int i) {
i = 0 and
result.getRegex() = re and
result.getStart() = start and
result.getEnd() = lower_end
or
i = 1 and
result.getRegex() = re and
result.getStart() = upper_start and
result.getEnd() = end
}
override string getPrimaryQLClass() { result = "RegExpCharacterRange" }
}
/**
* A normal character in a regular expression, that is, a character
* without special meaning. This includes escaped characters.
*
* Examples:
* ```
* t
* \t
* ```
*/
class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }
predicate isCharacter() { any() }
string getValue() { result = re.getText().substring(start, end) }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpNormalChar" }
}
/**
* A constant regular expression term, that is, a regular expression
* term matching a single string. Currently, this will always be a single character.
*
* Example:
*
* ```
* a
* ```
*/
class RegExpConstant extends RegExpTerm {
string value;
RegExpConstant() {
this = TRegExpNormalChar(re, start, end) and
not this instanceof RegExpCharacterClassEscape and
// exclude chars in qualifiers
// TODO: push this into regex library
not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
qstart <= start and end <= qend
) and
value = this.(RegExpNormalChar).getValue()
// This will never hold
// or
// this = TRegExpSpecialChar(re, start, end) and
// re.inCharSet(start) and
// value = this.(RegExpSpecialChar).getChar()
}
predicate isCharacter() { any() }
string getValue() { result = value }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpConstant" }
}
/**
* A grouped regular expression.
*
* Examples:
*
* ```
* (ECMA|Java)
* (?:ECMA|Java)
* (?<quote>['"])
* ```
*/
class RegExpGroup extends RegExpTerm, TRegExpGroup {
RegExpGroup() { this = TRegExpGroup(re, start, end) }
/**
* Gets the index of this capture group within the enclosing regular
* expression literal.
*
* For example, in the regular expression `/((a?).)(?:b)/`, the
* group `((a?).)` has index 1, the group `(a?)` nested inside it
* has index 2, and the group `(?:b)` has no index, since it is
* not a capture group.
*/
int getNumber() { result = re.getGroupNumber(start, end) }
/** Holds if this is a named capture group. */
predicate isNamed() { exists(this.getName()) }
/** Gets the name of this capture group, if any. */
string getName() { result = re.getGroupName(start, end) }
predicate isCharacter() { any() }
string getValue() { result = re.getText().substring(start, end) }
override RegExpTerm getChild(int i) {
result.getRegex() = re and
i = 0 and
re.groupContents(start, end, result.getStart(), result.getEnd())
}
override string getPrimaryQLClass() { result = "RegExpGroup" }
}
/**
* A special character in a regular expression.
*
* Examples:
* ```
* ^
* $
* .
* ```
*/
class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
string char;
RegExpSpecialChar() {
this = TRegExpSpecialChar(re, start, end) and
re.specialCharacter(start, end, char)
}
predicate isCharacter() { any() }
string getChar() { result = char }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpSpecialChar" }
}
/**
* A dot regular expression.
*
* Example:
*
* ```
* .
* ```
*/
class RegExpDot extends RegExpSpecialChar {
RegExpDot() { this.getChar() = "." }
override string getPrimaryQLClass() { result = "RegExpDot" }
}
/**
* A dollar assertion `$` matching the end of a line.
*
* Example:
*
* ```
* $
* ```
*/
class RegExpDollar extends RegExpSpecialChar {
RegExpDollar() { this.getChar() = "$" }
override string getPrimaryQLClass() { result = "RegExpDollar" }
}
/**
* A caret assertion `^` matching the beginning of a line.
*
* Example:
*
* ```
* ^
* ```
*/
class RegExpCaret extends RegExpSpecialChar {
RegExpCaret() { this.getChar() = "^" }
override string getPrimaryQLClass() { result = "RegExpCaret" }
}
/**
* A zero-width match, that is, either an empty group or an assertion.
*
* Examples:
* ```
* ()
* (?=\w)
* ```
*/
class RegExpZeroWidthMatch extends RegExpGroup {
RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
override predicate isCharacter() { any() }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpZeroWidthMatch" }
}
/**
* A zero-width lookahead or lookbehind assertion.
*
* Examples:
*
* ```
* (?=\w)
* (?!\n)
* (?<=\.)
* (?<!\\)
* ```
*/
class RegExpSubPattern extends RegExpZeroWidthMatch {
RegExpSubPattern() { not re.emptyGroup(start, end) }
/** Gets the lookahead term. */
RegExpTerm getOperand() {
exists(int in_start, int in_end | re.groupContents(start, end, in_start, in_end) |
result.getRegex() = re and
result.getStart() = in_start and
result.getEnd() = in_end
)
}
}
/**
* A zero-width lookahead assertion.
*
* Examples:
*
* ```
* (?=\w)
* (?!\n)
* ```
*/
abstract class RegExpLookahead extends RegExpSubPattern { }
/**
* A positive-lookahead assertion.
*
* Examples:
*
* ```
* (?=\w)
* ```
*/
class RegExpPositiveLookahead extends RegExpLookahead {
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" }
}
/**
* A negative-lookahead assertion.
*
* Examples:
*
* ```
* (?!\n)
* ```
*/
class RegExpNegativeLookahead extends RegExpLookahead {
RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }
override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" }
}
/**
* A zero-width lookbehind assertion.
*
* Examples:
*
* ```
* (?<=\.)
* (?<!\\)
* ```
*/
abstract class RegExpLookbehind extends RegExpSubPattern { }
/**
* A positive-lookbehind assertion.
*
* Examples:
*
* ```
* (?<=\.)
* ```
*/
class RegExpPositiveLookbehind extends RegExpLookbehind {
RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }
override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" }
}
/**
* A negative-lookbehind assertion.
*
* Examples:
*
* ```
* (?<!\\)
* ```
*/
class RegExpNegativeLookbehind extends RegExpLookbehind {
RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }
override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" }
}
/**
* A back reference, that is, a term of the form `\i` or `\k<name>`
* in a regular expression.
*
* Examples:
*
* ```
* \1
* (?P=quote)
* ```
*/
class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
RegExpBackRef() { this = TRegExpBackRef(re, start, end) }
/**
* Gets the number of the capture group this back reference refers to, if any.
*/
int getNumber() { result = re.getBackrefNumber(start, end) }
/**
* Gets the name of the capture group this back reference refers to, if any.
*/
string getName() { result = re.getBackrefName(start, end) }
/** Gets the capture group this back reference refers to. */
RegExpGroup getGroup() {
result.getLiteral() = this.getLiteral() and
(
result.getNumber() = this.getNumber() or
result.getName() = this.getName()
)
}
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpBackRef" }
}
/** Gets the parse tree resulting from parsing `re`, if such has been constructed. */
RegExpTerm getParsedRegExp(StrConst re) { result.getRegex() = re and result.isRootTerm() }

View File

@@ -153,6 +153,12 @@ class ExceptStmt extends ExceptStmt_ {
override Stmt getASubStatement() { result = this.getAStmt() }
override Stmt getLastStatement() { result = this.getBody().getLastItem().getLastStatement() }
override Expr getType() {
result = super.getType() and not result instanceof Tuple
or
result = super.getType().(Tuple).getAnElt()
}
}
/** An assert statement, such as `assert a == b, "A is not equal to b"` */

View File

@@ -0,0 +1,317 @@
/**
* Provides an extension point for for modeling sensitive data, such as secrets, certificates, or passwords.
* Sensitive data can be interesting to use as data-flow sources in security queries.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
// Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource::Range`
private import semmle.python.Frameworks
private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics
// We export these explicitly, so we don't also export the `HeuristicNames` module.
class SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;
module SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;
/**
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SensitiveDataSource::Range` instead.
*/
class SensitiveDataSource extends DataFlow::Node {
SensitiveDataSource::Range range;
SensitiveDataSource() { this = range }
/**
* Gets the classification of the sensitive data.
*/
SensitiveDataClassification getClassification() { result = range.getClassification() }
}
/** Provides a class for modeling new sources of sensitive data, such as secrets, certificates, or passwords. */
module SensitiveDataSource {
/**
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SensitiveDataSource` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the classification of the sensitive data.
*/
abstract SensitiveDataClassification getClassification();
}
}
/** Actual sensitive data modeling */
private module SensitiveDataModeling {
private import SensitiveDataHeuristics::HeuristicNames
/**
* Gets a reference to a function that is considered to be a sensitive source of
* `classification`.
*/
private DataFlow::TypeTrackingNode sensitiveFunction(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
exists(Function f |
f.getName() = sensitiveString(classification) and
result.asExpr() = f.getDefinition()
)
or
exists(DataFlow::TypeTracker t2 | result = sensitiveFunction(t2, classification).track(t2, t))
}
/**
* Gets a reference to a function that is considered to be a sensitive source of
* `classification`.
*/
DataFlow::Node sensitiveFunction(SensitiveDataClassification classification) {
sensitiveFunction(DataFlow::TypeTracker::end(), classification).flowsTo(result)
}
/**
* Gets a reference (in local scope) to a string constant that, if used as the key in
* a lookup, indicates the presence of sensitive data with `classification`.
*/
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
// Note: If this is implemented with type-tracking, we will get cross-talk as
// illustrated in python/ql/test/experimental/dataflow/sensitive-data/test.py
exists(DataFlow::LocalSourceNode source |
source.asExpr().(StrConst).getText() = sensitiveString(classification) and
source.flowsTo(result)
)
}
/** A function call that is considered a source of sensitive data. */
class SensitiveFunctionCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
SensitiveDataClassification classification;
SensitiveFunctionCall() {
this.getFunction() = sensitiveFunction(classification)
or
// to cover functions that we don't have the definition for, and where the
// reference to the function has not already been marked as being sensitive
this.getFunction().asCfgNode().(NameNode).getId() = sensitiveString(classification)
}
override SensitiveDataClassification getClassification() { result = classification }
}
/**
* Tracks any modeled source of sensitive data (with any classification),
* to limit the scope of `extraStepForCalls`. See it's QLDoc for more context.
*
* Also see `extraStepForCalls`.
*/
private DataFlow::TypeTrackingNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
t.start() and
result instanceof SensitiveDataSource
or
exists(DataFlow::TypeTracker t2 | result = possibleSensitiveCallable(t2).track(t2, t))
}
/**
* Tracks any modeled source of sensitive data (with any classification),
* to limit the scope of `extraStepForCalls`. See it's QLDoc for more context.
*
* Also see `extraStepForCalls`.
*/
private DataFlow::Node possibleSensitiveCallable() {
possibleSensitiveCallable(DataFlow::TypeTracker::end()).flowsTo(result)
}
/**
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a
* taint-flow step for sensitive-data, to ensure calls are handled correctly.
*
* To handle calls properly, while preserving a good source for path explanations,
* you need to include this predicate as an additional taint step in your taint-tracking
* configurations.
*
* The core problem can be illustrated by the example below. If we consider the
* `print` a sink, what path and what source do we want to show? My initial approach
* would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
* lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
* like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
* with a non-optimal path, which starts at _bad source_, and therefore doesn't show
* how we figured out that `non_sensitive_name`
* could be a function that returns a password (and in cases where there is many calls to
* `my_func` it will be annoying for someone to figure this out manually).
*
* By including this additional taint-step in the taint-tracking configuration, it's possible
* to get a path explanation going from _good source_ to the sink.
*
* ```python
* def my_func(non_sensitive_name):
* x = non_sensitive_name() # <-- bad source
* print(x) # <-- sink
*
* import not_found
* f = not_found.get_passwd # <-- good source
* my_func(f)
* ```
*/
predicate extraStepForCalls(DataFlow::Node nodeFrom, DataFlow::CallCfgNode nodeTo) {
// However, we do still use the type-tracking approach to limit the size of this
// predicate.
nodeTo.getFunction() = nodeFrom and
nodeFrom = possibleSensitiveCallable()
}
pragma[nomagic]
private string sensitiveStrConstCandidate() {
result = any(StrConst s | not s.isDocString()).getText() and
not result.regexpMatch(notSensitiveRegexp())
}
pragma[nomagic]
private string sensitiveAttributeNameCandidate() {
result = any(DataFlow::AttrRead a).getAttributeName() and
not result.regexpMatch(notSensitiveRegexp())
}
pragma[nomagic]
private string sensitiveParameterNameCandidate() {
result = any(Parameter p).getName() and
not result.regexpMatch(notSensitiveRegexp())
}
pragma[nomagic]
private string sensitiveFunctionNameCandidate() {
result = any(Function f).getName() and
not result.regexpMatch(notSensitiveRegexp())
}
pragma[nomagic]
private string sensitiveNameCandidate() {
result = any(Name n).getId() and
not result.regexpMatch(notSensitiveRegexp())
}
/**
* This helper predicate serves to deduplicate the results of the preceding predicates. This
* means that if, say, an attribute and a function parameter have the same name, then that name will
* only be matched once, which greatly cuts down on the number of regexp matches that have to be
* performed.
*
* Under normal circumstances, deduplication is only performed when a predicate is materialized, and
* so to see the effect of this we must create a separate predicate that calculates the union of the
* preceding predicates.
*/
pragma[nomagic]
private string sensitiveStringCandidate() {
result in [
sensitiveNameCandidate(), sensitiveAttributeNameCandidate(),
sensitiveParameterNameCandidate(), sensitiveFunctionNameCandidate(),
sensitiveStrConstCandidate()
]
}
/**
* Returns strings (primarily the names of various program entities) that may contain sensitive data
* with the classification `classification`.
*
* This helper predicate ends up being very similar to `nameIndicatesSensitiveData`,
* but is performance optimized to limit the number of regexp matches that have to be performed.
*/
pragma[nomagic]
private string sensitiveString(SensitiveDataClassification classification) {
result = sensitiveStringCandidate() and
result.regexpMatch(maybeSensitiveRegexp(classification))
}
/**
* Any kind of variable assignment (also including with/for) where the name indicates
* it contains sensitive data.
*
* Note: We _could_ make any access to a variable with a sensitive name a source of
* sensitive data, but to make path explanations in data-flow/taint-tracking good,
* we don't want that, since it works against allowing users to understand the flow
* in the program (which is the whole point).
*
* Note: To make data-flow/taint-tracking work, the expression that is _assigned_ to
* the variable is marked as the source (as compared to marking the variable as the
* source).
*/
class SensitiveVariableAssignment extends SensitiveDataSource::Range {
SensitiveDataClassification classification;
SensitiveVariableAssignment() {
exists(DefinitionNode def |
def.(NameNode).getId() = sensitiveString(classification) and
(
this.asCfgNode() = def.getValue()
or
this.asCfgNode() = def.getValue().(ForNode).getSequence()
) and
not this.asExpr() instanceof FunctionExpr and
not this.asExpr() instanceof ClassExpr
)
or
exists(With with |
with.getOptionalVars().(Name).getId() = sensitiveString(classification) and
this.asExpr() = with.getContextExpr()
)
}
override SensitiveDataClassification getClassification() { result = classification }
}
/** An attribute access that is considered a source of sensitive data. */
class SensitiveAttributeAccess extends SensitiveDataSource::Range {
SensitiveDataClassification classification;
SensitiveAttributeAccess() {
// Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
// I considered excluding any `from ... import something_sensitive`, but then realized that
// we should flag up `form ... import password as ...` as a password
this.(DataFlow::AttrRead).getAttributeName() = sensitiveString(classification)
or
// Things like `getattr(foo, <reference-to-string>)`
this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
}
override SensitiveDataClassification getClassification() { result = classification }
}
/** A subscript, where the key indicates the result will be sensitive data. */
class SensitiveSubscript extends SensitiveDataSource::Range {
SensitiveDataClassification classification;
SensitiveSubscript() {
this.asCfgNode().(SubscriptNode).getIndex() =
sensitiveLookupStringConst(classification).asCfgNode()
}
override SensitiveDataClassification getClassification() { result = classification }
}
/** A call to `get` on an object, where the key indicates the result will be sensitive data. */
class SensitiveGetCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
SensitiveDataClassification classification;
SensitiveGetCall() {
this.getFunction().(DataFlow::AttrRef).getAttributeName() = "get" and
this.getArg(0) = sensitiveLookupStringConst(classification)
}
override SensitiveDataClassification getClassification() { result = classification }
}
/** A parameter where the name indicates it will receive sensitive data. */
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
SensitiveDataClassification classification;
SensitiveParameter() { this.getParameter().getName() = sensitiveString(classification) }
override SensitiveDataClassification getClassification() { result = classification }
}
}
predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling::extraStepForCalls/2;

View File

@@ -0,0 +1,65 @@
/**
* This file acts as a wrapper for `internal.TypeTracker`, exposing some of the functionality with
* names that are more appropriate for Python.
*/
private import python
private import internal.TypeTracker as Internal
/** Any string that may appear as the name of an attribute or access path. */
class AttributeName = Internal::ContentName;
/** Either an attribute name, or the empty string (representing no attribute). */
class OptionalAttributeName = Internal::OptionalContentName;
/**
* Summary of the steps needed to track a value to a given dataflow node.
*
* This can be used to track objects that implement a certain API in order to
* recognize calls to that API. Note that type-tracking does not by itself provide a
* source/sink relation, that is, it may determine that a node has a given type,
* but it won't determine where that type came from.
*
* It is recommended that all uses of this type are written in the following form,
* for tracking some type `myType`:
* ```ql
* DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
* exists (DataFlow::TypeTracker t2 |
* result = myType(t2).track(t2, t)
* )
* }
*
* DataFlow::LocalSourceNode myType() { myType(DataFlow::TypeTracker::end()) }
* ```
*
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
*/
class TypeTracker extends Internal::TypeTracker {
/**
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
* The type tracking only ends after the attribute has been loaded.
*/
predicate startInAttr(string attrName) { this.startInContent(attrName) }
/**
* INTERNAL. DO NOT USE.
*
* Gets the attribute associated with this type tracker.
*/
string getAttr() { result = this.getContent() }
}
module TypeTracker = Internal::TypeTracker;
class StepSummary = Internal::StepSummary;
module StepSummary = Internal::StepSummary;
class TypeBackTracker = Internal::TypeBackTracker;
module TypeBackTracker = Internal::TypeBackTracker;

View File

@@ -17,6 +17,13 @@ abstract class AttrRef extends Node {
*/
abstract Node getObject();
/**
* Holds if this data flow node accesses attribute named `attrName` on object `object`.
*/
predicate accesses(Node object, string attrName) {
this.getObject() = object and this.getAttributeName() = attrName
}
/**
* Gets the expression node that defines the attribute being accessed, if any. This is
* usually an identifier or literal.
@@ -191,7 +198,7 @@ private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode {
* - Dynamic attribute reads using `getattr`: `getattr(object, attr)`
* - Qualified imports: `from module import attr as name`
*/
abstract class AttrRead extends AttrRef, Node { }
abstract class AttrRead extends AttrRef, Node, LocalSourceNode { }
/** A simple attribute read, e.g. `object.attr` */
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {

View File

@@ -35,22 +35,22 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
* calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly.
*/
private module LambdaFlow {
private predicate viableParamNonLambda(DataFlowCall call, int i, ParameterNode p) {
private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallable(call), i)
}
private predicate viableParamLambda(DataFlowCall call, int i, ParameterNode p) {
private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallableLambda(call, _), i)
}
private predicate viableParamArgNonLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(int i |
viableParamNonLambda(call, i, p) and
arg.argumentOf(call, i)
)
}
private predicate viableParamArgLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(int i |
viableParamLambda(call, i, p) and
arg.argumentOf(call, i)
@@ -118,8 +118,8 @@ private module LambdaFlow {
boolean toJump, DataFlowCallOption lastCall
) {
revLambdaFlow0(lambdaCall, kind, node, t, toReturn, toJump, lastCall) and
if node instanceof CastNode or node instanceof ArgumentNode or node instanceof ReturnNode
then compatibleTypes(t, getNodeType(node))
if castNode(node) or node instanceof ArgNode or node instanceof ReturnNode
then compatibleTypes(t, getNodeDataFlowType(node))
else any()
}
@@ -129,7 +129,7 @@ private module LambdaFlow {
boolean toJump, DataFlowCallOption lastCall
) {
lambdaCall(lambdaCall, kind, node) and
t = getNodeType(node) and
t = getNodeDataFlowType(node) and
toReturn = false and
toJump = false and
lastCall = TDataFlowCallNone()
@@ -146,7 +146,7 @@ private module LambdaFlow {
getNodeEnclosingCallable(node) = getNodeEnclosingCallable(mid)
|
preservesValue = false and
t = getNodeType(node)
t = getNodeDataFlowType(node)
or
preservesValue = true and
t = t0
@@ -160,7 +160,7 @@ private module LambdaFlow {
toJump = true and
lastCall = TDataFlowCallNone()
|
jumpStep(node, mid) and
jumpStepCached(node, mid) and
t = t0
or
exists(boolean preservesValue |
@@ -168,7 +168,7 @@ private module LambdaFlow {
getNodeEnclosingCallable(node) != getNodeEnclosingCallable(mid)
|
preservesValue = false and
t = getNodeType(node)
t = getNodeDataFlowType(node)
or
preservesValue = true and
t = t0
@@ -176,7 +176,7 @@ private module LambdaFlow {
)
or
// flow into a callable
exists(ParameterNode p, DataFlowCallOption lastCall0, DataFlowCall call |
exists(ParamNode p, DataFlowCallOption lastCall0, DataFlowCall call |
revLambdaFlowIn(lambdaCall, kind, p, t, toJump, lastCall0) and
(
if lastCall0 = TDataFlowCallNone() and toJump = false
@@ -227,7 +227,7 @@ private module LambdaFlow {
pragma[nomagic]
predicate revLambdaFlowIn(
DataFlowCall lambdaCall, LambdaCallKind kind, ParameterNode p, DataFlowType t, boolean toJump,
DataFlowCall lambdaCall, LambdaCallKind kind, ParamNode p, DataFlowType t, boolean toJump,
DataFlowCallOption lastCall
) {
revLambdaFlow(lambdaCall, kind, p, t, false, toJump, lastCall)
@@ -242,6 +242,89 @@ private DataFlowCallable viableCallableExt(DataFlowCall call) {
cached
private module Cached {
/**
* If needed, call this predicate from `DataFlowImplSpecific.qll` in order to
* force a stage-dependency on the `DataFlowImplCommon.qll` stage and therby
* collapsing the two stages.
*/
cached
predicate forceCachingInSameStage() { any() }
cached
predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = n.getEnclosingCallable() }
cached
predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) {
c = call.getEnclosingCallable()
}
cached
predicate nodeDataFlowType(Node n, DataFlowType t) { t = getNodeType(n) }
cached
predicate jumpStepCached(Node node1, Node node2) { jumpStep(node1, node2) }
cached
predicate clearsContentCached(Node n, Content c) { clearsContent(n, c) }
cached
predicate isUnreachableInCallCached(Node n, DataFlowCall call) { isUnreachableInCall(n, call) }
cached
predicate outNodeExt(Node n) {
n instanceof OutNode
or
n.(PostUpdateNode).getPreUpdateNode() instanceof ArgNode
}
cached
predicate hiddenNode(Node n) { nodeIsHidden(n) }
cached
OutNodeExt getAnOutNodeExt(DataFlowCall call, ReturnKindExt k) {
result = getAnOutNode(call, k.(ValueReturnKind).getKind())
or
exists(ArgNode arg |
result.(PostUpdateNode).getPreUpdateNode() = arg and
arg.argumentOf(call, k.(ParamUpdateReturnKind).getPosition())
)
}
cached
predicate returnNodeExt(Node n, ReturnKindExt k) {
k = TValueReturn(n.(ReturnNode).getKind())
or
exists(ParamNode p, int pos |
parameterValueFlowsToPreUpdate(p, n) and
p.isParameterOf(_, pos) and
k = TParamUpdate(pos)
)
}
cached
predicate castNode(Node n) { n instanceof CastNode }
cached
predicate castingNode(Node n) {
castNode(n) or
n instanceof ParamNode or
n instanceof OutNodeExt or
// For reads, `x.f`, we want to check that the tracked type after the read (which
// is obtained by popping the head of the access path stack) is compatible with
// the type of `x.f`.
read(_, _, n)
}
cached
predicate parameterNode(Node n, DataFlowCallable c, int i) {
n.(ParameterNode).isParameterOf(c, i)
}
cached
predicate argumentNode(Node n, DataFlowCall call, int pos) {
n.(ArgumentNode).argumentOf(call, pos)
}
/**
* Gets a viable target for the lambda call `call`.
*
@@ -261,7 +344,7 @@ private module Cached {
* The instance parameter is considered to have index `-1`.
*/
pragma[nomagic]
private predicate viableParam(DataFlowCall call, int i, ParameterNode p) {
private predicate viableParam(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallableExt(call), i)
}
@@ -270,11 +353,11 @@ private module Cached {
* dispatch into account.
*/
cached
predicate viableParamArg(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(int i |
viableParam(call, i, p) and
arg.argumentOf(call, i) and
compatibleTypes(getNodeType(arg), getNodeType(p))
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p))
)
}
@@ -312,7 +395,7 @@ private module Cached {
* `read` indicates whether it is contents of `p` that can flow to `node`.
*/
pragma[nomagic]
private predicate parameterValueFlowCand(ParameterNode p, Node node, boolean read) {
private predicate parameterValueFlowCand(ParamNode p, Node node, boolean read) {
p = node and
read = false
or
@@ -325,30 +408,30 @@ private module Cached {
// read
exists(Node mid |
parameterValueFlowCand(p, mid, false) and
readStep(mid, _, node) and
read(mid, _, node) and
read = true
)
or
// flow through: no prior read
exists(ArgumentNode arg |
exists(ArgNode arg |
parameterValueFlowArgCand(p, arg, false) and
argumentValueFlowsThroughCand(arg, node, read)
)
or
// flow through: no read inside method
exists(ArgumentNode arg |
exists(ArgNode arg |
parameterValueFlowArgCand(p, arg, read) and
argumentValueFlowsThroughCand(arg, node, false)
)
}
pragma[nomagic]
private predicate parameterValueFlowArgCand(ParameterNode p, ArgumentNode arg, boolean read) {
private predicate parameterValueFlowArgCand(ParamNode p, ArgNode arg, boolean read) {
parameterValueFlowCand(p, arg, read)
}
pragma[nomagic]
predicate parameterValueFlowsToPreUpdateCand(ParameterNode p, PostUpdateNode n) {
predicate parameterValueFlowsToPreUpdateCand(ParamNode p, PostUpdateNode n) {
parameterValueFlowCand(p, n.getPreUpdateNode(), false)
}
@@ -360,7 +443,7 @@ private module Cached {
* `read` indicates whether it is contents of `p` that can flow to the return
* node.
*/
predicate parameterValueFlowReturnCand(ParameterNode p, ReturnKind kind, boolean read) {
predicate parameterValueFlowReturnCand(ParamNode p, ReturnKind kind, boolean read) {
exists(ReturnNode ret |
parameterValueFlowCand(p, ret, read) and
kind = ret.getKind()
@@ -369,9 +452,9 @@ private module Cached {
pragma[nomagic]
private predicate argumentValueFlowsThroughCand0(
DataFlowCall call, ArgumentNode arg, ReturnKind kind, boolean read
DataFlowCall call, ArgNode arg, ReturnKind kind, boolean read
) {
exists(ParameterNode param | viableParamArg(call, param, arg) |
exists(ParamNode param | viableParamArg(call, param, arg) |
parameterValueFlowReturnCand(param, kind, read)
)
}
@@ -382,14 +465,14 @@ private module Cached {
*
* `read` indicates whether it is contents of `arg` that can flow to `out`.
*/
predicate argumentValueFlowsThroughCand(ArgumentNode arg, Node out, boolean read) {
predicate argumentValueFlowsThroughCand(ArgNode arg, Node out, boolean read) {
exists(DataFlowCall call, ReturnKind kind |
argumentValueFlowsThroughCand0(call, arg, kind, read) and
out = getAnOutNode(call, kind)
)
}
predicate cand(ParameterNode p, Node n) {
predicate cand(ParamNode p, Node n) {
parameterValueFlowCand(p, n, _) and
(
parameterValueFlowReturnCand(p, _, _)
@@ -416,21 +499,21 @@ private module Cached {
* If a read step was taken, then `read` captures the `Content`, the
* container type, and the content type.
*/
predicate parameterValueFlow(ParameterNode p, Node node, ReadStepTypesOption read) {
predicate parameterValueFlow(ParamNode p, Node node, ReadStepTypesOption read) {
parameterValueFlow0(p, node, read) and
if node instanceof CastingNode
then
// normal flow through
read = TReadStepTypesNone() and
compatibleTypes(getNodeType(p), getNodeType(node))
compatibleTypes(getNodeDataFlowType(p), getNodeDataFlowType(node))
or
// getter
compatibleTypes(read.getContentType(), getNodeType(node))
compatibleTypes(read.getContentType(), getNodeDataFlowType(node))
else any()
}
pragma[nomagic]
private predicate parameterValueFlow0(ParameterNode p, Node node, ReadStepTypesOption read) {
private predicate parameterValueFlow0(ParamNode p, Node node, ReadStepTypesOption read) {
p = node and
Cand::cand(p, _) and
read = TReadStepTypesNone()
@@ -447,7 +530,7 @@ private module Cached {
readStepWithTypes(mid, read.getContainerType(), read.getContent(), node,
read.getContentType()) and
Cand::parameterValueFlowReturnCand(p, _, true) and
compatibleTypes(getNodeType(p), read.getContainerType())
compatibleTypes(getNodeDataFlowType(p), read.getContainerType())
)
or
parameterValueFlow0_0(TReadStepTypesNone(), p, node, read)
@@ -455,34 +538,32 @@ private module Cached {
pragma[nomagic]
private predicate parameterValueFlow0_0(
ReadStepTypesOption mustBeNone, ParameterNode p, Node node, ReadStepTypesOption read
ReadStepTypesOption mustBeNone, ParamNode p, Node node, ReadStepTypesOption read
) {
// flow through: no prior read
exists(ArgumentNode arg |
exists(ArgNode arg |
parameterValueFlowArg(p, arg, mustBeNone) and
argumentValueFlowsThrough(arg, read, node)
)
or
// flow through: no read inside method
exists(ArgumentNode arg |
exists(ArgNode arg |
parameterValueFlowArg(p, arg, read) and
argumentValueFlowsThrough(arg, mustBeNone, node)
)
}
pragma[nomagic]
private predicate parameterValueFlowArg(
ParameterNode p, ArgumentNode arg, ReadStepTypesOption read
) {
private predicate parameterValueFlowArg(ParamNode p, ArgNode arg, ReadStepTypesOption read) {
parameterValueFlow(p, arg, read) and
Cand::argumentValueFlowsThroughCand(arg, _, _)
}
pragma[nomagic]
private predicate argumentValueFlowsThrough0(
DataFlowCall call, ArgumentNode arg, ReturnKind kind, ReadStepTypesOption read
DataFlowCall call, ArgNode arg, ReturnKind kind, ReadStepTypesOption read
) {
exists(ParameterNode param | viableParamArg(call, param, arg) |
exists(ParamNode param | viableParamArg(call, param, arg) |
parameterValueFlowReturn(param, kind, read)
)
}
@@ -496,18 +577,18 @@ private module Cached {
* container type, and the content type.
*/
pragma[nomagic]
predicate argumentValueFlowsThrough(ArgumentNode arg, ReadStepTypesOption read, Node out) {
predicate argumentValueFlowsThrough(ArgNode arg, ReadStepTypesOption read, Node out) {
exists(DataFlowCall call, ReturnKind kind |
argumentValueFlowsThrough0(call, arg, kind, read) and
out = getAnOutNode(call, kind)
|
// normal flow through
read = TReadStepTypesNone() and
compatibleTypes(getNodeType(arg), getNodeType(out))
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(out))
or
// getter
compatibleTypes(getNodeType(arg), read.getContainerType()) and
compatibleTypes(read.getContentType(), getNodeType(out))
compatibleTypes(getNodeDataFlowType(arg), read.getContainerType()) and
compatibleTypes(read.getContentType(), getNodeDataFlowType(out))
)
}
@@ -516,7 +597,7 @@ private module Cached {
* value-preserving steps and a single read step, not taking call
* contexts into account, thus representing a getter-step.
*/
predicate getterStep(ArgumentNode arg, Content c, Node out) {
predicate getterStep(ArgNode arg, Content c, Node out) {
argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out)
}
@@ -529,7 +610,7 @@ private module Cached {
* container type, and the content type.
*/
private predicate parameterValueFlowReturn(
ParameterNode p, ReturnKind kind, ReadStepTypesOption read
ParamNode p, ReturnKind kind, ReadStepTypesOption read
) {
exists(ReturnNode ret |
parameterValueFlow(p, ret, read) and
@@ -553,7 +634,7 @@ private module Cached {
private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) {
mayBenefitFromCallContext(call, callable)
or
callable = call.getEnclosingCallable() and
callEnclosingCallable(call, callable) and
exists(viableCallableLambda(call, TDataFlowCallSome(_)))
}
@@ -611,7 +692,7 @@ private module Cached {
mayBenefitFromCallContextExt(call, _) and
c = viableCallableExt(call) and
ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContextExt(call, ctx)) and
tgts = strictcount(DataFlowCall ctx | viableCallableExt(ctx) = call.getEnclosingCallable()) and
tgts = strictcount(DataFlowCall ctx | callEnclosingCallable(call, viableCallableExt(ctx))) and
ctxtgts < tgts
)
}
@@ -635,8 +716,7 @@ private module Cached {
* Holds if `p` can flow to the pre-update node associated with post-update
* node `n`, in the same callable, using only value-preserving steps.
*/
cached
predicate parameterValueFlowsToPreUpdate(ParameterNode p, PostUpdateNode n) {
private predicate parameterValueFlowsToPreUpdate(ParamNode p, PostUpdateNode n) {
parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone())
}
@@ -644,9 +724,8 @@ private module Cached {
Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
) {
storeStep(node1, c, node2) and
readStep(_, c, _) and
contentType = getNodeType(node1) and
containerType = getNodeType(node2)
contentType = getNodeDataFlowType(node1) and
containerType = getNodeDataFlowType(node2)
or
exists(Node n1, Node n2 |
n1 = node1.(PostUpdateNode).getPreUpdateNode() and
@@ -654,12 +733,15 @@ private module Cached {
|
argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1)
or
readStep(n2, c, n1) and
contentType = getNodeType(n1) and
containerType = getNodeType(n2)
read(n2, c, n1) and
contentType = getNodeDataFlowType(n1) and
containerType = getNodeDataFlowType(n2)
)
}
cached
predicate read(Node node1, Content c, Node node2) { readStep(node1, c, node2) }
/**
* Holds if data can flow from `node1` to `node2` via a direct assignment to
* `f`.
@@ -678,8 +760,9 @@ private module Cached {
* are aliases. A typical example is a function returning `this`, implementing a fluent
* interface.
*/
cached
predicate reverseStepThroughInputOutputAlias(PostUpdateNode fromNode, PostUpdateNode toNode) {
private predicate reverseStepThroughInputOutputAlias(
PostUpdateNode fromNode, PostUpdateNode toNode
) {
exists(Node fromPre, Node toPre |
fromPre = fromNode.getPreUpdateNode() and
toPre = toNode.getPreUpdateNode()
@@ -688,23 +771,34 @@ private module Cached {
// Does the language-specific simpleLocalFlowStep already model flow
// from function input to output?
fromPre = getAnOutNode(c, _) and
toPre.(ArgumentNode).argumentOf(c, _) and
simpleLocalFlowStep(toPre.(ArgumentNode), fromPre)
toPre.(ArgNode).argumentOf(c, _) and
simpleLocalFlowStep(toPre.(ArgNode), fromPre)
)
or
argumentValueFlowsThrough(toPre, TReadStepTypesNone(), fromPre)
)
}
cached
predicate simpleLocalFlowStepExt(Node node1, Node node2) {
simpleLocalFlowStep(node1, node2) or
reverseStepThroughInputOutputAlias(node1, node2)
}
/**
* Holds if the call context `call` either improves virtual dispatch in
* `callable` or if it allows us to prune unreachable nodes in `callable`.
* Holds if the call context `call` improves virtual dispatch in `callable`.
*/
cached
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
predicate recordDataFlowCallSiteDispatch(DataFlowCall call, DataFlowCallable callable) {
reducedViableImplInCallContext(_, callable, call)
or
exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCall(n, call))
}
/**
* Holds if the call context `call` allows us to prune unreachable nodes in `callable`.
*/
cached
predicate recordDataFlowCallSiteUnreachable(DataFlowCall call, DataFlowCallable callable) {
exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call))
}
cached
@@ -726,12 +820,12 @@ private module Cached {
cached
newtype TLocalFlowCallContext =
TAnyLocalCall() or
TSpecificLocalCall(DataFlowCall call) { isUnreachableInCall(_, call) }
TSpecificLocalCall(DataFlowCall call) { isUnreachableInCallCached(_, call) }
cached
newtype TReturnKindExt =
TValueReturn(ReturnKind kind) or
TParamUpdate(int pos) { exists(ParameterNode p | p.isParameterOf(_, pos)) }
TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
cached
newtype TBooleanOption =
@@ -757,27 +851,28 @@ private module Cached {
TAccessPathFrontSome(AccessPathFront apf)
}
/**
* Holds if the call context `call` either improves virtual dispatch in
* `callable` or if it allows us to prune unreachable nodes in `callable`.
*/
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
recordDataFlowCallSiteDispatch(call, callable) or
recordDataFlowCallSiteUnreachable(call, callable)
}
/**
* A `Node` at which a cast can occur such that the type should be checked.
*/
class CastingNode extends Node {
CastingNode() {
this instanceof ParameterNode or
this instanceof CastNode or
this instanceof OutNodeExt or
// For reads, `x.f`, we want to check that the tracked type after the read (which
// is obtained by popping the head of the access path stack) is compatible with
// the type of `x.f`.
readStep(_, _, this)
}
CastingNode() { castingNode(this) }
}
private predicate readStepWithTypes(
Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
) {
readStep(n1, c, n2) and
container = getNodeType(n1) and
content = getNodeType(n2)
read(n1, c, n2) and
container = getNodeDataFlowType(n1) and
content = getNodeDataFlowType(n2)
}
private newtype TReadStepTypesOption =
@@ -854,7 +949,7 @@ class CallContextSomeCall extends CallContextCall, TSomeCall {
override string toString() { result = "CcSomeCall" }
override predicate relevantFor(DataFlowCallable callable) {
exists(ParameterNode p | getNodeEnclosingCallable(p) = callable)
exists(ParamNode p | getNodeEnclosingCallable(p) = callable)
}
override predicate matchesCall(DataFlowCall call) { any() }
@@ -866,7 +961,7 @@ class CallContextReturn extends CallContextNoCall, TReturn {
}
override predicate relevantFor(DataFlowCallable callable) {
exists(DataFlowCall call | this = TReturn(_, call) and call.getEnclosingCallable() = callable)
exists(DataFlowCall call | this = TReturn(_, call) and callEnclosingCallable(call, callable))
}
}
@@ -899,7 +994,7 @@ class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall
}
private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) {
exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCall(n, call))
exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCallCached(n, call))
}
/**
@@ -913,26 +1008,37 @@ LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable)
else result instanceof LocalCallContextAny
}
/**
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*/
class ParamNode extends Node {
ParamNode() { parameterNode(this, _, _) }
/**
* Holds if this node is the parameter of callable `c` at the specified
* (zero-based) position.
*/
predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) }
}
/** A data-flow node that represents a call argument. */
class ArgNode extends Node {
ArgNode() { argumentNode(this, _, _) }
/** Holds if this argument occurs at the given position in the given call. */
final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) }
}
/**
* A node from which flow can return to the caller. This is either a regular
* `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
*/
class ReturnNodeExt extends Node {
ReturnNodeExt() {
this instanceof ReturnNode or
parameterValueFlowsToPreUpdate(_, this)
}
ReturnNodeExt() { returnNodeExt(this, _) }
/** Gets the kind of this returned value. */
ReturnKindExt getKind() {
result = TValueReturn(this.(ReturnNode).getKind())
or
exists(ParameterNode p, int pos |
parameterValueFlowsToPreUpdate(p, this) and
p.isParameterOf(_, pos) and
result = TParamUpdate(pos)
)
}
ReturnKindExt getKind() { returnNodeExt(this, result) }
}
/**
@@ -940,11 +1046,7 @@ class ReturnNodeExt extends Node {
* or a post-update node associated with a call argument.
*/
class OutNodeExt extends Node {
OutNodeExt() {
this instanceof OutNode
or
this.(PostUpdateNode).getPreUpdateNode() instanceof ArgumentNode
}
OutNodeExt() { outNodeExt(this) }
}
/**
@@ -957,7 +1059,7 @@ abstract class ReturnKindExt extends TReturnKindExt {
abstract string toString();
/** Gets a node corresponding to data flow out of `call`. */
abstract OutNodeExt getAnOutNode(DataFlowCall call);
final OutNodeExt getAnOutNode(DataFlowCall call) { result = getAnOutNodeExt(call, this) }
}
class ValueReturnKind extends ReturnKindExt, TValueReturn {
@@ -968,10 +1070,6 @@ class ValueReturnKind extends ReturnKindExt, TValueReturn {
ReturnKind getKind() { result = kind }
override string toString() { result = kind.toString() }
override OutNodeExt getAnOutNode(DataFlowCall call) {
result = getAnOutNode(call, this.getKind())
}
}
class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
@@ -982,13 +1080,6 @@ class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
int getPosition() { result = pos }
override string toString() { result = "param update " + pos }
override OutNodeExt getAnOutNode(DataFlowCall call) {
exists(ArgumentNode arg |
result.(PostUpdateNode).getPreUpdateNode() = arg and
arg.argumentOf(call, this.getPosition())
)
}
}
/** A callable tagged with a relevant return kind. */
@@ -1015,10 +1106,13 @@ class ReturnPosition extends TReturnPosition0 {
*/
pragma[inline]
DataFlowCallable getNodeEnclosingCallable(Node n) {
exists(Node n0 |
pragma[only_bind_into](n0) = n and
pragma[only_bind_into](result) = n0.getEnclosingCallable()
)
nodeEnclosingCallable(pragma[only_bind_out](n), pragma[only_bind_into](result))
}
/** Gets the type of `n` used for type pruning. */
pragma[inline]
DataFlowType getNodeDataFlowType(Node n) {
nodeDataFlowType(pragma[only_bind_out](n), pragma[only_bind_into](result))
}
pragma[noinline]
@@ -1037,17 +1131,59 @@ ReturnPosition getReturnPosition(ReturnNodeExt ret) {
result = getReturnPosition0(ret, ret.getKind())
}
/**
* Checks whether `inner` can return to `call` in the call context `innercc`.
* Assumes a context of `inner = viableCallableExt(call)`.
*/
bindingset[innercc, inner, call]
predicate checkCallContextReturn(CallContext innercc, DataFlowCallable inner, DataFlowCall call) {
innercc instanceof CallContextAny
or
exists(DataFlowCallable c0, DataFlowCall call0 |
callEnclosingCallable(call0, inner) and
innercc = TReturn(c0, call0) and
c0 = prunedViableImplInCallContextReverse(call0, call)
)
}
/**
* Checks whether `call` can resolve to `calltarget` in the call context `cc`.
* Assumes a context of `calltarget = viableCallableExt(call)`.
*/
bindingset[cc, call, calltarget]
predicate checkCallContextCall(CallContext cc, DataFlowCall call, DataFlowCallable calltarget) {
exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
if reducedViableImplInCallContext(call, _, ctx)
then calltarget = prunedViableImplInCallContext(call, ctx)
else any()
)
or
cc instanceof CallContextSomeCall
or
cc instanceof CallContextAny
or
cc instanceof CallContextReturn
}
/**
* Resolves a return from `callable` in `cc` to `call`. This is equivalent to
* `callable = viableCallableExt(call) and checkCallContextReturn(cc, callable, call)`.
*/
bindingset[cc, callable]
predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
cc instanceof CallContextAny and callable = viableCallableExt(call)
or
exists(DataFlowCallable c0, DataFlowCall call0 |
call0.getEnclosingCallable() = callable and
callEnclosingCallable(call0, callable) and
cc = TReturn(c0, call0) and
c0 = prunedViableImplInCallContextReverse(call0, call)
)
}
/**
* Resolves a call from `call` in `cc` to `result`. This is equivalent to
* `result = viableCallableExt(call) and checkCallContextCall(cc, call, result)`.
*/
bindingset[call, cc]
DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
@@ -1063,8 +1199,6 @@ DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
result = viableCallableExt(call) and cc instanceof CallContextReturn
}
predicate read = readStep/3;
/** An optional Boolean value. */
class BooleanOption extends TBooleanOption {
string toString() {
@@ -1116,7 +1250,7 @@ abstract class AccessPathFront extends TAccessPathFront {
TypedContent getHead() { this = TFrontHead(result) }
predicate isClearedAt(Node n) { clearsContent(n, getHead().getContent()) }
predicate isClearedAt(Node n) { clearsContentCached(n, getHead().getContent()) }
}
class AccessPathFrontNil extends AccessPathFront, TFrontNil {

View File

@@ -168,7 +168,13 @@ module Consistency {
msg = "ArgumentNode is missing PostUpdateNode."
}
query predicate postWithInFlow(PostUpdateNode n, string msg) {
// This predicate helps the compiler forget that in some languages
// it is impossible for a `PostUpdateNode` to be the target of
// `simpleLocalFlowStep`.
private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
query predicate postWithInFlow(Node n, string msg) {
isPostUpdateNode(n) and
simpleLocalFlowStep(_, n) and
msg = "PostUpdateNode should not be the target of local flow."
}

View File

@@ -228,7 +228,6 @@ module EssaFlow {
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
* excludes SSA flow through instance fields.
*/
cached
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
// If there is ESSA-flow out of a node `node`, we want flow
// both out of `node` and any post-update node of `node`.
@@ -870,6 +869,9 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
module_export(mv.getScope(), r.getAttributeName(), nodeFrom) and
nodeTo = r
)
or
// Default value for parameter flows to that parameter
defaultValueFlowStep(nodeFrom, nodeTo)
}
/**
@@ -1034,6 +1036,19 @@ predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node
)
}
predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
exists(Function f, Parameter p, ParameterDefinition def |
// `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
p = f.getArgByName(_) and
nodeFrom.asExpr() = p.getDefault() and
// The following expresses
// nodeTo.(ParameterNode).getParameter() = p
// without non-monotonic recursion
def.getParameter() = p and
nodeTo.getNode() = def.getDefiningNode()
)
}
/**
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
*/
@@ -1559,7 +1574,6 @@ predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node no
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
*/
cached
predicate clearsContent(Node n, Content c) {
exists(CallNode call, CallableValue callable, string name |
call_unpacks(call, _, callable, name, _) and

View File

@@ -180,6 +180,45 @@ class CallCfgNode extends CfgNode, LocalSourceNode {
Node getArgByName(string name) { result.asCfgNode() = node.getArgByName(name) }
}
/**
* A data-flow node corresponding to a method call, that is `foo.bar(...)`.
*
* Also covers the case where the method lookup is done separately from the call itself, as in
* `temp = foo.bar; temp(...)`. Note that this is only tracked through local scope.
*/
class MethodCallNode extends CallCfgNode {
AttrRead method_lookup;
MethodCallNode() { method_lookup = this.getFunction().getALocalSource() }
/**
* Gets the name of the method being invoked (the `bar` in `foo.bar(...)`) if it can be determined.
*
* Note that this method may have multiple results if a single call node represents calls to
* multiple different objects and methods. If you want to link up objects and method names
* accurately, use the `calls` method instead.
*/
string getMethodName() { result = method_lookup.getAttributeName() }
/**
* Gets the data-flow node corresponding to the object receiving this call. That is, the `foo` in
* `foo.bar(...)`.
*
* Note that this method may have multiple results if a single call node represents calls to
* multiple different objects and methods. If you want to link up objects and method names
* accurately, use the `calls` method instead.
*/
Node getObject() { result = method_lookup.getObject() }
/** Holds if this data-flow node calls method `methodName` on the object node `object`. */
predicate calls(Node object, string methodName) {
// As `getObject` and `getMethodName` may both have multiple results, we must look up the object
// and method name directly on `method_lookup`.
object = method_lookup.getObject() and
methodName = method_lookup.getAttributeName()
}
}
/**
* An expression, viewed as a node in a data flow graph.
*

View File

@@ -18,6 +18,10 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
/**
* DEPRECATED. Use the API graphs library (`semmle.python.ApiGraphs`) instead.
*
* For a drop-in replacement, use `API::moduleImport(name).getAUse()`.
*
* Gets a `Node` that refers to the module referenced by `name`.
* Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
* reference to the module `pkg`.
@@ -37,7 +41,7 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
* `mypkg/foo.py` but the variable `foo` containing `42` -- however, `import mypkg.foo` will always cause `mypkg.foo`
* to refer to the module.
*/
Node importNode(string name) {
deprecated Node importNode(string name) {
exists(Variable var, Import imp, Alias alias |
alias = imp.getAName() and
alias.getAsname() = var.getAStore() and

View File

@@ -33,15 +33,20 @@ private import DataFlowPrivate
class LocalSourceNode extends Node {
cached
LocalSourceNode() {
not simpleLocalFlowStep(_, this) and
// Currently, we create synthetic post-update nodes for
// - arguments to calls that may modify said argument
// - direct reads a writes of object attributes
// Both of these preserve the identity of the underlying pointer, and hence we exclude these as
// local source nodes.
// We do, however, allow the post-update nodes that arise from object creation (which are non-synthetic).
not this instanceof SyntheticPostUpdateNode
this instanceof ExprNode and
not simpleLocalFlowStep(_, this)
or
// We include all module variable nodes, as these act as stepping stones between writes and
// reads of global variables. Without them, type tracking based on `LocalSourceNode`s would be
// unable to track across global variables.
//
// Once the `track` and `backtrack` methods have been fully deprecated, this disjunct can be
// removed, and the entire class can extend `ExprNode`. At that point, `TypeTrackingNode` should
// be used for type tracking instead of `LocalSourceNode`.
this instanceof ModuleVariableNode
or
// We explicitly include any read of a global variable, as some of these may have local flow going
// into them.
this = any(ModuleVariableNode mvn).getARead()
}
@@ -59,6 +64,11 @@ class LocalSourceNode extends Node {
*/
AttrRead getAnAttributeRead(string attrName) { result = getAnAttributeReference(attrName) }
/**
* Gets a write of attribute `attrName` on this node.
*/
AttrWrite getAnAttributeWrite(string attrName) { result = getAnAttributeReference(attrName) }
/**
* Gets a reference (read or write) of any attribute on this node.
*/
@@ -73,11 +83,26 @@ class LocalSourceNode extends Node {
*/
AttrRead getAnAttributeRead() { result = getAnAttributeReference() }
/**
* Gets a write of any attribute on this node.
*/
AttrWrite getAnAttributeWrite() { result = getAnAttributeReference() }
/**
* Gets a call to this node.
*/
CallCfgNode getACall() { Cached::call(this, result) }
/**
* Gets a call to the method `methodName` on this node.
*
* Includes both calls that have the syntactic shape of a method call (as in `obj.m(...)`), and
* calls where the callee undergoes some additional local data flow (as in `tmp = obj.m; m(...)`).
*/
MethodCallNode getAMethodCall(string methodName) {
result = this.getAnAttributeRead(methodName).getACall()
}
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
@@ -95,6 +120,53 @@ class LocalSourceNode extends Node {
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
}
/**
* A node that can be used for type tracking or type back-tracking.
*
* All steps made during type tracking should be between instances of this class.
*/
class TypeTrackingNode = LocalSourceNode;
/** Temporary holding ground for the `TypeTrackingNode` class. */
private module FutureWork {
class FutureTypeTrackingNode extends Node {
FutureTypeTrackingNode() {
this instanceof LocalSourceNode
or
this instanceof ModuleVariableNode
}
/**
* Holds if this node can flow to `nodeTo` in one or more local flow steps.
*
* For `ModuleVariableNode`s, the only "local" step is to the node itself.
* For `LocalSourceNode`s, this is the usual notion of local flow.
*/
pragma[inline]
predicate flowsTo(Node node) {
this instanceof ModuleVariableNode and this = node
or
this.(LocalSourceNode).flowsTo(node)
}
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
* See `TypeTracker` for more details about how to use this.
*/
pragma[inline]
TypeTrackingNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
/**
* Gets a node that may flow into this one using one heap and/or interprocedural step.
*
* See `TypeBackTracker` for more details about how to use this.
*/
pragma[inline]
TypeTrackingNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
}
}
cached
private module Cached {
/**
@@ -107,11 +179,21 @@ private module Cached {
source = sink
or
exists(Node second |
simpleLocalFlowStep(source, second) and
simpleLocalFlowStep*(second, sink)
localSourceFlowStep(source, second) and
localSourceFlowStep*(second, sink)
)
}
/**
* Helper predicate for `hasLocalSource`. Removes any steps go to module variable reads, as these
* are already local source nodes in their own right.
*/
cached
private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo) and
not nodeTo = any(ModuleVariableNode v).getARead()
}
/**
* Holds if `base` flows to the base of `ref` and `ref` has attribute name `attr`.
*/

View File

@@ -0,0 +1,73 @@
/**
* INTERNAL: Do not use.
*
* Provides helper predicates for pretty-printing `DataFlow::Node`s.
*
* Since these have not been performance optimized, please only use them for
* debug-queries or in tests.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
/**
* INTERNAL: Do not use.
*
* Gets the pretty-printed version of the Expr `e`.
*/
string prettyExpr(Expr e) {
not e instanceof Num and
not e instanceof StrConst and
not e instanceof Subscript and
not e instanceof Call and
not e instanceof Attribute and
result = e.toString()
or
result = e.(Num).getN()
or
result =
e.(StrConst).getPrefix() + e.(StrConst).getText() +
e.(StrConst).getPrefix().regexpReplaceAll("[a-zA-Z]+", "")
or
result = prettyExpr(e.(Subscript).getObject()) + "[" + prettyExpr(e.(Subscript).getIndex()) + "]"
or
(
if exists(e.(Call).getAnArg()) or exists(e.(Call).getANamedArg())
then result = prettyExpr(e.(Call).getFunc()) + "(..)"
else result = prettyExpr(e.(Call).getFunc()) + "()"
)
or
result = prettyExpr(e.(Attribute).getObject()) + "." + e.(Attribute).getName()
}
/**
* INTERNAL: Do not use.
*
* Gets the pretty-printed version of the DataFlow::Node `node`
*/
bindingset[node]
string prettyNode(DataFlow::Node node) {
if exists(node.asExpr()) then result = prettyExpr(node.asExpr()) else result = node.toString()
}
/**
* INTERNAL: Do not use.
*
* Gets the pretty-printed version of the DataFlow::Node `node`, that is suitable for use
* with `TestUtilities.InlineExpectationsTest` (that is, no spaces unless required).
*/
bindingset[node]
string prettyNodeForInlineTest(DataFlow::Node node) {
exists(node.asExpr()) and
result = prettyExpr(node.asExpr())
or
exists(Expr e | e = node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() |
// since PostUpdateNode both has space in the `[post <thing>]` annotation, and does
// not pretty print the pre-update node, we do custom handling of this.
result = "[post]" + prettyExpr(e)
)
or
not exists(node.asExpr()) and
not exists(Expr e | e = node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()) and
result = node.toString()
}

View File

@@ -1,7 +1,8 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.dataflow.new.internal.TaintTrackingPublic
private import semmle.python.ApiGraphs
/**
* Holds if `node` should be a sanitizer in all global taint flow configurations
@@ -10,35 +11,52 @@ private import semmle.python.dataflow.new.internal.TaintTrackingPublic
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
/**
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
* global taint flow configurations.
* Holds if default `TaintTracking::Configuration`s should allow implicit reads
* of `c` at sinks and inputs to additional taint steps.
*/
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
localAdditionalTaintStep(nodeFrom, nodeTo)
or
any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
bindingset[node]
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
private module Cached {
/**
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
* global taint flow configurations.
*/
cached
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
localAdditionalTaintStep(nodeFrom, nodeTo)
or
any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
}
/**
* Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
* different objects.
*/
cached
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
concatStep(nodeFrom, nodeTo)
or
subscriptStep(nodeFrom, nodeTo)
or
stringManipulation(nodeFrom, nodeTo)
or
containerStep(nodeFrom, nodeTo)
or
copyStep(nodeFrom, nodeTo)
or
DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
or
awaitStep(nodeFrom, nodeTo)
}
}
/**
* Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
* different objects.
*/
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
concatStep(nodeFrom, nodeTo)
or
subscriptStep(nodeFrom, nodeTo)
or
stringManipulation(nodeFrom, nodeTo)
or
containerStep(nodeFrom, nodeTo)
or
copyStep(nodeFrom, nodeTo)
or
forStep(nodeFrom, nodeTo)
or
unpackingAssignmentStep(nodeFrom, nodeTo)
}
import Cached
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to concatenation.
@@ -69,13 +87,13 @@ predicate subscriptStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
*/
predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
// transforming something tainted into a string will make the string tainted
exists(CallNode call | call = nodeTo.getNode() |
call.getFunction().(NameNode).getId() in ["str", "bytes", "unicode"] and
exists(DataFlow::CallCfgNode call | call = nodeTo |
(
nodeFrom.getNode() = call.getArg(0)
call = API::builtin(["str", "bytes", "unicode"]).getACall()
or
nodeFrom.getNode() = call.getArgByName("object")
)
call.getFunction().asCfgNode().(NameNode).getId() in ["str", "bytes", "unicode"]
) and
nodeFrom in [call.getArg(0), call.getArgByName("object")]
)
or
// String methods. Note that this doesn't recognize `meth = "foo".upper; meth()`
@@ -142,39 +160,37 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
// construction by literal
// TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
storeStep(nodeFrom, _, nodeTo)
DataFlowPrivate::storeStep(nodeFrom, _, nodeTo)
or
// constructor call
exists(CallNode call | call = nodeTo.asCfgNode() |
call.getFunction().(NameNode).getId() in [
"list", "set", "frozenset", "dict", "defaultdict", "tuple"
] and
call.getArg(0) = nodeFrom.getNode()
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and
call.getArg(0) = nodeFrom
// TODO: Properly handle defaultdict/namedtuple
)
or
// functions operating on collections
exists(CallNode call | call = nodeTo.asCfgNode() |
call.getFunction().(NameNode).getId() in ["sorted", "reversed", "iter", "next"] and
call.getArg(0) = nodeFrom.getNode()
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and
call.getArg(0) = nodeFrom
)
or
// methods
exists(CallNode call, string name | call = nodeTo.asCfgNode() |
name in [
exists(DataFlow::MethodCallNode call, string methodName | call = nodeTo |
methodName in [
// general
"copy", "pop",
// dict
"values", "items", "get", "popitem"
] and
call.getFunction().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
call.calls(nodeFrom, methodName)
)
or
// list.append, set.add
exists(CallNode call, string name |
name in ["append", "add"] and
call.getFunction().(AttrNode).getObject(name) =
nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode().asCfgNode() and
call.getArg(0) = nodeFrom.getNode()
exists(DataFlow::MethodCallNode call, DataFlow::Node obj |
call.calls(obj, ["append", "add"]) and
obj = nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() and
call.getArg(0) = nodeFrom
)
}
@@ -182,38 +198,16 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying.
*/
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(CallNode call | call = nodeTo.getNode() |
// Fully qualified: copy.copy, copy.deepcopy
(
call.getFunction().(NameNode).getId() in ["copy", "deepcopy"]
or
call.getFunction().(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy"
) and
call.getArg(0) = nodeFrom.getNode()
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::moduleImport("copy").getMember(["copy", "deepcopy"]).getACall() and
call.getArg(0) = nodeFrom
)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
* for example `for x in xs`, or `for x,y in points`.
* Holds if taint can flow from `nodeFrom` to `nodeTo` with an `await`-step,
* such that the whole expression `await x` is tainted if `x` is tainted.
*/
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
exists(EssaNodeDefinition defn, For for |
for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = for.getIter()
)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
* Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
*/
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
// `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
exists(MultiAssignmentDefinition defn, Assign assign |
assign.getATarget().contains(defn.getDefiningNode().getNode()) and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = assign.getValue()
)
predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeTo.asExpr().(Await).getValue() = nodeFrom.asExpr()
}

Some files were not shown because too many files have changed in this diff Show More