mirror of
https://github.com/github/codeql.git
synced 2026-05-01 11:45:14 +02:00
Merge branch 'main' into jorgectf/python/deserialization
This commit is contained in:
100
python/.vscode/ql.code-snippets
vendored
100
python/.vscode/ql.code-snippets
vendored
@@ -106,7 +106,7 @@
|
||||
"prefix": "type tracking",
|
||||
"body": [
|
||||
"/** Gets a reference to ${3:a thing}. */",
|
||||
"private DataFlow::LocalSourceNode ${1:myType}(DataFlow::TypeTracker t) {",
|
||||
"private DataFlow::TypeTrackingNode ${1:myType}(DataFlow::TypeTracker t) {",
|
||||
" t.start() and",
|
||||
" result = ${2:value}",
|
||||
" or",
|
||||
@@ -152,4 +152,102 @@
|
||||
]
|
||||
},
|
||||
|
||||
"Type tracking class": {
|
||||
"scope": "ql",
|
||||
"prefix": "type tracking class",
|
||||
"body": [
|
||||
"/**",
|
||||
" * Provides models for the `${TM_SELECTED_TEXT}` class",
|
||||
" *",
|
||||
" * See ${1:https://apiref (TODO)}.",
|
||||
" */",
|
||||
"module ${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/} {",
|
||||
" /** Gets a reference to the `${TM_SELECTED_TEXT}` class. */",
|
||||
" private API::Node classRef() {",
|
||||
" result = API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")",
|
||||
" }",
|
||||
"",
|
||||
" /**",
|
||||
" * A source of instances of `${TM_SELECTED_TEXT}`, extend this class to model new instances.",
|
||||
" *",
|
||||
" * This can include instantiations of the class, return values from function",
|
||||
" * calls, or a special parameter that will be set when functions are called by an external",
|
||||
" * library.",
|
||||
" *",
|
||||
" * Use the predicate `${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/}::instance()` to get references to instances of `${TM_SELECTED_TEXT}`.",
|
||||
" */",
|
||||
" abstract class InstanceSource extends DataFlow::LocalSourceNode { }",
|
||||
"",
|
||||
" /** A direct instantiation of `${TM_SELECTED_TEXT}`. */",
|
||||
" private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {",
|
||||
" ClassInstantiation() { this = classRef().getACall() }",
|
||||
" }",
|
||||
"",
|
||||
" /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
|
||||
" private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {",
|
||||
" t.start() and",
|
||||
" result instanceof InstanceSource",
|
||||
" or",
|
||||
" exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))",
|
||||
" }",
|
||||
"",
|
||||
" /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
|
||||
" DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }",
|
||||
"",
|
||||
" /**",
|
||||
" * Taint propagation for `${TM_SELECTED_TEXT}`.",
|
||||
" */",
|
||||
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
|
||||
" InstanceTaintSteps() { this = \"${TM_SELECTED_TEXT}\" }",
|
||||
" ",
|
||||
" override DataFlow::Node getInstance() { result = instance() }",
|
||||
" ",
|
||||
" override string getAttributeName() { none() }",
|
||||
" ",
|
||||
" override string getMethodName() { none() }",
|
||||
" ",
|
||||
" override string getAsyncMethodName() { none() }",
|
||||
" }",
|
||||
"",
|
||||
" /**",
|
||||
" * Extra taint propagation for `${TM_SELECTED_TEXT}`, not covered by `InstanceTaintSteps`.",
|
||||
" */",
|
||||
" private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {",
|
||||
" override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {",
|
||||
" // TODO",
|
||||
" none()",
|
||||
" }",
|
||||
" }",
|
||||
"}",
|
||||
],
|
||||
"description": "Type tracking class (select full class path before inserting)",
|
||||
},
|
||||
"foo": {
|
||||
"scope": "ql",
|
||||
"prefix": "foo",
|
||||
"body": [
|
||||
" /**",
|
||||
" * Taint propagation for `$1`.",
|
||||
" */",
|
||||
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
|
||||
" InstanceTaintSteps() { this = \"$1\" }",
|
||||
"",
|
||||
" override DataFlow::Node getInstance() { result = instance() }",
|
||||
"",
|
||||
" override string getAttributeName() { none() }",
|
||||
"",
|
||||
" override string getMethodName() { none() }",
|
||||
"",
|
||||
" override string getAsyncMethodName() { none() }",
|
||||
" }",
|
||||
],
|
||||
},
|
||||
"API graph .getMember chain": {
|
||||
"scope": "ql",
|
||||
"prefix": "api graph .getMember chain",
|
||||
"body": [
|
||||
"API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")"
|
||||
],
|
||||
"description": "API graph .getMember chain (select full path before inserting)",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Updated the _Use of a broken or weak cryptographic algorithm_ (`py/weak-cryptographic-algorithm`) query, so it alerts on any use of a weak cryptographic non-hashing algorithm. Introduced a new query _Use of a broken or weak cryptographic hashing algorithm on sensitive data_ (`py/weak-sensitive-data-hashing`) to handle weak cryptographic hashing algorithms, which only alerts when used on sensitive data.
|
||||
2
python/change-notes/2021-05-10-idna-add-modeling.md
Normal file
2
python/change-notes/2021-05-10-idna-add-modeling.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of the PyPI package `idna`, for encoding/decoding Internationalised Domain Names in Applications.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of the PyPI package `simplejson`.
|
||||
2
python/change-notes/2021-05-10-ujson-add-modeling.md
Normal file
2
python/change-notes/2021-05-10-ujson-add-modeling.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of the PyPI package `ujson`.
|
||||
2
python/change-notes/2021-05-21-api-graph-await.md
Normal file
2
python/change-notes/2021-05-21-api-graph-await.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* API graph nodes now contain a `getAwaited()` member predicate, for getting the result of awaiting an item, such as `await foo`.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added model of SQL execution in `clickhouse-driver` and `aioch` PyPI packages, resulting in additional sinks for the SQL Injection query (`py/sql-injection`). This modeling was originally [submitted as a contribution by @japroc](https://github.com/github/codeql/pull/5889).
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of sources/sinks when using the `aiohttp.web` web framework.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Expanded modeling of sensitive data sources to include: subscripting with a key that indicates sensitive data (`obj["password"]`), parameters whose names indicate sensitive data (`def func(password):`), and assignments to variables whose names indicate sensitive data (`password = ...`).
|
||||
2
python/change-notes/2021-06-08-twisted-add-modeling.md
Normal file
2
python/change-notes/2021-06-08-twisted-add-modeling.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of sources/sinks when using `twisted` to create web servers.
|
||||
2
python/change-notes/2021-06-09-add-jmespath-modeling.md
Normal file
2
python/change-notes/2021-06-09-add-jmespath-modeling.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of the PyPI package `jmespath`.
|
||||
2
python/change-notes/2021-06-09-rsa-add-modeling.md
Normal file
2
python/change-notes/2021-06-09-rsa-add-modeling.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of the PyPI package `rsa`.
|
||||
@@ -0,0 +1,5 @@
|
||||
lgtm,codescanning
|
||||
* A new class `DataFlow::MethodCallNode` extends `DataFlow::CallCfgNode` with convenient methods for
|
||||
accessing the receiver and method name of a method call.
|
||||
* The `LocalSourceNode` class now has a `getAMethodCall` method, with which one can easily access
|
||||
method calls with the given node as a receiver.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of the PyPI package `MarkupSafe`.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added `HTTP::Server::CookieWrite` concept for statements that sets a cookie in an HTTP response, along with modeling of this in supported web frameworks (aiohttp/flask/django/tornado/twisted).
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* The DataFlow libraries have been augmented with support for `Configuration`-specific in-place read steps at, for example, sinks and custom taint steps. This means that it is now possible to specify sinks that accept flow with non-empty access paths.
|
||||
2
python/change-notes/2021-06-25-add-peewee-modeling.md
Normal file
2
python/change-notes/2021-06-25-add-peewee-modeling.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of raw SQL execution from the PyPI package `peewee`.
|
||||
2
python/change-notes/2021-07-12-add-typetrackingnode.md
Normal file
2
python/change-notes/2021-07-12-add-typetrackingnode.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* The `track` and `backtrack` methods on `LocalSourceNode` are in the process of being deprecated. When using type trackers, the corresponding methods on `TypeTrackingNode` should be used instead.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Changed the way to provide extra sources/sinks for `@kind path-problem` queries, to avoid a potential performance problem due to re-evaluation of data-flow configurations. Please use the new `<query>Customization.qll` files and extend their classes instead (such as extending the `Sink` class from `python/ql/src/semmle/python/security/dataflow/SqlInjectionCustomizations.qll`). This is relevant for the queries: `py/sql-injection`, `py/code-injection`, `py/command-line-injection`, `py/reflective-xss`, `py/url-redirection`, `py/unsafe-deserialization`, `py/stack-trace-exposure`, `py/path-injection`.
|
||||
4
python/change-notes/2021-07-16-deprecate-importnode.md
Normal file
4
python/change-notes/2021-07-16-deprecate-importnode.md
Normal file
@@ -0,0 +1,4 @@
|
||||
lgtm,codescanning
|
||||
* The `importNode` predicate from the data-flow library has been deprecated. In its place, we
|
||||
recommend using the API graphs library, accessible via `import semmle.python.ApiGraphs`.
|
||||
|
||||
3
python/change-notes/2021-07-28-port-RoDoS-queries.md
Normal file
3
python/change-notes/2021-07-28-port-RoDoS-queries.md
Normal file
@@ -0,0 +1,3 @@
|
||||
lgtm,codescanning
|
||||
* Added an experimental _Inefficient regular expression_ (`py/redos`) query, which is already available in JavaScript.
|
||||
* Added an experimental _Polynomial regular expression used on uncontrolled data_ (`py/polynomial-redos`), which is already available in JavaScript.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Updated _Modification of parameter with default_ (`py/modification-of-default-value`) query to use the new data flow library instead of the old taint tracking library and to remove the use of points-to analysis. You may see differences in the results found by the query, but overall this change should result in a more robust and accurate analysis.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of SQL execution in the `Flask-SQLAlchemy` PyPI package, resulting in additional sinks for the SQL Injection query (`py/sql-injection`).
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of SQL execution in the `SQLAlchemy` PyPI package, resulting in additional sinks for the SQL Injection query (`py/sql-injection`). This modeling was originally [submitted as a contribution by @mrthankyou](https://github.com/github/codeql/pull/5680).
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Expanded the query _SQL query built from user-controlled sources_ (`py/sql-injection`) to alert if user-input is added to a TextClause from SQLAlchemy, since that can lead to SQL injection.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Function parameters with default values will now see flow from those values.
|
||||
4
python/ql/examples/qlpack.lock.yml
Normal file
4
python/ql/examples/qlpack.lock.yml
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
dependencies: {}
|
||||
compiled: false
|
||||
lockVersion: 1.0.0
|
||||
@@ -1,3 +1,4 @@
|
||||
name: codeql-python-examples
|
||||
version: 0.0.0
|
||||
libraryPathDependencies: codeql-python
|
||||
name: codeql/python-examples
|
||||
version: 0.0.2
|
||||
dependencies:
|
||||
codeql/python-all: "*"
|
||||
|
||||
4
python/ql/lib/qlpack.lock.yml
Normal file
4
python/ql/lib/qlpack.lock.yml
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
dependencies: {}
|
||||
compiled: false
|
||||
lockVersion: 1.0.0
|
||||
7
python/ql/lib/qlpack.yml
Normal file
7
python/ql/lib/qlpack.yml
Normal file
@@ -0,0 +1,7 @@
|
||||
name: codeql/python-all
|
||||
version: 0.0.2
|
||||
dbscheme: semmlecode.python.dbscheme
|
||||
extractor: python
|
||||
library: true
|
||||
dependencies:
|
||||
codeql/python-upgrades: 0.0.2
|
||||
3
python/ql/lib/semmle/crypto/Crypto.qll
Normal file
3
python/ql/lib/semmle/crypto/Crypto.qll
Normal file
@@ -0,0 +1,3 @@
|
||||
/** DEPRECATED: Use `semmle.python.concepts.CryptoAlgorithms` instead. */
|
||||
|
||||
import semmle.python.concepts.CryptoAlgorithms
|
||||
@@ -97,6 +97,11 @@ module API {
|
||||
*/
|
||||
Node getASubclass() { result = getASuccessor(Label::subclass()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing the result from awaiting this node.
|
||||
*/
|
||||
Node getAwaited() { result = getASuccessor(Label::await()) }
|
||||
|
||||
/**
|
||||
* Gets a string representation of the lexicographically least among all shortest access paths
|
||||
* from the root to this node.
|
||||
@@ -349,22 +354,135 @@ module API {
|
||||
)
|
||||
}
|
||||
|
||||
private import semmle.python.types.Builtins as Builtins
|
||||
/** Gets the name of a known built-in. */
|
||||
private string getBuiltInName() {
|
||||
// These lists were created by inspecting the `builtins` and `__builtin__` modules in
|
||||
// Python 3 and 2 respectively, using the `dir` built-in.
|
||||
// Built-in functions and exceptions shared between Python 2 and 3
|
||||
result in [
|
||||
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
|
||||
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
|
||||
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
|
||||
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
|
||||
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
|
||||
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
|
||||
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
|
||||
// Exceptions
|
||||
"ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
|
||||
"BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
|
||||
"FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
|
||||
"ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
|
||||
"LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
|
||||
"OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
|
||||
"RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
|
||||
"SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
|
||||
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
|
||||
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
|
||||
// Added for compatibility
|
||||
"exec"
|
||||
]
|
||||
or
|
||||
// Built-in constants shared between Python 2 and 3
|
||||
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
|
||||
or
|
||||
// Python 3 only
|
||||
result in [
|
||||
"ascii", "breakpoint", "bytes", "exec",
|
||||
// Exceptions
|
||||
"BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
|
||||
"ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
|
||||
"FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
|
||||
"NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
|
||||
"ResourceWarning", "StopAsyncIteration", "TimeoutError"
|
||||
]
|
||||
or
|
||||
// Python 2 only
|
||||
result in [
|
||||
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
|
||||
"unichr", "unicode", "xrange"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
|
||||
*
|
||||
* Currently this is an over-approximation, and does not account for things like overwriting a
|
||||
* Currently this is an over-approximation, and may not account for things like overwriting a
|
||||
* built-in with a different value.
|
||||
*/
|
||||
private DataFlow::Node likely_builtin(string name) {
|
||||
result.asCfgNode() =
|
||||
any(NameNode n |
|
||||
n.isGlobal() and
|
||||
n.isLoad() and
|
||||
name = n.getId() and
|
||||
name in [any(Builtins::Builtin b).getName(), "None", "True", "False"]
|
||||
)
|
||||
exists(Module m |
|
||||
result.asCfgNode() =
|
||||
any(NameNode n |
|
||||
possible_builtin_accessed_in_module(n, name, m) and
|
||||
not possible_builtin_defined_in_module(name, m)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
|
||||
* a value in the module `m`.
|
||||
*/
|
||||
private predicate possible_builtin_defined_in_module(string name, Module m) {
|
||||
global_name_defined_in_module(name, m) and
|
||||
name = getBuiltInName()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
|
||||
* built-in) inside the module `m`.
|
||||
*/
|
||||
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
|
||||
n.isGlobal() and
|
||||
n.isLoad() and
|
||||
name = n.getId() and
|
||||
name = getBuiltInName() and
|
||||
m = n.getEnclosingModule()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
|
||||
* built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
|
||||
*/
|
||||
private predicate name_possibly_defined_in_import_star(NameNode n, string name, Scope s) {
|
||||
n.isLoad() and
|
||||
name = n.getId() and
|
||||
// Not already defined in an enclosing scope.
|
||||
not exists(LocalVariable v |
|
||||
v.getId() = name and v.getScope() = n.getScope().getEnclosingScope*()
|
||||
) and
|
||||
not name = getBuiltInName() and
|
||||
s = n.getScope().getEnclosingScope*() and
|
||||
exists(potential_import_star_base(s)) and
|
||||
not global_name_defined_in_module(name, n.getEnclosingModule())
|
||||
}
|
||||
|
||||
/** Holds if a global variable called `name` is assigned a value in the module `m`. */
|
||||
private predicate global_name_defined_in_module(string name, Module m) {
|
||||
exists(NameNode n |
|
||||
not exists(LocalVariable v | n.defines(v)) and
|
||||
n.isStore() and
|
||||
name = n.getId() and
|
||||
m = n.getEnclosingModule()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the API graph node for all modules imported with `from ... import *` inside the scope `s`.
|
||||
*
|
||||
* For example, given
|
||||
*
|
||||
* `from foo.bar import *`
|
||||
*
|
||||
* this would be the API graph node with the path
|
||||
*
|
||||
* `moduleImport("foo").getMember("bar")`
|
||||
*/
|
||||
private TApiNode potential_import_star_base(Scope s) {
|
||||
exists(DataFlow::Node ref |
|
||||
ref.asCfgNode() = any(ImportStarNode n | n.getScope() = s).getModule() and
|
||||
use(result, ref)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -396,11 +514,28 @@ module API {
|
||||
exists(DataFlow::Node superclass | pred.flowsTo(superclass) |
|
||||
ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
|
||||
)
|
||||
or
|
||||
// awaiting
|
||||
exists(Await await, DataFlow::Node awaitedValue |
|
||||
lbl = Label::await() and
|
||||
ref.asExpr() = await and
|
||||
await.getValue() = awaitedValue.asExpr() and
|
||||
pred.flowsTo(awaitedValue)
|
||||
)
|
||||
)
|
||||
or
|
||||
// Built-ins, treated as members of the module `builtins`
|
||||
base = MkModuleImport("builtins") and
|
||||
lbl = Label::member(any(string name | ref = likely_builtin(name)))
|
||||
or
|
||||
// Unknown variables that may belong to a module imported with `import *`
|
||||
exists(Scope s |
|
||||
base = potential_import_star_base(s) and
|
||||
lbl =
|
||||
Label::member(any(string name |
|
||||
name_possibly_defined_in_import_star(ref.asCfgNode(), name, s)
|
||||
))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -426,7 +561,7 @@ module API {
|
||||
*
|
||||
* The flow from `src` to that node may be inter-procedural.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode trackUseNode(
|
||||
private DataFlow::TypeTrackingNode trackUseNode(
|
||||
DataFlow::LocalSourceNode src, DataFlow::TypeTracker t
|
||||
) {
|
||||
t.start() and
|
||||
@@ -444,7 +579,6 @@ module API {
|
||||
cached
|
||||
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
|
||||
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
|
||||
// We exclude module variable nodes, as these do not correspond to real uses.
|
||||
not result instanceof DataFlow::ModuleVariableNode
|
||||
}
|
||||
|
||||
@@ -512,5 +646,9 @@ private module Label {
|
||||
/** Gets the `return` edge label. */
|
||||
string return() { result = "getReturn()" }
|
||||
|
||||
/** Gets the `subclass` edge label. */
|
||||
string subclass() { result = "getASubclass()" }
|
||||
|
||||
/** Gets the `await` edge label. */
|
||||
string await() { result = "getAwaited()" }
|
||||
}
|
||||
@@ -49,12 +49,15 @@ abstract class AstNode extends AstNode_ {
|
||||
/** Whether this contains `inner` syntactically */
|
||||
predicate contains(AstNode inner) { this.getAChildNode+() = inner }
|
||||
|
||||
/** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
|
||||
predicate containsInScope(AstNode inner) {
|
||||
pragma[noinline]
|
||||
private predicate containsInScope(AstNode inner, Scope scope) {
|
||||
this.contains(inner) and
|
||||
this.getScope() = inner.getScope() and
|
||||
not inner instanceof Scope
|
||||
not inner instanceof Scope and
|
||||
scope = this.getScope()
|
||||
}
|
||||
|
||||
/** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
|
||||
predicate containsInScope(AstNode inner) { this.containsInScope(inner, inner.getScope()) }
|
||||
}
|
||||
|
||||
/* Parents */
|
||||
@@ -4,7 +4,7 @@
|
||||
* provide concrete subclasses.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
@@ -72,6 +72,39 @@ module FileSystemAccess {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow node that writes data to the file system access.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `FileSystemWriteAccess::Range` instead.
|
||||
*/
|
||||
class FileSystemWriteAccess extends FileSystemAccess {
|
||||
override FileSystemWriteAccess::Range range;
|
||||
|
||||
/**
|
||||
* Gets a node that represents data to be written to the file system (possibly with
|
||||
* some transformation happening before it is written, like JSON encoding).
|
||||
*/
|
||||
DataFlow::Node getADataNode() { result = range.getADataNode() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new file system writes. */
|
||||
module FileSystemWriteAccess {
|
||||
/**
|
||||
* A data flow node that writes data to the file system access.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `FileSystemWriteAccess` instead.
|
||||
*/
|
||||
abstract class Range extends FileSystemAccess::Range {
|
||||
/**
|
||||
* Gets a node that represents data to be written to the file system (possibly with
|
||||
* some transformation happening before it is written, like JSON encoding).
|
||||
*/
|
||||
abstract DataFlow::Node getADataNode();
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides classes for modeling path-related APIs. */
|
||||
module Path {
|
||||
/**
|
||||
@@ -235,6 +268,35 @@ private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaint
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that logs data.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `Logging::Range` instead.
|
||||
*/
|
||||
class Logging extends DataFlow::Node {
|
||||
Logging::Range range;
|
||||
|
||||
Logging() { this = range }
|
||||
|
||||
/** Gets an input that is logged. */
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new logging mechanisms. */
|
||||
module Logging {
|
||||
/**
|
||||
* A data-flow node that logs data.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `Logging` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets an input that is logged. */
|
||||
abstract DataFlow::Node getAnInput();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that dynamically executes Python code.
|
||||
*
|
||||
@@ -293,6 +355,78 @@ module SqlExecution {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that escapes meta-characters, which could be used to prevent
|
||||
* injection attacks.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `Escaping::Range` instead.
|
||||
*/
|
||||
class Escaping extends DataFlow::Node {
|
||||
Escaping::Range range;
|
||||
|
||||
Escaping() {
|
||||
this = range and
|
||||
// escapes that don't have _both_ input/output defined are not valid
|
||||
exists(range.getAnInput()) and
|
||||
exists(range.getOutput())
|
||||
}
|
||||
|
||||
/** Gets an input that will be escaped. */
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
|
||||
/** Gets the output that contains the escaped data. */
|
||||
DataFlow::Node getOutput() { result = range.getOutput() }
|
||||
|
||||
/**
|
||||
* Gets the context that this function escapes for, such as `html`, or `url`.
|
||||
*/
|
||||
string getKind() { result = range.getKind() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new escaping APIs. */
|
||||
module Escaping {
|
||||
/**
|
||||
* A data-flow node that escapes meta-characters, which could be used to prevent
|
||||
* injection attacks.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `Escaping` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets an input that will be escaped. */
|
||||
abstract DataFlow::Node getAnInput();
|
||||
|
||||
/** Gets the output that contains the escaped data. */
|
||||
abstract DataFlow::Node getOutput();
|
||||
|
||||
/**
|
||||
* Gets the context that this function escapes for.
|
||||
*
|
||||
* While kinds are represented as strings, this should not be relied upon. Use the
|
||||
* predicates in the `Escaping` module, such as `getHtmlKind`.
|
||||
*/
|
||||
abstract string getKind();
|
||||
}
|
||||
|
||||
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
|
||||
string getHtmlKind() { result = "html" }
|
||||
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
|
||||
//
|
||||
// Technically it claims to escape for both HTML and XML, but for now we don't have
|
||||
// anything that relies on XML escaping, so I'm going to defer deciding whether they
|
||||
// should be the same kind, or whether they deserve to be treated differently.
|
||||
}
|
||||
|
||||
/**
|
||||
* An escape of a string so it can be safely included in
|
||||
* the body of an HTML element, for example, replacing `{}` in
|
||||
* `<p>{}</p>`.
|
||||
*/
|
||||
class HtmlEscaping extends Escaping {
|
||||
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling HTTP-related APIs. */
|
||||
module HTTP {
|
||||
import semmle.python.web.HttpConstants
|
||||
@@ -345,7 +479,7 @@ module HTTP {
|
||||
/** Gets the URL pattern for this route, if it can be statically determined. */
|
||||
string getUrlPattern() {
|
||||
exists(StrConst str |
|
||||
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getUrlPatternArg()) and
|
||||
this.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(str) and
|
||||
result = str.getText()
|
||||
)
|
||||
}
|
||||
@@ -478,9 +612,7 @@ module HTTP {
|
||||
/** Gets the mimetype of this HTTP response, if it can be statically determined. */
|
||||
string getMimetype() {
|
||||
exists(StrConst str |
|
||||
DataFlow::exprNode(str)
|
||||
.(DataFlow::LocalSourceNode)
|
||||
.flowsTo(this.getMimetypeOrContentTypeArg()) and
|
||||
this.getMimetypeOrContentTypeArg().getALocalSource() = DataFlow::exprNode(str) and
|
||||
result = str.getText().splitAt(";", 0)
|
||||
)
|
||||
or
|
||||
@@ -524,10 +656,73 @@ module HTTP {
|
||||
abstract DataFlow::Node getRedirectLocation();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that sets a cookie in an HTTP response.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `HTTP::CookieWrite::Range` instead.
|
||||
*/
|
||||
class CookieWrite extends DataFlow::Node {
|
||||
CookieWrite::Range range;
|
||||
|
||||
CookieWrite() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the argument, if any, specifying the raw cookie header.
|
||||
*/
|
||||
DataFlow::Node getHeaderArg() { result = range.getHeaderArg() }
|
||||
|
||||
/**
|
||||
* Gets the argument, if any, specifying the cookie name.
|
||||
*/
|
||||
DataFlow::Node getNameArg() { result = range.getNameArg() }
|
||||
|
||||
/**
|
||||
* Gets the argument, if any, specifying the cookie value.
|
||||
*/
|
||||
DataFlow::Node getValueArg() { result = range.getValueArg() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new cookie writes on HTTP responses. */
|
||||
module CookieWrite {
|
||||
/**
|
||||
* A data-flow node that sets a cookie in an HTTP response.
|
||||
*
|
||||
* Note: we don't require that this redirect must be sent to a client (a kind of
|
||||
* "if a tree falls in a forest and nobody hears it" situation).
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `HttpResponse` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument, if any, specifying the raw cookie header.
|
||||
*/
|
||||
abstract DataFlow::Node getHeaderArg();
|
||||
|
||||
/**
|
||||
* Gets the argument, if any, specifying the cookie name.
|
||||
*/
|
||||
abstract DataFlow::Node getNameArg();
|
||||
|
||||
/**
|
||||
* Gets the argument, if any, specifying the cookie value.
|
||||
*/
|
||||
abstract DataFlow::Node getValueArg();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides models for cryptographic things. */
|
||||
/**
|
||||
* Provides models for cryptographic things.
|
||||
*
|
||||
* Note: The `CryptographicAlgorithm` class currently doesn't take weak keys into
|
||||
* consideration for the `isWeak` member predicate. So RSA is always considered
|
||||
* secure, although using a low number of bits will actually make it insecure. We plan
|
||||
* to improve our libraries in the future to more precisely capture this aspect.
|
||||
*/
|
||||
module Cryptography {
|
||||
/** Provides models for public-key cryptography, also called asymmetric cryptography. */
|
||||
module PublicKey {
|
||||
@@ -563,7 +758,7 @@ module Cryptography {
|
||||
/** Provides classes for modeling new key-pair generation APIs. */
|
||||
module KeyGeneration {
|
||||
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
|
||||
private DataFlow::LocalSourceNode keysizeBacktracker(
|
||||
private DataFlow::TypeTrackingNode keysizeBacktracker(
|
||||
DataFlow::TypeBackTracker t, DataFlow::Node arg
|
||||
) {
|
||||
t.start() and
|
||||
@@ -626,4 +821,43 @@ module Cryptography {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
import semmle.python.concepts.CryptoAlgorithms
|
||||
|
||||
/**
|
||||
* A data-flow node that is an application of a cryptographic algorithm. For example,
|
||||
* encryption, decryption, signature-validation.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `CryptographicOperation::Range` instead.
|
||||
*/
|
||||
class CryptographicOperation extends DataFlow::Node {
|
||||
CryptographicOperation::Range range;
|
||||
|
||||
CryptographicOperation() { this = range }
|
||||
|
||||
/** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
|
||||
CryptographicAlgorithm getAlgorithm() { result = range.getAlgorithm() }
|
||||
|
||||
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling new applications of a cryptographic algorithms. */
|
||||
module CryptographicOperation {
|
||||
/**
|
||||
* A data-flow node that is an application of a cryptographic algorithm. For example,
|
||||
* encryption, decryption, signature-validation.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `CryptographicOperation` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
|
||||
abstract CryptographicAlgorithm getAlgorithm();
|
||||
|
||||
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
|
||||
abstract DataFlow::Node getAnInput();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,7 @@
|
||||
import python
|
||||
|
||||
/** A file */
|
||||
class File extends Container {
|
||||
File() { files(this, _, _, _, _) }
|
||||
|
||||
class File extends Container, @file {
|
||||
/** DEPRECATED: Use `getAbsolutePath` instead. */
|
||||
deprecated override string getName() { result = this.getAbsolutePath() }
|
||||
|
||||
@@ -34,9 +32,7 @@ class File extends Container {
|
||||
}
|
||||
|
||||
/** Gets a short name for this file (just the file name) */
|
||||
string getShortName() {
|
||||
exists(string simple, string ext | files(this, _, simple, ext, _) | result = simple + ext)
|
||||
}
|
||||
string getShortName() { result = this.getBaseName() }
|
||||
|
||||
private int lastLine() {
|
||||
result = max(int i | exists(Location l | l.getFile() = this and l.getEndLine() = i))
|
||||
@@ -55,7 +51,7 @@ class File extends Container {
|
||||
)
|
||||
}
|
||||
|
||||
override string getAbsolutePath() { files(this, result, _, _, _) }
|
||||
override string getAbsolutePath() { files(this, result) }
|
||||
|
||||
/** Gets the URL of this file. */
|
||||
override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" }
|
||||
@@ -89,7 +85,15 @@ class File extends Container {
|
||||
i.getTest().(Compare).compares(name, op, main) and
|
||||
name.getId() = "__name__" and
|
||||
main.getText() = "__main__"
|
||||
)
|
||||
) and
|
||||
// Exclude files named `__main__.py`. These are often _not_ meant to be run directly, but
|
||||
// contain this construct anyway.
|
||||
//
|
||||
// Their presence in a package (say, `foo`) means one can execute the package directly using
|
||||
// `python -m foo` (which will run the `foo/__main__.py` file). Since being an entry point for
|
||||
// execution means treating imports as absolute, this causes trouble, since when run with
|
||||
// `python -m`, the interpreter uses the usual package semantics.
|
||||
not this.getShortName() = "__main__.py"
|
||||
or
|
||||
// The file contains a `#!` line referencing the python interpreter
|
||||
exists(Comment c |
|
||||
@@ -110,15 +114,10 @@ private predicate occupied_line(File f, int n) {
|
||||
}
|
||||
|
||||
/** A folder (directory) */
|
||||
class Folder extends Container {
|
||||
Folder() { folders(this, _, _) }
|
||||
|
||||
class Folder extends Container, @folder {
|
||||
/** DEPRECATED: Use `getAbsolutePath` instead. */
|
||||
deprecated override string getName() { result = this.getAbsolutePath() }
|
||||
|
||||
/** DEPRECATED: Use `getBaseName` instead. */
|
||||
deprecated string getSimple() { folders(this, _, result) }
|
||||
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
@@ -136,7 +135,7 @@ class Folder extends Container {
|
||||
endcolumn = 0
|
||||
}
|
||||
|
||||
override string getAbsolutePath() { folders(this, result, _) }
|
||||
override string getAbsolutePath() { folders(this, result) }
|
||||
|
||||
/** Gets the URL of this folder. */
|
||||
override string getURL() { result = "folder://" + this.getAbsolutePath() }
|
||||
@@ -653,6 +653,8 @@ class DefinitionNode extends ControlFlowNode {
|
||||
DefinitionNode() {
|
||||
exists(Assign a | a.getATarget().getAFlowNode() = this)
|
||||
or
|
||||
exists(AnnAssign a | a.getTarget().getAFlowNode() = this and exists(a.getValue()))
|
||||
or
|
||||
exists(Alias a | a.getAsname().getAFlowNode() = this)
|
||||
or
|
||||
augstore(_, this)
|
||||
@@ -795,6 +797,9 @@ private AstNode assigned_value(Expr lhs) {
|
||||
/* lhs = result */
|
||||
exists(Assign a | a.getATarget() = lhs and result = a.getValue())
|
||||
or
|
||||
/* lhs : annotation = result */
|
||||
exists(AnnAssign a | a.getTarget() = lhs and result = a.getValue())
|
||||
or
|
||||
/* import result as lhs */
|
||||
exists(Alias a | a.getAsname() = lhs and result = a.getValue())
|
||||
or
|
||||
35
python/ql/lib/semmle/python/Frameworks.qll
Normal file
35
python/ql/lib/semmle/python/Frameworks.qll
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* Helper file that imports all framework modeling.
|
||||
*/
|
||||
|
||||
// If you add modeling of a new framework/library, remember to add it it to the docs in
|
||||
// `docs/codeql/support/reusables/frameworks.rst`
|
||||
private import semmle.python.frameworks.Aioch
|
||||
private import semmle.python.frameworks.Aiohttp
|
||||
private import semmle.python.frameworks.ClickhouseDriver
|
||||
private import semmle.python.frameworks.Cryptodome
|
||||
private import semmle.python.frameworks.Cryptography
|
||||
private import semmle.python.frameworks.Dill
|
||||
private import semmle.python.frameworks.Django
|
||||
private import semmle.python.frameworks.Fabric
|
||||
private import semmle.python.frameworks.Flask
|
||||
private import semmle.python.frameworks.FlaskSqlAlchemy
|
||||
private import semmle.python.frameworks.Idna
|
||||
private import semmle.python.frameworks.Invoke
|
||||
private import semmle.python.frameworks.Jmespath
|
||||
private import semmle.python.frameworks.MarkupSafe
|
||||
private import semmle.python.frameworks.Multidict
|
||||
private import semmle.python.frameworks.Mysql
|
||||
private import semmle.python.frameworks.MySQLdb
|
||||
private import semmle.python.frameworks.Peewee
|
||||
private import semmle.python.frameworks.Psycopg2
|
||||
private import semmle.python.frameworks.PyMySQL
|
||||
private import semmle.python.frameworks.Rsa
|
||||
private import semmle.python.frameworks.Simplejson
|
||||
private import semmle.python.frameworks.SqlAlchemy
|
||||
private import semmle.python.frameworks.Stdlib
|
||||
private import semmle.python.frameworks.Tornado
|
||||
private import semmle.python.frameworks.Twisted
|
||||
private import semmle.python.frameworks.Ujson
|
||||
private import semmle.python.frameworks.Yaml
|
||||
private import semmle.python.frameworks.Yarl
|
||||
@@ -7,6 +7,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.RegexTreeView
|
||||
|
||||
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()
|
||||
|
||||
@@ -53,6 +54,9 @@ private newtype TPrintAstNode =
|
||||
not list = any(Module mod).getBody() and
|
||||
not forall(AstNode child | child = list.getAnItem() | isNotNeeded(child)) and
|
||||
exists(list.getAnItem())
|
||||
} or
|
||||
TRegExpTermNode(RegExpTerm term) {
|
||||
exists(StrConst str | term.getRootTerm() = getParsedRegExp(str) and shouldPrint(str, _))
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -419,6 +423,42 @@ class ParameterNode extends AstElementNode {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A print node for a `StrConst`.
|
||||
*
|
||||
* The string has a child, if the child is used as a regular expression,
|
||||
* which is the root of the regular expression.
|
||||
*/
|
||||
class StrConstNode extends AstElementNode {
|
||||
override StrConst element;
|
||||
|
||||
override PrintAstNode getChild(int childIndex) {
|
||||
childIndex = 0 and result.(RegExpTermNode).getTerm() = getParsedRegExp(element)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A print node for a regular expression term.
|
||||
*/
|
||||
class RegExpTermNode extends TRegExpTermNode, PrintAstNode {
|
||||
RegExpTerm term;
|
||||
|
||||
RegExpTermNode() { this = TRegExpTermNode(term) }
|
||||
|
||||
/** Gets the `RegExpTerm` for this node. */
|
||||
RegExpTerm getTerm() { result = term }
|
||||
|
||||
override PrintAstNode getChild(int childIndex) {
|
||||
result.(RegExpTermNode).getTerm() = term.getChild(childIndex)
|
||||
}
|
||||
|
||||
override string toString() {
|
||||
result = "[" + strictconcat(term.getPrimaryQLClass(), " | ") + "] " + term.toString()
|
||||
}
|
||||
|
||||
override Location getLocation() { result = term.getLocation() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the `i`th child from `node` ordered by location.
|
||||
*/
|
||||
@@ -447,7 +487,7 @@ private module PrettyPrinting {
|
||||
string getQlClass(AstNode a) {
|
||||
shouldPrint(a, _) and
|
||||
(
|
||||
not exists(getQlCustomClass(a)) and result = a.toString()
|
||||
not exists(getQlCustomClass(a)) and result = strictconcat(a.toString(), " | ")
|
||||
or
|
||||
result = strictconcat(getQlCustomClass(a), " | ")
|
||||
)
|
||||
986
python/ql/lib/semmle/python/RegexTreeView.qll
Normal file
986
python/ql/lib/semmle/python/RegexTreeView.qll
Normal file
@@ -0,0 +1,986 @@
|
||||
/** Provides a class hierarchy corresponding to a parse tree of regular expressions. */
|
||||
|
||||
import python
|
||||
private import semmle.python.regex
|
||||
|
||||
/**
|
||||
* An element containing a regular expression term, that is, either
|
||||
* a string literal (parsed as a regular expression)
|
||||
* or another regular expression term.
|
||||
*
|
||||
* For sequences and alternations, we require at least one child.
|
||||
* Otherwise, we wish to represent the term differently.
|
||||
* This avoids multiple representations of the same term.
|
||||
*/
|
||||
newtype TRegExpParent =
|
||||
/** A string literal used as a regular expression */
|
||||
TRegExpLiteral(Regex re) or
|
||||
/** A quantified term */
|
||||
TRegExpQuantifier(Regex re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
|
||||
/** A sequence term */
|
||||
TRegExpSequence(Regex re, int start, int end) {
|
||||
re.sequence(start, end) and
|
||||
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
|
||||
} or
|
||||
/** An alternation term */
|
||||
TRegExpAlt(Regex re, int start, int end) {
|
||||
re.alternation(start, end) and
|
||||
exists(int part_end |
|
||||
re.alternationOption(start, end, start, part_end) and
|
||||
part_end < end
|
||||
) // if an alternation does not have more than one element, it should be treated as that element instead.
|
||||
} or
|
||||
/** A character class term */
|
||||
TRegExpCharacterClass(Regex re, int start, int end) { re.charSet(start, end) } or
|
||||
/** A character range term */
|
||||
TRegExpCharacterRange(Regex re, int start, int end) { re.charRange(_, start, _, _, end) } or
|
||||
/** A group term */
|
||||
TRegExpGroup(Regex re, int start, int end) { re.group(start, end) } or
|
||||
/** A special character */
|
||||
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
|
||||
/** A normal character */
|
||||
TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
|
||||
/** A back reference */
|
||||
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
|
||||
|
||||
/**
|
||||
* An element containing a regular expression term, that is, either
|
||||
* a string literal (parsed as a regular expression)
|
||||
* or another regular expression term.
|
||||
*/
|
||||
class RegExpParent extends TRegExpParent {
|
||||
string toString() { result = "RegExpParent" }
|
||||
|
||||
/** Gets the `i`th child term. */
|
||||
abstract RegExpTerm getChild(int i);
|
||||
|
||||
/** Gets a child term . */
|
||||
RegExpTerm getAChild() { result = getChild(_) }
|
||||
|
||||
/** Gets the number of child terms. */
|
||||
int getNumChild() { result = count(getAChild()) }
|
||||
|
||||
/** Gets the associated regex. */
|
||||
abstract Regex getRegex();
|
||||
}
|
||||
|
||||
/** A string literal used as a regular expression */
|
||||
class RegExpLiteral extends TRegExpLiteral, RegExpParent {
|
||||
Regex re;
|
||||
|
||||
RegExpLiteral() { this = TRegExpLiteral(re) }
|
||||
|
||||
override RegExpTerm getChild(int i) { i = 0 and result.getRegex() = re and result.isRootTerm() }
|
||||
|
||||
predicate isDotAll() { re.getAMode() = "DOTALL" }
|
||||
|
||||
predicate isIgnoreCase() { re.getAMode() = "IGNORECASE" }
|
||||
|
||||
string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }
|
||||
|
||||
override Regex getRegex() { result = re }
|
||||
|
||||
string getPrimaryQLClass() { result = "RegExpLiteral" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A regular expression term, that is, a syntactic part of a regular expression.
|
||||
*/
|
||||
class RegExpTerm extends RegExpParent {
|
||||
Regex re;
|
||||
int start;
|
||||
int end;
|
||||
|
||||
RegExpTerm() {
|
||||
this = TRegExpAlt(re, start, end)
|
||||
or
|
||||
this = TRegExpBackRef(re, start, end)
|
||||
or
|
||||
this = TRegExpCharacterClass(re, start, end)
|
||||
or
|
||||
this = TRegExpCharacterRange(re, start, end)
|
||||
or
|
||||
this = TRegExpNormalChar(re, start, end)
|
||||
or
|
||||
this = TRegExpGroup(re, start, end)
|
||||
or
|
||||
this = TRegExpQuantifier(re, start, end)
|
||||
or
|
||||
this = TRegExpSequence(re, start, end)
|
||||
or
|
||||
this = TRegExpSpecialChar(re, start, end)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the outermost term of this regular expression.
|
||||
*/
|
||||
RegExpTerm getRootTerm() {
|
||||
this.isRootTerm() and result = this
|
||||
or
|
||||
result = getParent().(RegExpTerm).getRootTerm()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this term is part of a string literal
|
||||
* that is interpreted as a regular expression.
|
||||
*/
|
||||
predicate isUsedAsRegExp() { any() }
|
||||
|
||||
/**
|
||||
* Holds if this is the root term of a regular expression.
|
||||
*/
|
||||
predicate isRootTerm() { start = 0 and end = re.getText().length() }
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
result = this.(RegExpAlt).getChild(i)
|
||||
or
|
||||
result = this.(RegExpBackRef).getChild(i)
|
||||
or
|
||||
result = this.(RegExpCharacterClass).getChild(i)
|
||||
or
|
||||
result = this.(RegExpCharacterRange).getChild(i)
|
||||
or
|
||||
result = this.(RegExpNormalChar).getChild(i)
|
||||
or
|
||||
result = this.(RegExpGroup).getChild(i)
|
||||
or
|
||||
result = this.(RegExpQuantifier).getChild(i)
|
||||
or
|
||||
result = this.(RegExpSequence).getChild(i)
|
||||
or
|
||||
result = this.(RegExpSpecialChar).getChild(i)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the parent term of this regular expression term, or the
|
||||
* regular expression literal if this is the root term.
|
||||
*/
|
||||
RegExpParent getParent() { result.getAChild() = this }
|
||||
|
||||
override Regex getRegex() { result = re }
|
||||
|
||||
/** Gets the offset at which this term starts. */
|
||||
int getStart() { result = start }
|
||||
|
||||
/** Gets the offset at which this term ends. */
|
||||
int getEnd() { result = end }
|
||||
|
||||
override string toString() { result = re.getText().substring(start, end) }
|
||||
|
||||
/**
|
||||
* Gets the location of the surrounding regex, as locations inside the regex do not exist.
|
||||
* To get location information corresponding to the term inside the regex,
|
||||
* use `hasLocationInfo`.
|
||||
*/
|
||||
Location getLocation() { result = re.getLocation() }
|
||||
|
||||
/** Holds if this term is found at the specified location offsets. */
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
exists(int re_start, int re_end |
|
||||
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, re_end) and
|
||||
startcolumn = re_start + start + 4 and
|
||||
endcolumn = re_start + end + 3
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the file in which this term is found. */
|
||||
File getFile() { result = this.getLocation().getFile() }
|
||||
|
||||
/** Gets the raw source text of this term. */
|
||||
string getRawValue() { result = this.toString() }
|
||||
|
||||
/** Gets the string literal in which this term is found. */
|
||||
RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }
|
||||
|
||||
/** Gets the regular expression term that is matched (textually) before this one, if any. */
|
||||
RegExpTerm getPredecessor() {
|
||||
exists(RegExpTerm parent | parent = getParent() |
|
||||
result = parent.(RegExpSequence).previousElement(this)
|
||||
or
|
||||
not exists(parent.(RegExpSequence).previousElement(this)) and
|
||||
not parent instanceof RegExpSubPattern and
|
||||
result = parent.getPredecessor()
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the regular expression term that is matched (textually) after this one, if any. */
|
||||
RegExpTerm getSuccessor() {
|
||||
exists(RegExpTerm parent | parent = getParent() |
|
||||
result = parent.(RegExpSequence).nextElement(this)
|
||||
or
|
||||
not exists(parent.(RegExpSequence).nextElement(this)) and
|
||||
not parent instanceof RegExpSubPattern and
|
||||
result = parent.getSuccessor()
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the primary QL class for this term. */
|
||||
string getPrimaryQLClass() { result = "RegExpTerm" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A quantified regular expression term.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* ((ECMA|Java)[sS]cript)*
|
||||
* ```
|
||||
*/
|
||||
class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
|
||||
int part_end;
|
||||
boolean maybe_empty;
|
||||
boolean may_repeat_forever;
|
||||
|
||||
RegExpQuantifier() {
|
||||
this = TRegExpQuantifier(re, start, end) and
|
||||
re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever)
|
||||
}
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
i = 0 and
|
||||
result.getRegex() = re and
|
||||
result.getStart() = start and
|
||||
result.getEnd() = part_end
|
||||
}
|
||||
|
||||
predicate mayRepeatForever() { may_repeat_forever = true }
|
||||
|
||||
string getQualifier() { result = re.getText().substring(part_end, end) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpQuantifier" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A regular expression term that permits unlimited repetitions.
|
||||
*/
|
||||
class InfiniteRepetitionQuantifier extends RegExpQuantifier {
|
||||
InfiniteRepetitionQuantifier() { this.mayRepeatForever() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A star-quantified term.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* \w*
|
||||
* ```
|
||||
*/
|
||||
class RegExpStar extends InfiniteRepetitionQuantifier {
|
||||
RegExpStar() { this.getQualifier().charAt(0) = "*" }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpStar" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A plus-quantified term.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* \w+
|
||||
* ```
|
||||
*/
|
||||
class RegExpPlus extends InfiniteRepetitionQuantifier {
|
||||
RegExpPlus() { this.getQualifier().charAt(0) = "+" }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpPlus" }
|
||||
}
|
||||
|
||||
/**
|
||||
* An optional term.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* ;?
|
||||
* ```
|
||||
*/
|
||||
class RegExpOpt extends RegExpQuantifier {
|
||||
RegExpOpt() { this.getQualifier().charAt(0) = "?" }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpOpt" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A range-quantified term
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* \w{2,4}
|
||||
* \w{2,}
|
||||
* \w{2}
|
||||
* ```
|
||||
*/
|
||||
class RegExpRange extends RegExpQuantifier {
|
||||
string upper;
|
||||
string lower;
|
||||
|
||||
RegExpRange() { re.multiples(part_end, end, lower, upper) }
|
||||
|
||||
string getUpper() { result = upper }
|
||||
|
||||
string getLower() { result = lower }
|
||||
|
||||
/**
|
||||
* Gets the upper bound of the range, if any.
|
||||
*
|
||||
* If there is no upper bound, any number of repetitions is allowed.
|
||||
* For a term of the form `r{lo}`, both the lower and the upper bound
|
||||
* are `lo`.
|
||||
*/
|
||||
int getUpperBound() { result = this.getUpper().toInt() }
|
||||
|
||||
/** Gets the lower bound of the range. */
|
||||
int getLowerBound() { result = this.getLower().toInt() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpRange" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A sequence term.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* (ECMA|Java)Script
|
||||
* ```
|
||||
*
|
||||
* This is a sequence with the elements `(ECMA|Java)` and `Script`.
|
||||
*/
|
||||
class RegExpSequence extends RegExpTerm, TRegExpSequence {
|
||||
RegExpSequence() { this = TRegExpSequence(re, start, end) }
|
||||
|
||||
override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
|
||||
|
||||
/** Gets the element preceding `element` in this sequence. */
|
||||
RegExpTerm previousElement(RegExpTerm element) { element = nextElement(result) }
|
||||
|
||||
/** Gets the element following `element` in this sequence. */
|
||||
RegExpTerm nextElement(RegExpTerm element) {
|
||||
exists(int i |
|
||||
element = this.getChild(i) and
|
||||
result = this.getChild(i + 1)
|
||||
)
|
||||
}
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpSequence" }
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private int seqChildEnd(Regex re, int start, int end, int i) {
|
||||
result = seqChild(re, start, end, i).getEnd()
|
||||
}
|
||||
|
||||
// moved out so we can use it in the charpred
|
||||
private RegExpTerm seqChild(Regex re, int start, int end, int i) {
|
||||
re.sequence(start, end) and
|
||||
(
|
||||
i = 0 and
|
||||
result.getRegex() = re and
|
||||
result.getStart() = start and
|
||||
exists(int itemEnd |
|
||||
re.item(start, itemEnd) and
|
||||
result.getEnd() = itemEnd
|
||||
)
|
||||
or
|
||||
i > 0 and
|
||||
result.getRegex() = re and
|
||||
exists(int itemStart | itemStart = seqChildEnd(re, start, end, i - 1) |
|
||||
result.getStart() = itemStart and
|
||||
re.item(itemStart, result.getEnd())
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* An alternative term, that is, a term of the form `a|b`.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* ECMA|Java
|
||||
* ```
|
||||
*/
|
||||
class RegExpAlt extends RegExpTerm, TRegExpAlt {
|
||||
RegExpAlt() { this = TRegExpAlt(re, start, end) }
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
i = 0 and
|
||||
result.getRegex() = re and
|
||||
result.getStart() = start and
|
||||
exists(int part_end |
|
||||
re.alternationOption(start, end, start, part_end) and
|
||||
result.getEnd() = part_end
|
||||
)
|
||||
or
|
||||
i > 0 and
|
||||
result.getRegex() = re and
|
||||
exists(int part_start |
|
||||
part_start = this.getChild(i - 1).getEnd() + 1 // allow for the |
|
||||
|
|
||||
result.getStart() = part_start and
|
||||
re.alternationOption(start, end, part_start, result.getEnd())
|
||||
)
|
||||
}
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpAlt" }
|
||||
}
|
||||
|
||||
/**
|
||||
* An escaped regular expression term, that is, a regular expression
|
||||
* term starting with a backslash, which is not a backreference.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* \.
|
||||
* \w
|
||||
* ```
|
||||
*/
|
||||
class RegExpEscape extends RegExpNormalChar {
|
||||
RegExpEscape() { re.escapedCharacter(start, end) }
|
||||
|
||||
/**
|
||||
* Gets the name of the escaped; for example, `w` for `\w`.
|
||||
* TODO: Handle named escapes.
|
||||
*/
|
||||
override string getValue() {
|
||||
this.isIdentityEscape() and result = this.getUnescaped()
|
||||
or
|
||||
this.getUnescaped() = "n" and result = "\n"
|
||||
or
|
||||
this.getUnescaped() = "r" and result = "\r"
|
||||
or
|
||||
this.getUnescaped() = "t" and result = "\t"
|
||||
or
|
||||
// TODO: Find a way to include a formfeed character
|
||||
// this.getUnescaped() = "f" and result = ""
|
||||
// or
|
||||
isUnicode() and
|
||||
result = getUnicode()
|
||||
}
|
||||
|
||||
predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t", "f"] }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpEscape" }
|
||||
|
||||
string getUnescaped() { result = this.getText().suffix(1) }
|
||||
|
||||
/**
|
||||
* Gets the text for this escape. That is e.g. "\w".
|
||||
*/
|
||||
private string getText() { result = re.getText().substring(start, end) }
|
||||
|
||||
/**
|
||||
* Holds if this is a unicode escape.
|
||||
*/
|
||||
private predicate isUnicode() { getText().prefix(2) = ["\\u", "\\U"] }
|
||||
|
||||
/**
|
||||
* Gets the unicode char for this escape.
|
||||
* E.g. for `\u0061` this returns "a".
|
||||
*/
|
||||
private string getUnicode() {
|
||||
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
|
||||
result = codepoint.toUnicode()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets int value for the `index`th char in the hex number of the unicode escape.
|
||||
* E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
|
||||
*/
|
||||
private int getHexValueFromUnicode(int index) {
|
||||
this.isUnicode() and
|
||||
exists(string hex, string char | hex = this.getText().suffix(2) |
|
||||
char = hex.charAt(index) and
|
||||
result = 16.pow(hex.length() - index - 1) * toHex(char)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the hex number for the `hex` char.
|
||||
*/
|
||||
private int toHex(string hex) {
|
||||
hex = [0 .. 9].toString() and
|
||||
result = hex.toInt()
|
||||
or
|
||||
result = 10 and hex = ["a", "A"]
|
||||
or
|
||||
result = 11 and hex = ["b", "B"]
|
||||
or
|
||||
result = 12 and hex = ["c", "C"]
|
||||
or
|
||||
result = 13 and hex = ["d", "D"]
|
||||
or
|
||||
result = 14 and hex = ["e", "E"]
|
||||
or
|
||||
result = 15 and hex = ["f", "F"]
|
||||
}
|
||||
|
||||
/**
|
||||
* A character class escape in a regular expression.
|
||||
* That is, an escaped charachter that denotes multiple characters.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* \w
|
||||
* \S
|
||||
* ```
|
||||
*/
|
||||
class RegExpCharacterClassEscape extends RegExpEscape {
|
||||
// string value;
|
||||
RegExpCharacterClassEscape() {
|
||||
// value = re.getText().substring(start + 1, end) and
|
||||
// value in ["d", "D", "s", "S", "w", "W"]
|
||||
this.getValue() in ["d", "D", "s", "S", "w", "W"]
|
||||
}
|
||||
|
||||
/** Gets the name of the character class; for example, `w` for `\w`. */
|
||||
// override string getValue() { result = value }
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A character class in a regular expression.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* [a-z_]
|
||||
* [^<>&]
|
||||
* ```
|
||||
*/
|
||||
class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
|
||||
RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }
|
||||
|
||||
predicate isInverted() { re.getChar(start + 1) = "^" }
|
||||
|
||||
string getCharThing(int i) { result = re.getChar(i + start) }
|
||||
|
||||
predicate isUniversalClass() {
|
||||
// [^]
|
||||
isInverted() and not exists(getAChild())
|
||||
or
|
||||
// [\w\W] and similar
|
||||
not isInverted() and
|
||||
exists(string cce1, string cce2 |
|
||||
cce1 = getAChild().(RegExpCharacterClassEscape).getValue() and
|
||||
cce2 = getAChild().(RegExpCharacterClassEscape).getValue()
|
||||
|
|
||||
cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
|
||||
)
|
||||
}
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
i = 0 and
|
||||
result.getRegex() = re and
|
||||
exists(int itemStart, int itemEnd |
|
||||
result.getStart() = itemStart and
|
||||
re.char_set_start(start, itemStart) and
|
||||
re.char_set_child(start, itemStart, itemEnd) and
|
||||
result.getEnd() = itemEnd
|
||||
)
|
||||
or
|
||||
i > 0 and
|
||||
result.getRegex() = re and
|
||||
exists(int itemStart | itemStart = this.getChild(i - 1).getEnd() |
|
||||
result.getStart() = itemStart and
|
||||
re.char_set_child(start, itemStart, result.getEnd())
|
||||
)
|
||||
}
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpCharacterClass" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A character range in a character class in a regular expression.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* a-z
|
||||
* ```
|
||||
*/
|
||||
class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
|
||||
int lower_end;
|
||||
int upper_start;
|
||||
|
||||
RegExpCharacterRange() {
|
||||
this = TRegExpCharacterRange(re, start, end) and
|
||||
re.charRange(_, start, lower_end, upper_start, end)
|
||||
}
|
||||
|
||||
predicate isRange(string lo, string hi) {
|
||||
lo = re.getText().substring(start, lower_end) and
|
||||
hi = re.getText().substring(upper_start, end)
|
||||
}
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
i = 0 and
|
||||
result.getRegex() = re and
|
||||
result.getStart() = start and
|
||||
result.getEnd() = lower_end
|
||||
or
|
||||
i = 1 and
|
||||
result.getRegex() = re and
|
||||
result.getStart() = upper_start and
|
||||
result.getEnd() = end
|
||||
}
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpCharacterRange" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A normal character in a regular expression, that is, a character
|
||||
* without special meaning. This includes escaped characters.
|
||||
*
|
||||
* Examples:
|
||||
* ```
|
||||
* t
|
||||
* \t
|
||||
* ```
|
||||
*/
|
||||
class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
|
||||
RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }
|
||||
|
||||
predicate isCharacter() { any() }
|
||||
|
||||
string getValue() { result = re.getText().substring(start, end) }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpNormalChar" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A constant regular expression term, that is, a regular expression
|
||||
* term matching a single string. Currently, this will always be a single character.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* a
|
||||
* ```
|
||||
*/
|
||||
class RegExpConstant extends RegExpTerm {
|
||||
string value;
|
||||
|
||||
RegExpConstant() {
|
||||
this = TRegExpNormalChar(re, start, end) and
|
||||
not this instanceof RegExpCharacterClassEscape and
|
||||
// exclude chars in qualifiers
|
||||
// TODO: push this into regex library
|
||||
not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
|
||||
qstart <= start and end <= qend
|
||||
) and
|
||||
value = this.(RegExpNormalChar).getValue()
|
||||
// This will never hold
|
||||
// or
|
||||
// this = TRegExpSpecialChar(re, start, end) and
|
||||
// re.inCharSet(start) and
|
||||
// value = this.(RegExpSpecialChar).getChar()
|
||||
}
|
||||
|
||||
predicate isCharacter() { any() }
|
||||
|
||||
string getValue() { result = value }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpConstant" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A grouped regular expression.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (ECMA|Java)
|
||||
* (?:ECMA|Java)
|
||||
* (?<quote>['"])
|
||||
* ```
|
||||
*/
|
||||
class RegExpGroup extends RegExpTerm, TRegExpGroup {
|
||||
RegExpGroup() { this = TRegExpGroup(re, start, end) }
|
||||
|
||||
/**
|
||||
* Gets the index of this capture group within the enclosing regular
|
||||
* expression literal.
|
||||
*
|
||||
* For example, in the regular expression `/((a?).)(?:b)/`, the
|
||||
* group `((a?).)` has index 1, the group `(a?)` nested inside it
|
||||
* has index 2, and the group `(?:b)` has no index, since it is
|
||||
* not a capture group.
|
||||
*/
|
||||
int getNumber() { result = re.getGroupNumber(start, end) }
|
||||
|
||||
/** Holds if this is a named capture group. */
|
||||
predicate isNamed() { exists(this.getName()) }
|
||||
|
||||
/** Gets the name of this capture group, if any. */
|
||||
string getName() { result = re.getGroupName(start, end) }
|
||||
|
||||
predicate isCharacter() { any() }
|
||||
|
||||
string getValue() { result = re.getText().substring(start, end) }
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
result.getRegex() = re and
|
||||
i = 0 and
|
||||
re.groupContents(start, end, result.getStart(), result.getEnd())
|
||||
}
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpGroup" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A special character in a regular expression.
|
||||
*
|
||||
* Examples:
|
||||
* ```
|
||||
* ^
|
||||
* $
|
||||
* .
|
||||
* ```
|
||||
*/
|
||||
class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
|
||||
string char;
|
||||
|
||||
RegExpSpecialChar() {
|
||||
this = TRegExpSpecialChar(re, start, end) and
|
||||
re.specialCharacter(start, end, char)
|
||||
}
|
||||
|
||||
predicate isCharacter() { any() }
|
||||
|
||||
string getChar() { result = char }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpSpecialChar" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A dot regular expression.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* .
|
||||
* ```
|
||||
*/
|
||||
class RegExpDot extends RegExpSpecialChar {
|
||||
RegExpDot() { this.getChar() = "." }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpDot" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A dollar assertion `$` matching the end of a line.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* $
|
||||
* ```
|
||||
*/
|
||||
class RegExpDollar extends RegExpSpecialChar {
|
||||
RegExpDollar() { this.getChar() = "$" }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpDollar" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A caret assertion `^` matching the beginning of a line.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* ^
|
||||
* ```
|
||||
*/
|
||||
class RegExpCaret extends RegExpSpecialChar {
|
||||
RegExpCaret() { this.getChar() = "^" }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpCaret" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A zero-width match, that is, either an empty group or an assertion.
|
||||
*
|
||||
* Examples:
|
||||
* ```
|
||||
* ()
|
||||
* (?=\w)
|
||||
* ```
|
||||
*/
|
||||
class RegExpZeroWidthMatch extends RegExpGroup {
|
||||
RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
|
||||
|
||||
override predicate isCharacter() { any() }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpZeroWidthMatch" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A zero-width lookahead or lookbehind assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?=\w)
|
||||
* (?!\n)
|
||||
* (?<=\.)
|
||||
* (?<!\\)
|
||||
* ```
|
||||
*/
|
||||
class RegExpSubPattern extends RegExpZeroWidthMatch {
|
||||
RegExpSubPattern() { not re.emptyGroup(start, end) }
|
||||
|
||||
/** Gets the lookahead term. */
|
||||
RegExpTerm getOperand() {
|
||||
exists(int in_start, int in_end | re.groupContents(start, end, in_start, in_end) |
|
||||
result.getRegex() = re and
|
||||
result.getStart() = in_start and
|
||||
result.getEnd() = in_end
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A zero-width lookahead assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?=\w)
|
||||
* (?!\n)
|
||||
* ```
|
||||
*/
|
||||
abstract class RegExpLookahead extends RegExpSubPattern { }
|
||||
|
||||
/**
|
||||
* A positive-lookahead assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?=\w)
|
||||
* ```
|
||||
*/
|
||||
class RegExpPositiveLookahead extends RegExpLookahead {
|
||||
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A negative-lookahead assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?!\n)
|
||||
* ```
|
||||
*/
|
||||
class RegExpNegativeLookahead extends RegExpLookahead {
|
||||
RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A zero-width lookbehind assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?<=\.)
|
||||
* (?<!\\)
|
||||
* ```
|
||||
*/
|
||||
abstract class RegExpLookbehind extends RegExpSubPattern { }
|
||||
|
||||
/**
|
||||
* A positive-lookbehind assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?<=\.)
|
||||
* ```
|
||||
*/
|
||||
class RegExpPositiveLookbehind extends RegExpLookbehind {
|
||||
RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A negative-lookbehind assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?<!\\)
|
||||
* ```
|
||||
*/
|
||||
class RegExpNegativeLookbehind extends RegExpLookbehind {
|
||||
RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A back reference, that is, a term of the form `\i` or `\k<name>`
|
||||
* in a regular expression.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* \1
|
||||
* (?P=quote)
|
||||
* ```
|
||||
*/
|
||||
class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
|
||||
RegExpBackRef() { this = TRegExpBackRef(re, start, end) }
|
||||
|
||||
/**
|
||||
* Gets the number of the capture group this back reference refers to, if any.
|
||||
*/
|
||||
int getNumber() { result = re.getBackrefNumber(start, end) }
|
||||
|
||||
/**
|
||||
* Gets the name of the capture group this back reference refers to, if any.
|
||||
*/
|
||||
string getName() { result = re.getBackrefName(start, end) }
|
||||
|
||||
/** Gets the capture group this back reference refers to. */
|
||||
RegExpGroup getGroup() {
|
||||
result.getLiteral() = this.getLiteral() and
|
||||
(
|
||||
result.getNumber() = this.getNumber() or
|
||||
result.getName() = this.getName()
|
||||
)
|
||||
}
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpBackRef" }
|
||||
}
|
||||
|
||||
/** Gets the parse tree resulting from parsing `re`, if such has been constructed. */
|
||||
RegExpTerm getParsedRegExp(StrConst re) { result.getRegex() = re and result.isRootTerm() }
|
||||
@@ -153,6 +153,12 @@ class ExceptStmt extends ExceptStmt_ {
|
||||
override Stmt getASubStatement() { result = this.getAStmt() }
|
||||
|
||||
override Stmt getLastStatement() { result = this.getBody().getLastItem().getLastStatement() }
|
||||
|
||||
override Expr getType() {
|
||||
result = super.getType() and not result instanceof Tuple
|
||||
or
|
||||
result = super.getType().(Tuple).getAnElt()
|
||||
}
|
||||
}
|
||||
|
||||
/** An assert statement, such as `assert a == b, "A is not equal to b"` */
|
||||
@@ -0,0 +1,317 @@
|
||||
/**
|
||||
* Provides an extension point for for modeling sensitive data, such as secrets, certificates, or passwords.
|
||||
* Sensitive data can be interesting to use as data-flow sources in security queries.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
// Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource::Range`
|
||||
private import semmle.python.Frameworks
|
||||
private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics
|
||||
|
||||
// We export these explicitly, so we don't also export the `HeuristicNames` module.
|
||||
class SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;
|
||||
|
||||
module SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;
|
||||
|
||||
/**
|
||||
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `SensitiveDataSource::Range` instead.
|
||||
*/
|
||||
class SensitiveDataSource extends DataFlow::Node {
|
||||
SensitiveDataSource::Range range;
|
||||
|
||||
SensitiveDataSource() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the classification of the sensitive data.
|
||||
*/
|
||||
SensitiveDataClassification getClassification() { result = range.getClassification() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new sources of sensitive data, such as secrets, certificates, or passwords. */
|
||||
module SensitiveDataSource {
|
||||
/**
|
||||
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `SensitiveDataSource` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the classification of the sensitive data.
|
||||
*/
|
||||
abstract SensitiveDataClassification getClassification();
|
||||
}
|
||||
}
|
||||
|
||||
/** Actual sensitive data modeling */
|
||||
private module SensitiveDataModeling {
|
||||
private import SensitiveDataHeuristics::HeuristicNames
|
||||
|
||||
/**
|
||||
* Gets a reference to a function that is considered to be a sensitive source of
|
||||
* `classification`.
|
||||
*/
|
||||
private DataFlow::TypeTrackingNode sensitiveFunction(
|
||||
DataFlow::TypeTracker t, SensitiveDataClassification classification
|
||||
) {
|
||||
t.start() and
|
||||
exists(Function f |
|
||||
f.getName() = sensitiveString(classification) and
|
||||
result.asExpr() = f.getDefinition()
|
||||
)
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = sensitiveFunction(t2, classification).track(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a function that is considered to be a sensitive source of
|
||||
* `classification`.
|
||||
*/
|
||||
DataFlow::Node sensitiveFunction(SensitiveDataClassification classification) {
|
||||
sensitiveFunction(DataFlow::TypeTracker::end(), classification).flowsTo(result)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference (in local scope) to a string constant that, if used as the key in
|
||||
* a lookup, indicates the presence of sensitive data with `classification`.
|
||||
*/
|
||||
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
|
||||
// Note: If this is implemented with type-tracking, we will get cross-talk as
|
||||
// illustrated in python/ql/test/experimental/dataflow/sensitive-data/test.py
|
||||
exists(DataFlow::LocalSourceNode source |
|
||||
source.asExpr().(StrConst).getText() = sensitiveString(classification) and
|
||||
source.flowsTo(result)
|
||||
)
|
||||
}
|
||||
|
||||
/** A function call that is considered a source of sensitive data. */
|
||||
class SensitiveFunctionCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveFunctionCall() {
|
||||
this.getFunction() = sensitiveFunction(classification)
|
||||
or
|
||||
// to cover functions that we don't have the definition for, and where the
|
||||
// reference to the function has not already been marked as being sensitive
|
||||
this.getFunction().asCfgNode().(NameNode).getId() = sensitiveString(classification)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/**
|
||||
* Tracks any modeled source of sensitive data (with any classification),
|
||||
* to limit the scope of `extraStepForCalls`. See it's QLDoc for more context.
|
||||
*
|
||||
* Also see `extraStepForCalls`.
|
||||
*/
|
||||
private DataFlow::TypeTrackingNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result instanceof SensitiveDataSource
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = possibleSensitiveCallable(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Tracks any modeled source of sensitive data (with any classification),
|
||||
* to limit the scope of `extraStepForCalls`. See it's QLDoc for more context.
|
||||
*
|
||||
* Also see `extraStepForCalls`.
|
||||
*/
|
||||
private DataFlow::Node possibleSensitiveCallable() {
|
||||
possibleSensitiveCallable(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a
|
||||
* taint-flow step for sensitive-data, to ensure calls are handled correctly.
|
||||
*
|
||||
* To handle calls properly, while preserving a good source for path explanations,
|
||||
* you need to include this predicate as an additional taint step in your taint-tracking
|
||||
* configurations.
|
||||
*
|
||||
* The core problem can be illustrated by the example below. If we consider the
|
||||
* `print` a sink, what path and what source do we want to show? My initial approach
|
||||
* would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
|
||||
* lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
|
||||
* like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
|
||||
* with a non-optimal path, which starts at _bad source_, and therefore doesn't show
|
||||
* how we figured out that `non_sensitive_name`
|
||||
* could be a function that returns a password (and in cases where there is many calls to
|
||||
* `my_func` it will be annoying for someone to figure this out manually).
|
||||
*
|
||||
* By including this additional taint-step in the taint-tracking configuration, it's possible
|
||||
* to get a path explanation going from _good source_ to the sink.
|
||||
*
|
||||
* ```python
|
||||
* def my_func(non_sensitive_name):
|
||||
* x = non_sensitive_name() # <-- bad source
|
||||
* print(x) # <-- sink
|
||||
*
|
||||
* import not_found
|
||||
* f = not_found.get_passwd # <-- good source
|
||||
* my_func(f)
|
||||
* ```
|
||||
*/
|
||||
predicate extraStepForCalls(DataFlow::Node nodeFrom, DataFlow::CallCfgNode nodeTo) {
|
||||
// However, we do still use the type-tracking approach to limit the size of this
|
||||
// predicate.
|
||||
nodeTo.getFunction() = nodeFrom and
|
||||
nodeFrom = possibleSensitiveCallable()
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveStrConstCandidate() {
|
||||
result = any(StrConst s | not s.isDocString()).getText() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveAttributeNameCandidate() {
|
||||
result = any(DataFlow::AttrRead a).getAttributeName() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveParameterNameCandidate() {
|
||||
result = any(Parameter p).getName() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveFunctionNameCandidate() {
|
||||
result = any(Function f).getName() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveNameCandidate() {
|
||||
result = any(Name n).getId() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
/**
|
||||
* This helper predicate serves to deduplicate the results of the preceding predicates. This
|
||||
* means that if, say, an attribute and a function parameter have the same name, then that name will
|
||||
* only be matched once, which greatly cuts down on the number of regexp matches that have to be
|
||||
* performed.
|
||||
*
|
||||
* Under normal circumstances, deduplication is only performed when a predicate is materialized, and
|
||||
* so to see the effect of this we must create a separate predicate that calculates the union of the
|
||||
* preceding predicates.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private string sensitiveStringCandidate() {
|
||||
result in [
|
||||
sensitiveNameCandidate(), sensitiveAttributeNameCandidate(),
|
||||
sensitiveParameterNameCandidate(), sensitiveFunctionNameCandidate(),
|
||||
sensitiveStrConstCandidate()
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns strings (primarily the names of various program entities) that may contain sensitive data
|
||||
* with the classification `classification`.
|
||||
*
|
||||
* This helper predicate ends up being very similar to `nameIndicatesSensitiveData`,
|
||||
* but is performance optimized to limit the number of regexp matches that have to be performed.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private string sensitiveString(SensitiveDataClassification classification) {
|
||||
result = sensitiveStringCandidate() and
|
||||
result.regexpMatch(maybeSensitiveRegexp(classification))
|
||||
}
|
||||
|
||||
/**
|
||||
* Any kind of variable assignment (also including with/for) where the name indicates
|
||||
* it contains sensitive data.
|
||||
*
|
||||
* Note: We _could_ make any access to a variable with a sensitive name a source of
|
||||
* sensitive data, but to make path explanations in data-flow/taint-tracking good,
|
||||
* we don't want that, since it works against allowing users to understand the flow
|
||||
* in the program (which is the whole point).
|
||||
*
|
||||
* Note: To make data-flow/taint-tracking work, the expression that is _assigned_ to
|
||||
* the variable is marked as the source (as compared to marking the variable as the
|
||||
* source).
|
||||
*/
|
||||
class SensitiveVariableAssignment extends SensitiveDataSource::Range {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveVariableAssignment() {
|
||||
exists(DefinitionNode def |
|
||||
def.(NameNode).getId() = sensitiveString(classification) and
|
||||
(
|
||||
this.asCfgNode() = def.getValue()
|
||||
or
|
||||
this.asCfgNode() = def.getValue().(ForNode).getSequence()
|
||||
) and
|
||||
not this.asExpr() instanceof FunctionExpr and
|
||||
not this.asExpr() instanceof ClassExpr
|
||||
)
|
||||
or
|
||||
exists(With with |
|
||||
with.getOptionalVars().(Name).getId() = sensitiveString(classification) and
|
||||
this.asExpr() = with.getContextExpr()
|
||||
)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/** An attribute access that is considered a source of sensitive data. */
|
||||
class SensitiveAttributeAccess extends SensitiveDataSource::Range {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveAttributeAccess() {
|
||||
// Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
|
||||
// I considered excluding any `from ... import something_sensitive`, but then realized that
|
||||
// we should flag up `form ... import password as ...` as a password
|
||||
this.(DataFlow::AttrRead).getAttributeName() = sensitiveString(classification)
|
||||
or
|
||||
// Things like `getattr(foo, <reference-to-string>)`
|
||||
this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/** A subscript, where the key indicates the result will be sensitive data. */
|
||||
class SensitiveSubscript extends SensitiveDataSource::Range {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveSubscript() {
|
||||
this.asCfgNode().(SubscriptNode).getIndex() =
|
||||
sensitiveLookupStringConst(classification).asCfgNode()
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/** A call to `get` on an object, where the key indicates the result will be sensitive data. */
|
||||
class SensitiveGetCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveGetCall() {
|
||||
this.getFunction().(DataFlow::AttrRef).getAttributeName() = "get" and
|
||||
this.getArg(0) = sensitiveLookupStringConst(classification)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/** A parameter where the name indicates it will receive sensitive data. */
|
||||
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveParameter() { this.getParameter().getName() = sensitiveString(classification) }
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
}
|
||||
|
||||
predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling::extraStepForCalls/2;
|
||||
65
python/ql/lib/semmle/python/dataflow/new/TypeTracker.qll
Normal file
65
python/ql/lib/semmle/python/dataflow/new/TypeTracker.qll
Normal file
@@ -0,0 +1,65 @@
|
||||
/**
|
||||
* This file acts as a wrapper for `internal.TypeTracker`, exposing some of the functionality with
|
||||
* names that are more appropriate for Python.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import internal.TypeTracker as Internal
|
||||
|
||||
/** Any string that may appear as the name of an attribute or access path. */
|
||||
class AttributeName = Internal::ContentName;
|
||||
|
||||
/** Either an attribute name, or the empty string (representing no attribute). */
|
||||
class OptionalAttributeName = Internal::OptionalContentName;
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to track a value to a given dataflow node.
|
||||
*
|
||||
* This can be used to track objects that implement a certain API in order to
|
||||
* recognize calls to that API. Note that type-tracking does not by itself provide a
|
||||
* source/sink relation, that is, it may determine that a node has a given type,
|
||||
* but it won't determine where that type came from.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for tracking some type `myType`:
|
||||
* ```ql
|
||||
* DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* result = myType(t2).track(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::LocalSourceNode myType() { myType(DataFlow::TypeTracker::end()) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
|
||||
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
|
||||
*/
|
||||
class TypeTracker extends Internal::TypeTracker {
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
|
||||
* The type tracking only ends after the attribute has been loaded.
|
||||
*/
|
||||
predicate startInAttr(string attrName) { this.startInContent(attrName) }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Gets the attribute associated with this type tracker.
|
||||
*/
|
||||
string getAttr() { result = this.getContent() }
|
||||
}
|
||||
|
||||
module TypeTracker = Internal::TypeTracker;
|
||||
|
||||
class StepSummary = Internal::StepSummary;
|
||||
|
||||
module StepSummary = Internal::StepSummary;
|
||||
|
||||
class TypeBackTracker = Internal::TypeBackTracker;
|
||||
|
||||
module TypeBackTracker = Internal::TypeBackTracker;
|
||||
@@ -17,6 +17,13 @@ abstract class AttrRef extends Node {
|
||||
*/
|
||||
abstract Node getObject();
|
||||
|
||||
/**
|
||||
* Holds if this data flow node accesses attribute named `attrName` on object `object`.
|
||||
*/
|
||||
predicate accesses(Node object, string attrName) {
|
||||
this.getObject() = object and this.getAttributeName() = attrName
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the expression node that defines the attribute being accessed, if any. This is
|
||||
* usually an identifier or literal.
|
||||
@@ -191,7 +198,7 @@ private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode {
|
||||
* - Dynamic attribute reads using `getattr`: `getattr(object, attr)`
|
||||
* - Qualified imports: `from module import attr as name`
|
||||
*/
|
||||
abstract class AttrRead extends AttrRef, Node { }
|
||||
abstract class AttrRead extends AttrRef, Node, LocalSourceNode { }
|
||||
|
||||
/** A simple attribute read, e.g. `object.attr` */
|
||||
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -35,22 +35,22 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
|
||||
* calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly.
|
||||
*/
|
||||
private module LambdaFlow {
|
||||
private predicate viableParamNonLambda(DataFlowCall call, int i, ParameterNode p) {
|
||||
private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) {
|
||||
p.isParameterOf(viableCallable(call), i)
|
||||
}
|
||||
|
||||
private predicate viableParamLambda(DataFlowCall call, int i, ParameterNode p) {
|
||||
private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) {
|
||||
p.isParameterOf(viableCallableLambda(call, _), i)
|
||||
}
|
||||
|
||||
private predicate viableParamArgNonLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
|
||||
exists(int i |
|
||||
viableParamNonLambda(call, i, p) and
|
||||
arg.argumentOf(call, i)
|
||||
)
|
||||
}
|
||||
|
||||
private predicate viableParamArgLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
|
||||
exists(int i |
|
||||
viableParamLambda(call, i, p) and
|
||||
arg.argumentOf(call, i)
|
||||
@@ -118,8 +118,8 @@ private module LambdaFlow {
|
||||
boolean toJump, DataFlowCallOption lastCall
|
||||
) {
|
||||
revLambdaFlow0(lambdaCall, kind, node, t, toReturn, toJump, lastCall) and
|
||||
if node instanceof CastNode or node instanceof ArgumentNode or node instanceof ReturnNode
|
||||
then compatibleTypes(t, getNodeType(node))
|
||||
if castNode(node) or node instanceof ArgNode or node instanceof ReturnNode
|
||||
then compatibleTypes(t, getNodeDataFlowType(node))
|
||||
else any()
|
||||
}
|
||||
|
||||
@@ -129,7 +129,7 @@ private module LambdaFlow {
|
||||
boolean toJump, DataFlowCallOption lastCall
|
||||
) {
|
||||
lambdaCall(lambdaCall, kind, node) and
|
||||
t = getNodeType(node) and
|
||||
t = getNodeDataFlowType(node) and
|
||||
toReturn = false and
|
||||
toJump = false and
|
||||
lastCall = TDataFlowCallNone()
|
||||
@@ -146,7 +146,7 @@ private module LambdaFlow {
|
||||
getNodeEnclosingCallable(node) = getNodeEnclosingCallable(mid)
|
||||
|
|
||||
preservesValue = false and
|
||||
t = getNodeType(node)
|
||||
t = getNodeDataFlowType(node)
|
||||
or
|
||||
preservesValue = true and
|
||||
t = t0
|
||||
@@ -160,7 +160,7 @@ private module LambdaFlow {
|
||||
toJump = true and
|
||||
lastCall = TDataFlowCallNone()
|
||||
|
|
||||
jumpStep(node, mid) and
|
||||
jumpStepCached(node, mid) and
|
||||
t = t0
|
||||
or
|
||||
exists(boolean preservesValue |
|
||||
@@ -168,7 +168,7 @@ private module LambdaFlow {
|
||||
getNodeEnclosingCallable(node) != getNodeEnclosingCallable(mid)
|
||||
|
|
||||
preservesValue = false and
|
||||
t = getNodeType(node)
|
||||
t = getNodeDataFlowType(node)
|
||||
or
|
||||
preservesValue = true and
|
||||
t = t0
|
||||
@@ -176,7 +176,7 @@ private module LambdaFlow {
|
||||
)
|
||||
or
|
||||
// flow into a callable
|
||||
exists(ParameterNode p, DataFlowCallOption lastCall0, DataFlowCall call |
|
||||
exists(ParamNode p, DataFlowCallOption lastCall0, DataFlowCall call |
|
||||
revLambdaFlowIn(lambdaCall, kind, p, t, toJump, lastCall0) and
|
||||
(
|
||||
if lastCall0 = TDataFlowCallNone() and toJump = false
|
||||
@@ -227,7 +227,7 @@ private module LambdaFlow {
|
||||
|
||||
pragma[nomagic]
|
||||
predicate revLambdaFlowIn(
|
||||
DataFlowCall lambdaCall, LambdaCallKind kind, ParameterNode p, DataFlowType t, boolean toJump,
|
||||
DataFlowCall lambdaCall, LambdaCallKind kind, ParamNode p, DataFlowType t, boolean toJump,
|
||||
DataFlowCallOption lastCall
|
||||
) {
|
||||
revLambdaFlow(lambdaCall, kind, p, t, false, toJump, lastCall)
|
||||
@@ -242,6 +242,89 @@ private DataFlowCallable viableCallableExt(DataFlowCall call) {
|
||||
|
||||
cached
|
||||
private module Cached {
|
||||
/**
|
||||
* If needed, call this predicate from `DataFlowImplSpecific.qll` in order to
|
||||
* force a stage-dependency on the `DataFlowImplCommon.qll` stage and therby
|
||||
* collapsing the two stages.
|
||||
*/
|
||||
cached
|
||||
predicate forceCachingInSameStage() { any() }
|
||||
|
||||
cached
|
||||
predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = n.getEnclosingCallable() }
|
||||
|
||||
cached
|
||||
predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) {
|
||||
c = call.getEnclosingCallable()
|
||||
}
|
||||
|
||||
cached
|
||||
predicate nodeDataFlowType(Node n, DataFlowType t) { t = getNodeType(n) }
|
||||
|
||||
cached
|
||||
predicate jumpStepCached(Node node1, Node node2) { jumpStep(node1, node2) }
|
||||
|
||||
cached
|
||||
predicate clearsContentCached(Node n, Content c) { clearsContent(n, c) }
|
||||
|
||||
cached
|
||||
predicate isUnreachableInCallCached(Node n, DataFlowCall call) { isUnreachableInCall(n, call) }
|
||||
|
||||
cached
|
||||
predicate outNodeExt(Node n) {
|
||||
n instanceof OutNode
|
||||
or
|
||||
n.(PostUpdateNode).getPreUpdateNode() instanceof ArgNode
|
||||
}
|
||||
|
||||
cached
|
||||
predicate hiddenNode(Node n) { nodeIsHidden(n) }
|
||||
|
||||
cached
|
||||
OutNodeExt getAnOutNodeExt(DataFlowCall call, ReturnKindExt k) {
|
||||
result = getAnOutNode(call, k.(ValueReturnKind).getKind())
|
||||
or
|
||||
exists(ArgNode arg |
|
||||
result.(PostUpdateNode).getPreUpdateNode() = arg and
|
||||
arg.argumentOf(call, k.(ParamUpdateReturnKind).getPosition())
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate returnNodeExt(Node n, ReturnKindExt k) {
|
||||
k = TValueReturn(n.(ReturnNode).getKind())
|
||||
or
|
||||
exists(ParamNode p, int pos |
|
||||
parameterValueFlowsToPreUpdate(p, n) and
|
||||
p.isParameterOf(_, pos) and
|
||||
k = TParamUpdate(pos)
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate castNode(Node n) { n instanceof CastNode }
|
||||
|
||||
cached
|
||||
predicate castingNode(Node n) {
|
||||
castNode(n) or
|
||||
n instanceof ParamNode or
|
||||
n instanceof OutNodeExt or
|
||||
// For reads, `x.f`, we want to check that the tracked type after the read (which
|
||||
// is obtained by popping the head of the access path stack) is compatible with
|
||||
// the type of `x.f`.
|
||||
read(_, _, n)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate parameterNode(Node n, DataFlowCallable c, int i) {
|
||||
n.(ParameterNode).isParameterOf(c, i)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate argumentNode(Node n, DataFlowCall call, int pos) {
|
||||
n.(ArgumentNode).argumentOf(call, pos)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a viable target for the lambda call `call`.
|
||||
*
|
||||
@@ -261,7 +344,7 @@ private module Cached {
|
||||
* The instance parameter is considered to have index `-1`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private predicate viableParam(DataFlowCall call, int i, ParameterNode p) {
|
||||
private predicate viableParam(DataFlowCall call, int i, ParamNode p) {
|
||||
p.isParameterOf(viableCallableExt(call), i)
|
||||
}
|
||||
|
||||
@@ -270,11 +353,11 @@ private module Cached {
|
||||
* dispatch into account.
|
||||
*/
|
||||
cached
|
||||
predicate viableParamArg(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) {
|
||||
exists(int i |
|
||||
viableParam(call, i, p) and
|
||||
arg.argumentOf(call, i) and
|
||||
compatibleTypes(getNodeType(arg), getNodeType(p))
|
||||
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -312,7 +395,7 @@ private module Cached {
|
||||
* `read` indicates whether it is contents of `p` that can flow to `node`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlowCand(ParameterNode p, Node node, boolean read) {
|
||||
private predicate parameterValueFlowCand(ParamNode p, Node node, boolean read) {
|
||||
p = node and
|
||||
read = false
|
||||
or
|
||||
@@ -325,30 +408,30 @@ private module Cached {
|
||||
// read
|
||||
exists(Node mid |
|
||||
parameterValueFlowCand(p, mid, false) and
|
||||
readStep(mid, _, node) and
|
||||
read(mid, _, node) and
|
||||
read = true
|
||||
)
|
||||
or
|
||||
// flow through: no prior read
|
||||
exists(ArgumentNode arg |
|
||||
exists(ArgNode arg |
|
||||
parameterValueFlowArgCand(p, arg, false) and
|
||||
argumentValueFlowsThroughCand(arg, node, read)
|
||||
)
|
||||
or
|
||||
// flow through: no read inside method
|
||||
exists(ArgumentNode arg |
|
||||
exists(ArgNode arg |
|
||||
parameterValueFlowArgCand(p, arg, read) and
|
||||
argumentValueFlowsThroughCand(arg, node, false)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlowArgCand(ParameterNode p, ArgumentNode arg, boolean read) {
|
||||
private predicate parameterValueFlowArgCand(ParamNode p, ArgNode arg, boolean read) {
|
||||
parameterValueFlowCand(p, arg, read)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
predicate parameterValueFlowsToPreUpdateCand(ParameterNode p, PostUpdateNode n) {
|
||||
predicate parameterValueFlowsToPreUpdateCand(ParamNode p, PostUpdateNode n) {
|
||||
parameterValueFlowCand(p, n.getPreUpdateNode(), false)
|
||||
}
|
||||
|
||||
@@ -360,7 +443,7 @@ private module Cached {
|
||||
* `read` indicates whether it is contents of `p` that can flow to the return
|
||||
* node.
|
||||
*/
|
||||
predicate parameterValueFlowReturnCand(ParameterNode p, ReturnKind kind, boolean read) {
|
||||
predicate parameterValueFlowReturnCand(ParamNode p, ReturnKind kind, boolean read) {
|
||||
exists(ReturnNode ret |
|
||||
parameterValueFlowCand(p, ret, read) and
|
||||
kind = ret.getKind()
|
||||
@@ -369,9 +452,9 @@ private module Cached {
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate argumentValueFlowsThroughCand0(
|
||||
DataFlowCall call, ArgumentNode arg, ReturnKind kind, boolean read
|
||||
DataFlowCall call, ArgNode arg, ReturnKind kind, boolean read
|
||||
) {
|
||||
exists(ParameterNode param | viableParamArg(call, param, arg) |
|
||||
exists(ParamNode param | viableParamArg(call, param, arg) |
|
||||
parameterValueFlowReturnCand(param, kind, read)
|
||||
)
|
||||
}
|
||||
@@ -382,14 +465,14 @@ private module Cached {
|
||||
*
|
||||
* `read` indicates whether it is contents of `arg` that can flow to `out`.
|
||||
*/
|
||||
predicate argumentValueFlowsThroughCand(ArgumentNode arg, Node out, boolean read) {
|
||||
predicate argumentValueFlowsThroughCand(ArgNode arg, Node out, boolean read) {
|
||||
exists(DataFlowCall call, ReturnKind kind |
|
||||
argumentValueFlowsThroughCand0(call, arg, kind, read) and
|
||||
out = getAnOutNode(call, kind)
|
||||
)
|
||||
}
|
||||
|
||||
predicate cand(ParameterNode p, Node n) {
|
||||
predicate cand(ParamNode p, Node n) {
|
||||
parameterValueFlowCand(p, n, _) and
|
||||
(
|
||||
parameterValueFlowReturnCand(p, _, _)
|
||||
@@ -416,21 +499,21 @@ private module Cached {
|
||||
* If a read step was taken, then `read` captures the `Content`, the
|
||||
* container type, and the content type.
|
||||
*/
|
||||
predicate parameterValueFlow(ParameterNode p, Node node, ReadStepTypesOption read) {
|
||||
predicate parameterValueFlow(ParamNode p, Node node, ReadStepTypesOption read) {
|
||||
parameterValueFlow0(p, node, read) and
|
||||
if node instanceof CastingNode
|
||||
then
|
||||
// normal flow through
|
||||
read = TReadStepTypesNone() and
|
||||
compatibleTypes(getNodeType(p), getNodeType(node))
|
||||
compatibleTypes(getNodeDataFlowType(p), getNodeDataFlowType(node))
|
||||
or
|
||||
// getter
|
||||
compatibleTypes(read.getContentType(), getNodeType(node))
|
||||
compatibleTypes(read.getContentType(), getNodeDataFlowType(node))
|
||||
else any()
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlow0(ParameterNode p, Node node, ReadStepTypesOption read) {
|
||||
private predicate parameterValueFlow0(ParamNode p, Node node, ReadStepTypesOption read) {
|
||||
p = node and
|
||||
Cand::cand(p, _) and
|
||||
read = TReadStepTypesNone()
|
||||
@@ -447,7 +530,7 @@ private module Cached {
|
||||
readStepWithTypes(mid, read.getContainerType(), read.getContent(), node,
|
||||
read.getContentType()) and
|
||||
Cand::parameterValueFlowReturnCand(p, _, true) and
|
||||
compatibleTypes(getNodeType(p), read.getContainerType())
|
||||
compatibleTypes(getNodeDataFlowType(p), read.getContainerType())
|
||||
)
|
||||
or
|
||||
parameterValueFlow0_0(TReadStepTypesNone(), p, node, read)
|
||||
@@ -455,34 +538,32 @@ private module Cached {
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlow0_0(
|
||||
ReadStepTypesOption mustBeNone, ParameterNode p, Node node, ReadStepTypesOption read
|
||||
ReadStepTypesOption mustBeNone, ParamNode p, Node node, ReadStepTypesOption read
|
||||
) {
|
||||
// flow through: no prior read
|
||||
exists(ArgumentNode arg |
|
||||
exists(ArgNode arg |
|
||||
parameterValueFlowArg(p, arg, mustBeNone) and
|
||||
argumentValueFlowsThrough(arg, read, node)
|
||||
)
|
||||
or
|
||||
// flow through: no read inside method
|
||||
exists(ArgumentNode arg |
|
||||
exists(ArgNode arg |
|
||||
parameterValueFlowArg(p, arg, read) and
|
||||
argumentValueFlowsThrough(arg, mustBeNone, node)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlowArg(
|
||||
ParameterNode p, ArgumentNode arg, ReadStepTypesOption read
|
||||
) {
|
||||
private predicate parameterValueFlowArg(ParamNode p, ArgNode arg, ReadStepTypesOption read) {
|
||||
parameterValueFlow(p, arg, read) and
|
||||
Cand::argumentValueFlowsThroughCand(arg, _, _)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate argumentValueFlowsThrough0(
|
||||
DataFlowCall call, ArgumentNode arg, ReturnKind kind, ReadStepTypesOption read
|
||||
DataFlowCall call, ArgNode arg, ReturnKind kind, ReadStepTypesOption read
|
||||
) {
|
||||
exists(ParameterNode param | viableParamArg(call, param, arg) |
|
||||
exists(ParamNode param | viableParamArg(call, param, arg) |
|
||||
parameterValueFlowReturn(param, kind, read)
|
||||
)
|
||||
}
|
||||
@@ -496,18 +577,18 @@ private module Cached {
|
||||
* container type, and the content type.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
predicate argumentValueFlowsThrough(ArgumentNode arg, ReadStepTypesOption read, Node out) {
|
||||
predicate argumentValueFlowsThrough(ArgNode arg, ReadStepTypesOption read, Node out) {
|
||||
exists(DataFlowCall call, ReturnKind kind |
|
||||
argumentValueFlowsThrough0(call, arg, kind, read) and
|
||||
out = getAnOutNode(call, kind)
|
||||
|
|
||||
// normal flow through
|
||||
read = TReadStepTypesNone() and
|
||||
compatibleTypes(getNodeType(arg), getNodeType(out))
|
||||
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(out))
|
||||
or
|
||||
// getter
|
||||
compatibleTypes(getNodeType(arg), read.getContainerType()) and
|
||||
compatibleTypes(read.getContentType(), getNodeType(out))
|
||||
compatibleTypes(getNodeDataFlowType(arg), read.getContainerType()) and
|
||||
compatibleTypes(read.getContentType(), getNodeDataFlowType(out))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -516,7 +597,7 @@ private module Cached {
|
||||
* value-preserving steps and a single read step, not taking call
|
||||
* contexts into account, thus representing a getter-step.
|
||||
*/
|
||||
predicate getterStep(ArgumentNode arg, Content c, Node out) {
|
||||
predicate getterStep(ArgNode arg, Content c, Node out) {
|
||||
argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out)
|
||||
}
|
||||
|
||||
@@ -529,7 +610,7 @@ private module Cached {
|
||||
* container type, and the content type.
|
||||
*/
|
||||
private predicate parameterValueFlowReturn(
|
||||
ParameterNode p, ReturnKind kind, ReadStepTypesOption read
|
||||
ParamNode p, ReturnKind kind, ReadStepTypesOption read
|
||||
) {
|
||||
exists(ReturnNode ret |
|
||||
parameterValueFlow(p, ret, read) and
|
||||
@@ -553,7 +634,7 @@ private module Cached {
|
||||
private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) {
|
||||
mayBenefitFromCallContext(call, callable)
|
||||
or
|
||||
callable = call.getEnclosingCallable() and
|
||||
callEnclosingCallable(call, callable) and
|
||||
exists(viableCallableLambda(call, TDataFlowCallSome(_)))
|
||||
}
|
||||
|
||||
@@ -611,7 +692,7 @@ private module Cached {
|
||||
mayBenefitFromCallContextExt(call, _) and
|
||||
c = viableCallableExt(call) and
|
||||
ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContextExt(call, ctx)) and
|
||||
tgts = strictcount(DataFlowCall ctx | viableCallableExt(ctx) = call.getEnclosingCallable()) and
|
||||
tgts = strictcount(DataFlowCall ctx | callEnclosingCallable(call, viableCallableExt(ctx))) and
|
||||
ctxtgts < tgts
|
||||
)
|
||||
}
|
||||
@@ -635,8 +716,7 @@ private module Cached {
|
||||
* Holds if `p` can flow to the pre-update node associated with post-update
|
||||
* node `n`, in the same callable, using only value-preserving steps.
|
||||
*/
|
||||
cached
|
||||
predicate parameterValueFlowsToPreUpdate(ParameterNode p, PostUpdateNode n) {
|
||||
private predicate parameterValueFlowsToPreUpdate(ParamNode p, PostUpdateNode n) {
|
||||
parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone())
|
||||
}
|
||||
|
||||
@@ -644,9 +724,8 @@ private module Cached {
|
||||
Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
|
||||
) {
|
||||
storeStep(node1, c, node2) and
|
||||
readStep(_, c, _) and
|
||||
contentType = getNodeType(node1) and
|
||||
containerType = getNodeType(node2)
|
||||
contentType = getNodeDataFlowType(node1) and
|
||||
containerType = getNodeDataFlowType(node2)
|
||||
or
|
||||
exists(Node n1, Node n2 |
|
||||
n1 = node1.(PostUpdateNode).getPreUpdateNode() and
|
||||
@@ -654,12 +733,15 @@ private module Cached {
|
||||
|
|
||||
argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1)
|
||||
or
|
||||
readStep(n2, c, n1) and
|
||||
contentType = getNodeType(n1) and
|
||||
containerType = getNodeType(n2)
|
||||
read(n2, c, n1) and
|
||||
contentType = getNodeDataFlowType(n1) and
|
||||
containerType = getNodeDataFlowType(n2)
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate read(Node node1, Content c, Node node2) { readStep(node1, c, node2) }
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `node1` to `node2` via a direct assignment to
|
||||
* `f`.
|
||||
@@ -678,8 +760,9 @@ private module Cached {
|
||||
* are aliases. A typical example is a function returning `this`, implementing a fluent
|
||||
* interface.
|
||||
*/
|
||||
cached
|
||||
predicate reverseStepThroughInputOutputAlias(PostUpdateNode fromNode, PostUpdateNode toNode) {
|
||||
private predicate reverseStepThroughInputOutputAlias(
|
||||
PostUpdateNode fromNode, PostUpdateNode toNode
|
||||
) {
|
||||
exists(Node fromPre, Node toPre |
|
||||
fromPre = fromNode.getPreUpdateNode() and
|
||||
toPre = toNode.getPreUpdateNode()
|
||||
@@ -688,23 +771,34 @@ private module Cached {
|
||||
// Does the language-specific simpleLocalFlowStep already model flow
|
||||
// from function input to output?
|
||||
fromPre = getAnOutNode(c, _) and
|
||||
toPre.(ArgumentNode).argumentOf(c, _) and
|
||||
simpleLocalFlowStep(toPre.(ArgumentNode), fromPre)
|
||||
toPre.(ArgNode).argumentOf(c, _) and
|
||||
simpleLocalFlowStep(toPre.(ArgNode), fromPre)
|
||||
)
|
||||
or
|
||||
argumentValueFlowsThrough(toPre, TReadStepTypesNone(), fromPre)
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate simpleLocalFlowStepExt(Node node1, Node node2) {
|
||||
simpleLocalFlowStep(node1, node2) or
|
||||
reverseStepThroughInputOutputAlias(node1, node2)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the call context `call` either improves virtual dispatch in
|
||||
* `callable` or if it allows us to prune unreachable nodes in `callable`.
|
||||
* Holds if the call context `call` improves virtual dispatch in `callable`.
|
||||
*/
|
||||
cached
|
||||
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
|
||||
predicate recordDataFlowCallSiteDispatch(DataFlowCall call, DataFlowCallable callable) {
|
||||
reducedViableImplInCallContext(_, callable, call)
|
||||
or
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCall(n, call))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the call context `call` allows us to prune unreachable nodes in `callable`.
|
||||
*/
|
||||
cached
|
||||
predicate recordDataFlowCallSiteUnreachable(DataFlowCall call, DataFlowCallable callable) {
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call))
|
||||
}
|
||||
|
||||
cached
|
||||
@@ -726,12 +820,12 @@ private module Cached {
|
||||
cached
|
||||
newtype TLocalFlowCallContext =
|
||||
TAnyLocalCall() or
|
||||
TSpecificLocalCall(DataFlowCall call) { isUnreachableInCall(_, call) }
|
||||
TSpecificLocalCall(DataFlowCall call) { isUnreachableInCallCached(_, call) }
|
||||
|
||||
cached
|
||||
newtype TReturnKindExt =
|
||||
TValueReturn(ReturnKind kind) or
|
||||
TParamUpdate(int pos) { exists(ParameterNode p | p.isParameterOf(_, pos)) }
|
||||
TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
|
||||
|
||||
cached
|
||||
newtype TBooleanOption =
|
||||
@@ -757,27 +851,28 @@ private module Cached {
|
||||
TAccessPathFrontSome(AccessPathFront apf)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the call context `call` either improves virtual dispatch in
|
||||
* `callable` or if it allows us to prune unreachable nodes in `callable`.
|
||||
*/
|
||||
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
|
||||
recordDataFlowCallSiteDispatch(call, callable) or
|
||||
recordDataFlowCallSiteUnreachable(call, callable)
|
||||
}
|
||||
|
||||
/**
|
||||
* A `Node` at which a cast can occur such that the type should be checked.
|
||||
*/
|
||||
class CastingNode extends Node {
|
||||
CastingNode() {
|
||||
this instanceof ParameterNode or
|
||||
this instanceof CastNode or
|
||||
this instanceof OutNodeExt or
|
||||
// For reads, `x.f`, we want to check that the tracked type after the read (which
|
||||
// is obtained by popping the head of the access path stack) is compatible with
|
||||
// the type of `x.f`.
|
||||
readStep(_, _, this)
|
||||
}
|
||||
CastingNode() { castingNode(this) }
|
||||
}
|
||||
|
||||
private predicate readStepWithTypes(
|
||||
Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
|
||||
) {
|
||||
readStep(n1, c, n2) and
|
||||
container = getNodeType(n1) and
|
||||
content = getNodeType(n2)
|
||||
read(n1, c, n2) and
|
||||
container = getNodeDataFlowType(n1) and
|
||||
content = getNodeDataFlowType(n2)
|
||||
}
|
||||
|
||||
private newtype TReadStepTypesOption =
|
||||
@@ -854,7 +949,7 @@ class CallContextSomeCall extends CallContextCall, TSomeCall {
|
||||
override string toString() { result = "CcSomeCall" }
|
||||
|
||||
override predicate relevantFor(DataFlowCallable callable) {
|
||||
exists(ParameterNode p | getNodeEnclosingCallable(p) = callable)
|
||||
exists(ParamNode p | getNodeEnclosingCallable(p) = callable)
|
||||
}
|
||||
|
||||
override predicate matchesCall(DataFlowCall call) { any() }
|
||||
@@ -866,7 +961,7 @@ class CallContextReturn extends CallContextNoCall, TReturn {
|
||||
}
|
||||
|
||||
override predicate relevantFor(DataFlowCallable callable) {
|
||||
exists(DataFlowCall call | this = TReturn(_, call) and call.getEnclosingCallable() = callable)
|
||||
exists(DataFlowCall call | this = TReturn(_, call) and callEnclosingCallable(call, callable))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -899,7 +994,7 @@ class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall
|
||||
}
|
||||
|
||||
private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) {
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCall(n, call))
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCallCached(n, call))
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -913,26 +1008,37 @@ LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable)
|
||||
else result instanceof LocalCallContextAny
|
||||
}
|
||||
|
||||
/**
|
||||
* The value of a parameter at function entry, viewed as a node in a data
|
||||
* flow graph.
|
||||
*/
|
||||
class ParamNode extends Node {
|
||||
ParamNode() { parameterNode(this, _, _) }
|
||||
|
||||
/**
|
||||
* Holds if this node is the parameter of callable `c` at the specified
|
||||
* (zero-based) position.
|
||||
*/
|
||||
predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) }
|
||||
}
|
||||
|
||||
/** A data-flow node that represents a call argument. */
|
||||
class ArgNode extends Node {
|
||||
ArgNode() { argumentNode(this, _, _) }
|
||||
|
||||
/** Holds if this argument occurs at the given position in the given call. */
|
||||
final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A node from which flow can return to the caller. This is either a regular
|
||||
* `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
|
||||
*/
|
||||
class ReturnNodeExt extends Node {
|
||||
ReturnNodeExt() {
|
||||
this instanceof ReturnNode or
|
||||
parameterValueFlowsToPreUpdate(_, this)
|
||||
}
|
||||
ReturnNodeExt() { returnNodeExt(this, _) }
|
||||
|
||||
/** Gets the kind of this returned value. */
|
||||
ReturnKindExt getKind() {
|
||||
result = TValueReturn(this.(ReturnNode).getKind())
|
||||
or
|
||||
exists(ParameterNode p, int pos |
|
||||
parameterValueFlowsToPreUpdate(p, this) and
|
||||
p.isParameterOf(_, pos) and
|
||||
result = TParamUpdate(pos)
|
||||
)
|
||||
}
|
||||
ReturnKindExt getKind() { returnNodeExt(this, result) }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -940,11 +1046,7 @@ class ReturnNodeExt extends Node {
|
||||
* or a post-update node associated with a call argument.
|
||||
*/
|
||||
class OutNodeExt extends Node {
|
||||
OutNodeExt() {
|
||||
this instanceof OutNode
|
||||
or
|
||||
this.(PostUpdateNode).getPreUpdateNode() instanceof ArgumentNode
|
||||
}
|
||||
OutNodeExt() { outNodeExt(this) }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -957,7 +1059,7 @@ abstract class ReturnKindExt extends TReturnKindExt {
|
||||
abstract string toString();
|
||||
|
||||
/** Gets a node corresponding to data flow out of `call`. */
|
||||
abstract OutNodeExt getAnOutNode(DataFlowCall call);
|
||||
final OutNodeExt getAnOutNode(DataFlowCall call) { result = getAnOutNodeExt(call, this) }
|
||||
}
|
||||
|
||||
class ValueReturnKind extends ReturnKindExt, TValueReturn {
|
||||
@@ -968,10 +1070,6 @@ class ValueReturnKind extends ReturnKindExt, TValueReturn {
|
||||
ReturnKind getKind() { result = kind }
|
||||
|
||||
override string toString() { result = kind.toString() }
|
||||
|
||||
override OutNodeExt getAnOutNode(DataFlowCall call) {
|
||||
result = getAnOutNode(call, this.getKind())
|
||||
}
|
||||
}
|
||||
|
||||
class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
|
||||
@@ -982,13 +1080,6 @@ class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
|
||||
int getPosition() { result = pos }
|
||||
|
||||
override string toString() { result = "param update " + pos }
|
||||
|
||||
override OutNodeExt getAnOutNode(DataFlowCall call) {
|
||||
exists(ArgumentNode arg |
|
||||
result.(PostUpdateNode).getPreUpdateNode() = arg and
|
||||
arg.argumentOf(call, this.getPosition())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** A callable tagged with a relevant return kind. */
|
||||
@@ -1015,10 +1106,13 @@ class ReturnPosition extends TReturnPosition0 {
|
||||
*/
|
||||
pragma[inline]
|
||||
DataFlowCallable getNodeEnclosingCallable(Node n) {
|
||||
exists(Node n0 |
|
||||
pragma[only_bind_into](n0) = n and
|
||||
pragma[only_bind_into](result) = n0.getEnclosingCallable()
|
||||
)
|
||||
nodeEnclosingCallable(pragma[only_bind_out](n), pragma[only_bind_into](result))
|
||||
}
|
||||
|
||||
/** Gets the type of `n` used for type pruning. */
|
||||
pragma[inline]
|
||||
DataFlowType getNodeDataFlowType(Node n) {
|
||||
nodeDataFlowType(pragma[only_bind_out](n), pragma[only_bind_into](result))
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
@@ -1037,17 +1131,59 @@ ReturnPosition getReturnPosition(ReturnNodeExt ret) {
|
||||
result = getReturnPosition0(ret, ret.getKind())
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether `inner` can return to `call` in the call context `innercc`.
|
||||
* Assumes a context of `inner = viableCallableExt(call)`.
|
||||
*/
|
||||
bindingset[innercc, inner, call]
|
||||
predicate checkCallContextReturn(CallContext innercc, DataFlowCallable inner, DataFlowCall call) {
|
||||
innercc instanceof CallContextAny
|
||||
or
|
||||
exists(DataFlowCallable c0, DataFlowCall call0 |
|
||||
callEnclosingCallable(call0, inner) and
|
||||
innercc = TReturn(c0, call0) and
|
||||
c0 = prunedViableImplInCallContextReverse(call0, call)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether `call` can resolve to `calltarget` in the call context `cc`.
|
||||
* Assumes a context of `calltarget = viableCallableExt(call)`.
|
||||
*/
|
||||
bindingset[cc, call, calltarget]
|
||||
predicate checkCallContextCall(CallContext cc, DataFlowCall call, DataFlowCallable calltarget) {
|
||||
exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
|
||||
if reducedViableImplInCallContext(call, _, ctx)
|
||||
then calltarget = prunedViableImplInCallContext(call, ctx)
|
||||
else any()
|
||||
)
|
||||
or
|
||||
cc instanceof CallContextSomeCall
|
||||
or
|
||||
cc instanceof CallContextAny
|
||||
or
|
||||
cc instanceof CallContextReturn
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves a return from `callable` in `cc` to `call`. This is equivalent to
|
||||
* `callable = viableCallableExt(call) and checkCallContextReturn(cc, callable, call)`.
|
||||
*/
|
||||
bindingset[cc, callable]
|
||||
predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
|
||||
cc instanceof CallContextAny and callable = viableCallableExt(call)
|
||||
or
|
||||
exists(DataFlowCallable c0, DataFlowCall call0 |
|
||||
call0.getEnclosingCallable() = callable and
|
||||
callEnclosingCallable(call0, callable) and
|
||||
cc = TReturn(c0, call0) and
|
||||
c0 = prunedViableImplInCallContextReverse(call0, call)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves a call from `call` in `cc` to `result`. This is equivalent to
|
||||
* `result = viableCallableExt(call) and checkCallContextCall(cc, call, result)`.
|
||||
*/
|
||||
bindingset[call, cc]
|
||||
DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
|
||||
exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
|
||||
@@ -1063,8 +1199,6 @@ DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
|
||||
result = viableCallableExt(call) and cc instanceof CallContextReturn
|
||||
}
|
||||
|
||||
predicate read = readStep/3;
|
||||
|
||||
/** An optional Boolean value. */
|
||||
class BooleanOption extends TBooleanOption {
|
||||
string toString() {
|
||||
@@ -1116,7 +1250,7 @@ abstract class AccessPathFront extends TAccessPathFront {
|
||||
|
||||
TypedContent getHead() { this = TFrontHead(result) }
|
||||
|
||||
predicate isClearedAt(Node n) { clearsContent(n, getHead().getContent()) }
|
||||
predicate isClearedAt(Node n) { clearsContentCached(n, getHead().getContent()) }
|
||||
}
|
||||
|
||||
class AccessPathFrontNil extends AccessPathFront, TFrontNil {
|
||||
@@ -168,7 +168,13 @@ module Consistency {
|
||||
msg = "ArgumentNode is missing PostUpdateNode."
|
||||
}
|
||||
|
||||
query predicate postWithInFlow(PostUpdateNode n, string msg) {
|
||||
// This predicate helps the compiler forget that in some languages
|
||||
// it is impossible for a `PostUpdateNode` to be the target of
|
||||
// `simpleLocalFlowStep`.
|
||||
private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
|
||||
|
||||
query predicate postWithInFlow(Node n, string msg) {
|
||||
isPostUpdateNode(n) and
|
||||
simpleLocalFlowStep(_, n) and
|
||||
msg = "PostUpdateNode should not be the target of local flow."
|
||||
}
|
||||
@@ -228,7 +228,6 @@ module EssaFlow {
|
||||
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
|
||||
* excludes SSA flow through instance fields.
|
||||
*/
|
||||
cached
|
||||
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// If there is ESSA-flow out of a node `node`, we want flow
|
||||
// both out of `node` and any post-update node of `node`.
|
||||
@@ -870,6 +869,9 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
|
||||
module_export(mv.getScope(), r.getAttributeName(), nodeFrom) and
|
||||
nodeTo = r
|
||||
)
|
||||
or
|
||||
// Default value for parameter flows to that parameter
|
||||
defaultValueFlowStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1034,6 +1036,19 @@ predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node
|
||||
)
|
||||
}
|
||||
|
||||
predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
|
||||
exists(Function f, Parameter p, ParameterDefinition def |
|
||||
// `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
|
||||
p = f.getArgByName(_) and
|
||||
nodeFrom.asExpr() = p.getDefault() and
|
||||
// The following expresses
|
||||
// nodeTo.(ParameterNode).getParameter() = p
|
||||
// without non-monotonic recursion
|
||||
def.getParameter() = p and
|
||||
nodeTo.getNode() = def.getDefiningNode()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
|
||||
*/
|
||||
@@ -1559,7 +1574,6 @@ predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node no
|
||||
* any value stored inside `f` is cleared at the pre-update node associated with `x`
|
||||
* in `x.f = newValue`.
|
||||
*/
|
||||
cached
|
||||
predicate clearsContent(Node n, Content c) {
|
||||
exists(CallNode call, CallableValue callable, string name |
|
||||
call_unpacks(call, _, callable, name, _) and
|
||||
@@ -180,6 +180,45 @@ class CallCfgNode extends CfgNode, LocalSourceNode {
|
||||
Node getArgByName(string name) { result.asCfgNode() = node.getArgByName(name) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node corresponding to a method call, that is `foo.bar(...)`.
|
||||
*
|
||||
* Also covers the case where the method lookup is done separately from the call itself, as in
|
||||
* `temp = foo.bar; temp(...)`. Note that this is only tracked through local scope.
|
||||
*/
|
||||
class MethodCallNode extends CallCfgNode {
|
||||
AttrRead method_lookup;
|
||||
|
||||
MethodCallNode() { method_lookup = this.getFunction().getALocalSource() }
|
||||
|
||||
/**
|
||||
* Gets the name of the method being invoked (the `bar` in `foo.bar(...)`) if it can be determined.
|
||||
*
|
||||
* Note that this method may have multiple results if a single call node represents calls to
|
||||
* multiple different objects and methods. If you want to link up objects and method names
|
||||
* accurately, use the `calls` method instead.
|
||||
*/
|
||||
string getMethodName() { result = method_lookup.getAttributeName() }
|
||||
|
||||
/**
|
||||
* Gets the data-flow node corresponding to the object receiving this call. That is, the `foo` in
|
||||
* `foo.bar(...)`.
|
||||
*
|
||||
* Note that this method may have multiple results if a single call node represents calls to
|
||||
* multiple different objects and methods. If you want to link up objects and method names
|
||||
* accurately, use the `calls` method instead.
|
||||
*/
|
||||
Node getObject() { result = method_lookup.getObject() }
|
||||
|
||||
/** Holds if this data-flow node calls method `methodName` on the object node `object`. */
|
||||
predicate calls(Node object, string methodName) {
|
||||
// As `getObject` and `getMethodName` may both have multiple results, we must look up the object
|
||||
// and method name directly on `method_lookup`.
|
||||
object = method_lookup.getObject() and
|
||||
methodName = method_lookup.getAttributeName()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An expression, viewed as a node in a data flow graph.
|
||||
*
|
||||
@@ -18,6 +18,10 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
|
||||
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
|
||||
|
||||
/**
|
||||
* DEPRECATED. Use the API graphs library (`semmle.python.ApiGraphs`) instead.
|
||||
*
|
||||
* For a drop-in replacement, use `API::moduleImport(name).getAUse()`.
|
||||
*
|
||||
* Gets a `Node` that refers to the module referenced by `name`.
|
||||
* Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
|
||||
* reference to the module `pkg`.
|
||||
@@ -37,7 +41,7 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
|
||||
* `mypkg/foo.py` but the variable `foo` containing `42` -- however, `import mypkg.foo` will always cause `mypkg.foo`
|
||||
* to refer to the module.
|
||||
*/
|
||||
Node importNode(string name) {
|
||||
deprecated Node importNode(string name) {
|
||||
exists(Variable var, Import imp, Alias alias |
|
||||
alias = imp.getAName() and
|
||||
alias.getAsname() = var.getAStore() and
|
||||
@@ -33,15 +33,20 @@ private import DataFlowPrivate
|
||||
class LocalSourceNode extends Node {
|
||||
cached
|
||||
LocalSourceNode() {
|
||||
not simpleLocalFlowStep(_, this) and
|
||||
// Currently, we create synthetic post-update nodes for
|
||||
// - arguments to calls that may modify said argument
|
||||
// - direct reads a writes of object attributes
|
||||
// Both of these preserve the identity of the underlying pointer, and hence we exclude these as
|
||||
// local source nodes.
|
||||
// We do, however, allow the post-update nodes that arise from object creation (which are non-synthetic).
|
||||
not this instanceof SyntheticPostUpdateNode
|
||||
this instanceof ExprNode and
|
||||
not simpleLocalFlowStep(_, this)
|
||||
or
|
||||
// We include all module variable nodes, as these act as stepping stones between writes and
|
||||
// reads of global variables. Without them, type tracking based on `LocalSourceNode`s would be
|
||||
// unable to track across global variables.
|
||||
//
|
||||
// Once the `track` and `backtrack` methods have been fully deprecated, this disjunct can be
|
||||
// removed, and the entire class can extend `ExprNode`. At that point, `TypeTrackingNode` should
|
||||
// be used for type tracking instead of `LocalSourceNode`.
|
||||
this instanceof ModuleVariableNode
|
||||
or
|
||||
// We explicitly include any read of a global variable, as some of these may have local flow going
|
||||
// into them.
|
||||
this = any(ModuleVariableNode mvn).getARead()
|
||||
}
|
||||
|
||||
@@ -59,6 +64,11 @@ class LocalSourceNode extends Node {
|
||||
*/
|
||||
AttrRead getAnAttributeRead(string attrName) { result = getAnAttributeReference(attrName) }
|
||||
|
||||
/**
|
||||
* Gets a write of attribute `attrName` on this node.
|
||||
*/
|
||||
AttrWrite getAnAttributeWrite(string attrName) { result = getAnAttributeReference(attrName) }
|
||||
|
||||
/**
|
||||
* Gets a reference (read or write) of any attribute on this node.
|
||||
*/
|
||||
@@ -73,11 +83,26 @@ class LocalSourceNode extends Node {
|
||||
*/
|
||||
AttrRead getAnAttributeRead() { result = getAnAttributeReference() }
|
||||
|
||||
/**
|
||||
* Gets a write of any attribute on this node.
|
||||
*/
|
||||
AttrWrite getAnAttributeWrite() { result = getAnAttributeReference() }
|
||||
|
||||
/**
|
||||
* Gets a call to this node.
|
||||
*/
|
||||
CallCfgNode getACall() { Cached::call(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a call to the method `methodName` on this node.
|
||||
*
|
||||
* Includes both calls that have the syntactic shape of a method call (as in `obj.m(...)`), and
|
||||
* calls where the callee undergoes some additional local data flow (as in `tmp = obj.m; m(...)`).
|
||||
*/
|
||||
MethodCallNode getAMethodCall(string methodName) {
|
||||
result = this.getAnAttributeRead(methodName).getACall()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node that this node may flow to using one heap and/or interprocedural step.
|
||||
*
|
||||
@@ -95,6 +120,53 @@ class LocalSourceNode extends Node {
|
||||
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A node that can be used for type tracking or type back-tracking.
|
||||
*
|
||||
* All steps made during type tracking should be between instances of this class.
|
||||
*/
|
||||
class TypeTrackingNode = LocalSourceNode;
|
||||
|
||||
/** Temporary holding ground for the `TypeTrackingNode` class. */
|
||||
private module FutureWork {
|
||||
class FutureTypeTrackingNode extends Node {
|
||||
FutureTypeTrackingNode() {
|
||||
this instanceof LocalSourceNode
|
||||
or
|
||||
this instanceof ModuleVariableNode
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this node can flow to `nodeTo` in one or more local flow steps.
|
||||
*
|
||||
* For `ModuleVariableNode`s, the only "local" step is to the node itself.
|
||||
* For `LocalSourceNode`s, this is the usual notion of local flow.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate flowsTo(Node node) {
|
||||
this instanceof ModuleVariableNode and this = node
|
||||
or
|
||||
this.(LocalSourceNode).flowsTo(node)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node that this node may flow to using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTrackingNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a node that may flow into this one using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeBackTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTrackingNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
|
||||
}
|
||||
}
|
||||
|
||||
cached
|
||||
private module Cached {
|
||||
/**
|
||||
@@ -107,11 +179,21 @@ private module Cached {
|
||||
source = sink
|
||||
or
|
||||
exists(Node second |
|
||||
simpleLocalFlowStep(source, second) and
|
||||
simpleLocalFlowStep*(second, sink)
|
||||
localSourceFlowStep(source, second) and
|
||||
localSourceFlowStep*(second, sink)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper predicate for `hasLocalSource`. Removes any steps go to module variable reads, as these
|
||||
* are already local source nodes in their own right.
|
||||
*/
|
||||
cached
|
||||
private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo) and
|
||||
not nodeTo = any(ModuleVariableNode v).getARead()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `base` flows to the base of `ref` and `ref` has attribute name `attr`.
|
||||
*/
|
||||
@@ -0,0 +1,73 @@
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Provides helper predicates for pretty-printing `DataFlow::Node`s.
|
||||
*
|
||||
* Since these have not been performance optimized, please only use them for
|
||||
* debug-queries or in tests.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Gets the pretty-printed version of the Expr `e`.
|
||||
*/
|
||||
string prettyExpr(Expr e) {
|
||||
not e instanceof Num and
|
||||
not e instanceof StrConst and
|
||||
not e instanceof Subscript and
|
||||
not e instanceof Call and
|
||||
not e instanceof Attribute and
|
||||
result = e.toString()
|
||||
or
|
||||
result = e.(Num).getN()
|
||||
or
|
||||
result =
|
||||
e.(StrConst).getPrefix() + e.(StrConst).getText() +
|
||||
e.(StrConst).getPrefix().regexpReplaceAll("[a-zA-Z]+", "")
|
||||
or
|
||||
result = prettyExpr(e.(Subscript).getObject()) + "[" + prettyExpr(e.(Subscript).getIndex()) + "]"
|
||||
or
|
||||
(
|
||||
if exists(e.(Call).getAnArg()) or exists(e.(Call).getANamedArg())
|
||||
then result = prettyExpr(e.(Call).getFunc()) + "(..)"
|
||||
else result = prettyExpr(e.(Call).getFunc()) + "()"
|
||||
)
|
||||
or
|
||||
result = prettyExpr(e.(Attribute).getObject()) + "." + e.(Attribute).getName()
|
||||
}
|
||||
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Gets the pretty-printed version of the DataFlow::Node `node`
|
||||
*/
|
||||
bindingset[node]
|
||||
string prettyNode(DataFlow::Node node) {
|
||||
if exists(node.asExpr()) then result = prettyExpr(node.asExpr()) else result = node.toString()
|
||||
}
|
||||
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Gets the pretty-printed version of the DataFlow::Node `node`, that is suitable for use
|
||||
* with `TestUtilities.InlineExpectationsTest` (that is, no spaces unless required).
|
||||
*/
|
||||
bindingset[node]
|
||||
string prettyNodeForInlineTest(DataFlow::Node node) {
|
||||
exists(node.asExpr()) and
|
||||
result = prettyExpr(node.asExpr())
|
||||
or
|
||||
exists(Expr e | e = node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() |
|
||||
// since PostUpdateNode both has space in the `[post <thing>]` annotation, and does
|
||||
// not pretty print the pre-update node, we do custom handling of this.
|
||||
result = "[post]" + prettyExpr(e)
|
||||
)
|
||||
or
|
||||
not exists(node.asExpr()) and
|
||||
not exists(Expr e | e = node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()) and
|
||||
result = node.toString()
|
||||
}
|
||||
@@ -1,7 +1,8 @@
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
|
||||
private import semmle.python.dataflow.new.internal.TaintTrackingPublic
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Holds if `node` should be a sanitizer in all global taint flow configurations
|
||||
@@ -10,35 +11,52 @@ private import semmle.python.dataflow.new.internal.TaintTrackingPublic
|
||||
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
|
||||
|
||||
/**
|
||||
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
|
||||
* global taint flow configurations.
|
||||
* Holds if default `TaintTracking::Configuration`s should allow implicit reads
|
||||
* of `c` at sinks and inputs to additional taint steps.
|
||||
*/
|
||||
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
localAdditionalTaintStep(nodeFrom, nodeTo)
|
||||
or
|
||||
any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
|
||||
bindingset[node]
|
||||
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
|
||||
|
||||
private module Cached {
|
||||
/**
|
||||
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
|
||||
* global taint flow configurations.
|
||||
*/
|
||||
cached
|
||||
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
localAdditionalTaintStep(nodeFrom, nodeTo)
|
||||
or
|
||||
any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
|
||||
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
|
||||
* different objects.
|
||||
*/
|
||||
cached
|
||||
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
concatStep(nodeFrom, nodeTo)
|
||||
or
|
||||
subscriptStep(nodeFrom, nodeTo)
|
||||
or
|
||||
stringManipulation(nodeFrom, nodeTo)
|
||||
or
|
||||
containerStep(nodeFrom, nodeTo)
|
||||
or
|
||||
copyStep(nodeFrom, nodeTo)
|
||||
or
|
||||
DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
|
||||
or
|
||||
DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
|
||||
or
|
||||
DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
|
||||
or
|
||||
awaitStep(nodeFrom, nodeTo)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
|
||||
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
|
||||
* different objects.
|
||||
*/
|
||||
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
concatStep(nodeFrom, nodeTo)
|
||||
or
|
||||
subscriptStep(nodeFrom, nodeTo)
|
||||
or
|
||||
stringManipulation(nodeFrom, nodeTo)
|
||||
or
|
||||
containerStep(nodeFrom, nodeTo)
|
||||
or
|
||||
copyStep(nodeFrom, nodeTo)
|
||||
or
|
||||
forStep(nodeFrom, nodeTo)
|
||||
or
|
||||
unpackingAssignmentStep(nodeFrom, nodeTo)
|
||||
}
|
||||
import Cached
|
||||
|
||||
/**
|
||||
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to concatenation.
|
||||
@@ -69,13 +87,13 @@ predicate subscriptStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
|
||||
*/
|
||||
predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
|
||||
// transforming something tainted into a string will make the string tainted
|
||||
exists(CallNode call | call = nodeTo.getNode() |
|
||||
call.getFunction().(NameNode).getId() in ["str", "bytes", "unicode"] and
|
||||
exists(DataFlow::CallCfgNode call | call = nodeTo |
|
||||
(
|
||||
nodeFrom.getNode() = call.getArg(0)
|
||||
call = API::builtin(["str", "bytes", "unicode"]).getACall()
|
||||
or
|
||||
nodeFrom.getNode() = call.getArgByName("object")
|
||||
)
|
||||
call.getFunction().asCfgNode().(NameNode).getId() in ["str", "bytes", "unicode"]
|
||||
) and
|
||||
nodeFrom in [call.getArg(0), call.getArgByName("object")]
|
||||
)
|
||||
or
|
||||
// String methods. Note that this doesn't recognize `meth = "foo".upper; meth()`
|
||||
@@ -142,39 +160,37 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
|
||||
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
|
||||
// construction by literal
|
||||
// TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
|
||||
storeStep(nodeFrom, _, nodeTo)
|
||||
DataFlowPrivate::storeStep(nodeFrom, _, nodeTo)
|
||||
or
|
||||
// constructor call
|
||||
exists(CallNode call | call = nodeTo.asCfgNode() |
|
||||
call.getFunction().(NameNode).getId() in [
|
||||
"list", "set", "frozenset", "dict", "defaultdict", "tuple"
|
||||
] and
|
||||
call.getArg(0) = nodeFrom.getNode()
|
||||
exists(DataFlow::CallCfgNode call | call = nodeTo |
|
||||
call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and
|
||||
call.getArg(0) = nodeFrom
|
||||
// TODO: Properly handle defaultdict/namedtuple
|
||||
)
|
||||
or
|
||||
// functions operating on collections
|
||||
exists(CallNode call | call = nodeTo.asCfgNode() |
|
||||
call.getFunction().(NameNode).getId() in ["sorted", "reversed", "iter", "next"] and
|
||||
call.getArg(0) = nodeFrom.getNode()
|
||||
exists(DataFlow::CallCfgNode call | call = nodeTo |
|
||||
call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and
|
||||
call.getArg(0) = nodeFrom
|
||||
)
|
||||
or
|
||||
// methods
|
||||
exists(CallNode call, string name | call = nodeTo.asCfgNode() |
|
||||
name in [
|
||||
exists(DataFlow::MethodCallNode call, string methodName | call = nodeTo |
|
||||
methodName in [
|
||||
// general
|
||||
"copy", "pop",
|
||||
// dict
|
||||
"values", "items", "get", "popitem"
|
||||
] and
|
||||
call.getFunction().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
|
||||
call.calls(nodeFrom, methodName)
|
||||
)
|
||||
or
|
||||
// list.append, set.add
|
||||
exists(CallNode call, string name |
|
||||
name in ["append", "add"] and
|
||||
call.getFunction().(AttrNode).getObject(name) =
|
||||
nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode().asCfgNode() and
|
||||
call.getArg(0) = nodeFrom.getNode()
|
||||
exists(DataFlow::MethodCallNode call, DataFlow::Node obj |
|
||||
call.calls(obj, ["append", "add"]) and
|
||||
obj = nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() and
|
||||
call.getArg(0) = nodeFrom
|
||||
)
|
||||
}
|
||||
|
||||
@@ -182,38 +198,16 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
|
||||
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying.
|
||||
*/
|
||||
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
|
||||
exists(CallNode call | call = nodeTo.getNode() |
|
||||
// Fully qualified: copy.copy, copy.deepcopy
|
||||
(
|
||||
call.getFunction().(NameNode).getId() in ["copy", "deepcopy"]
|
||||
or
|
||||
call.getFunction().(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy"
|
||||
) and
|
||||
call.getArg(0) = nodeFrom.getNode()
|
||||
exists(DataFlow::CallCfgNode call | call = nodeTo |
|
||||
call = API::moduleImport("copy").getMember(["copy", "deepcopy"]).getACall() and
|
||||
call.getArg(0) = nodeFrom
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
|
||||
* for example `for x in xs`, or `for x,y in points`.
|
||||
* Holds if taint can flow from `nodeFrom` to `nodeTo` with an `await`-step,
|
||||
* such that the whole expression `await x` is tainted if `x` is tainted.
|
||||
*/
|
||||
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
|
||||
exists(EssaNodeDefinition defn, For for |
|
||||
for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
|
||||
nodeTo.getVar() = defn and
|
||||
nodeFrom.asExpr() = for.getIter()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
|
||||
* Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
|
||||
*/
|
||||
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
|
||||
// `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
|
||||
exists(MultiAssignmentDefinition defn, Assign assign |
|
||||
assign.getATarget().contains(defn.getDefiningNode().getNode()) and
|
||||
nodeTo.getVar() = defn and
|
||||
nodeFrom.asExpr() = assign.getValue()
|
||||
)
|
||||
predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
nodeTo.asExpr().(Await).getValue() = nodeFrom.asExpr()
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user