mirror of
https://github.com/github/codeql.git
synced 2025-12-24 04:36:35 +01:00
Merge remote-tracking branch 'origin/main' into jty/python/emailInjection
This commit is contained in:
100
python/.vscode/ql.code-snippets
vendored
100
python/.vscode/ql.code-snippets
vendored
@@ -106,7 +106,7 @@
|
||||
"prefix": "type tracking",
|
||||
"body": [
|
||||
"/** Gets a reference to ${3:a thing}. */",
|
||||
"private DataFlow::LocalSourceNode ${1:myType}(DataFlow::TypeTracker t) {",
|
||||
"private DataFlow::TypeTrackingNode ${1:myType}(DataFlow::TypeTracker t) {",
|
||||
" t.start() and",
|
||||
" result = ${2:value}",
|
||||
" or",
|
||||
@@ -152,4 +152,102 @@
|
||||
]
|
||||
},
|
||||
|
||||
"Type tracking class": {
|
||||
"scope": "ql",
|
||||
"prefix": "type tracking class",
|
||||
"body": [
|
||||
"/**",
|
||||
" * Provides models for the `${TM_SELECTED_TEXT}` class",
|
||||
" *",
|
||||
" * See ${1:https://apiref (TODO)}.",
|
||||
" */",
|
||||
"module ${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/} {",
|
||||
" /** Gets a reference to the `${TM_SELECTED_TEXT}` class. */",
|
||||
" private API::Node classRef() {",
|
||||
" result = API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")",
|
||||
" }",
|
||||
"",
|
||||
" /**",
|
||||
" * A source of instances of `${TM_SELECTED_TEXT}`, extend this class to model new instances.",
|
||||
" *",
|
||||
" * This can include instantiations of the class, return values from function",
|
||||
" * calls, or a special parameter that will be set when functions are called by an external",
|
||||
" * library.",
|
||||
" *",
|
||||
" * Use the predicate `${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/}::instance()` to get references to instances of `${TM_SELECTED_TEXT}`.",
|
||||
" */",
|
||||
" abstract class InstanceSource extends DataFlow::LocalSourceNode { }",
|
||||
"",
|
||||
" /** A direct instantiation of `${TM_SELECTED_TEXT}`. */",
|
||||
" private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {",
|
||||
" ClassInstantiation() { this = classRef().getACall() }",
|
||||
" }",
|
||||
"",
|
||||
" /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
|
||||
" private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {",
|
||||
" t.start() and",
|
||||
" result instanceof InstanceSource",
|
||||
" or",
|
||||
" exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))",
|
||||
" }",
|
||||
"",
|
||||
" /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
|
||||
" DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }",
|
||||
"",
|
||||
" /**",
|
||||
" * Taint propagation for `${TM_SELECTED_TEXT}`.",
|
||||
" */",
|
||||
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
|
||||
" InstanceTaintSteps() { this = \"${TM_SELECTED_TEXT}\" }",
|
||||
" ",
|
||||
" override DataFlow::Node getInstance() { result = instance() }",
|
||||
" ",
|
||||
" override string getAttributeName() { none() }",
|
||||
" ",
|
||||
" override string getMethodName() { none() }",
|
||||
" ",
|
||||
" override string getAsyncMethodName() { none() }",
|
||||
" }",
|
||||
"",
|
||||
" /**",
|
||||
" * Extra taint propagation for `${TM_SELECTED_TEXT}`, not covered by `InstanceTaintSteps`.",
|
||||
" */",
|
||||
" private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {",
|
||||
" override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {",
|
||||
" // TODO",
|
||||
" none()",
|
||||
" }",
|
||||
" }",
|
||||
"}",
|
||||
],
|
||||
"description": "Type tracking class (select full class path before inserting)",
|
||||
},
|
||||
"foo": {
|
||||
"scope": "ql",
|
||||
"prefix": "foo",
|
||||
"body": [
|
||||
" /**",
|
||||
" * Taint propagation for `$1`.",
|
||||
" */",
|
||||
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
|
||||
" InstanceTaintSteps() { this = \"$1\" }",
|
||||
"",
|
||||
" override DataFlow::Node getInstance() { result = instance() }",
|
||||
"",
|
||||
" override string getAttributeName() { none() }",
|
||||
"",
|
||||
" override string getMethodName() { none() }",
|
||||
"",
|
||||
" override string getAsyncMethodName() { none() }",
|
||||
" }",
|
||||
],
|
||||
},
|
||||
"API graph .getMember chain": {
|
||||
"scope": "ql",
|
||||
"prefix": "api graph .getMember chain",
|
||||
"body": [
|
||||
"API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")"
|
||||
],
|
||||
"description": "API graph .getMember chain (select full path before inserting)",
|
||||
},
|
||||
}
|
||||
|
||||
2
python/change-notes/2021-05-21-api-graph-await.md
Normal file
2
python/change-notes/2021-05-21-api-graph-await.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* API graph nodes now contain a `getAwaited()` member predicate, for getting the result of awaiting an item, such as `await foo`.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added model of SQL execution in `clickhouse-driver` and `aioch` PyPI packages, resulting in additional sinks for the SQL Injection query (`py/sql-injection`). This modeling was originally [submitted as a contribution by @japroc](https://github.com/github/codeql/pull/5889).
|
||||
2
python/change-notes/2021-06-08-twisted-add-modeling.md
Normal file
2
python/change-notes/2021-06-08-twisted-add-modeling.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of sources/sinks when using `twisted` to create web servers.
|
||||
2
python/change-notes/2021-06-09-add-jmespath-modeling.md
Normal file
2
python/change-notes/2021-06-09-add-jmespath-modeling.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of the PyPI package `jmespath`.
|
||||
2
python/change-notes/2021-06-09-rsa-add-modeling.md
Normal file
2
python/change-notes/2021-06-09-rsa-add-modeling.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of the PyPI package `rsa`.
|
||||
@@ -0,0 +1,5 @@
|
||||
lgtm,codescanning
|
||||
* A new class `DataFlow::MethodCallNode` extends `DataFlow::CallCfgNode` with convenient methods for
|
||||
accessing the receiver and method name of a method call.
|
||||
* The `LocalSourceNode` class now has a `getAMethodCall` method, with which one can easily access
|
||||
method calls with the given node as a receiver.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of the PyPI package `MarkupSafe`.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added `HTTP::Server::CookieWrite` concept for statements that sets a cookie in an HTTP response, along with modeling of this in supported web frameworks (aiohttp/flask/django/tornado/twisted).
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* The DataFlow libraries have been augmented with support for `Configuration`-specific in-place read steps at, for example, sinks and custom taint steps. This means that it is now possible to specify sinks that accept flow with non-empty access paths.
|
||||
2
python/change-notes/2021-06-25-add-peewee-modeling.md
Normal file
2
python/change-notes/2021-06-25-add-peewee-modeling.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of raw SQL execution from the PyPI package `peewee`.
|
||||
2
python/change-notes/2021-07-12-add-typetrackingnode.md
Normal file
2
python/change-notes/2021-07-12-add-typetrackingnode.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* The `track` and `backtrack` methods on `LocalSourceNode` are in the process of being deprecated. When using type trackers, the corresponding methods on `TypeTrackingNode` should be used instead.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Changed the way to provide extra sources/sinks for `@kind path-problem` queries, to avoid a potential performance problem due to re-evaluation of data-flow configurations. Please use the new `<query>Customization.qll` files and extend their classes instead (such as extending the `Sink` class from `python/ql/src/semmle/python/security/dataflow/SqlInjectionCustomizations.qll`). This is relevant for the queries: `py/sql-injection`, `py/code-injection`, `py/command-line-injection`, `py/reflective-xss`, `py/url-redirection`, `py/unsafe-deserialization`, `py/stack-trace-exposure`, `py/path-injection`.
|
||||
4
python/change-notes/2021-07-16-deprecate-importnode.md
Normal file
4
python/change-notes/2021-07-16-deprecate-importnode.md
Normal file
@@ -0,0 +1,4 @@
|
||||
lgtm,codescanning
|
||||
* The `importNode` predicate from the data-flow library has been deprecated. In its place, we
|
||||
recommend using the API graphs library, accessible via `import semmle.python.ApiGraphs`.
|
||||
|
||||
3
python/change-notes/2021-07-28-port-RoDoS-queries.md
Normal file
3
python/change-notes/2021-07-28-port-RoDoS-queries.md
Normal file
@@ -0,0 +1,3 @@
|
||||
lgtm,codescanning
|
||||
* Added an experimental _Inefficient regular expression_ (`py/redos`) query, which is already available in JavaScript.
|
||||
* Added an experimental _Polynomial regular expression used on uncontrolled data_ (`py/polynomial-redos`), which is already available in JavaScript.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Updated _Modification of parameter with default_ (`py/modification-of-default-value`) query to use the new data flow library instead of the old taint tracking library and to remove the use of points-to analysis. You may see differences in the results found by the query, but overall this change should result in a more robust and accurate analysis.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of SQL execution in the `Flask-SQLAlchemy` PyPI package, resulting in additional sinks for the SQL Injection query (`py/sql-injection`).
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of SQL execution in the `SQLAlchemy` PyPI package, resulting in additional sinks for the SQL Injection query (`py/sql-injection`). This modeling was originally [submitted as a contribution by @mrthankyou](https://github.com/github/codeql/pull/5680).
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Expanded the query _SQL query built from user-controlled sources_ (`py/sql-injection`) to alert if user-input is added to a TextClause from SQLAlchemy, since that can lead to SQL injection.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Function parameters with default values will now see flow from those values.
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* The query "Regular expression injection" (`py/regex-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @jorgectf](https://github.com/github/codeql/pull/5442).
|
||||
@@ -0,0 +1,3 @@
|
||||
lgtm,codescanning
|
||||
* Added data-flow from both `x` and `y` to `x or y` and `x and y`, as a slight over-approximation of what is described in the
|
||||
[Python Language Reference](https://docs.python.org/3/reference/expressions.html#boolean-operations).
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Improved modeling of decoding through pickle related functions (which can lead to code execution), resulting in additional sinks for the _Deserializing untrusted input_ query (`py/unsafe-deserialization`). Now we fully support `pickle.load`, `pickle.loads`, `pickle.Unpickler`, `marshal.load`, `marshal.loads`, `dill.load`, `dill.loads`, `shelve.open`.
|
||||
@@ -0,0 +1,2 @@
|
||||
codescanning
|
||||
* Problems with extraction that in most cases won't completely break the analysis are now reported as warnings rather than errors.
|
||||
2
python/change-notes/2021-10-26-ruamel.yaml-modeling.md
Normal file
2
python/change-notes/2021-10-26-ruamel.yaml-modeling.md
Normal file
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of the `ruamel.yaml` PyPI package, resulting in additional sinks for the _Deserializing untrusted input_ (`py/unsafe-deserialization`) query (since `ruamel.yaml.load` can lead to code execution).
|
||||
4
python/ql/examples/qlpack.lock.yml
Normal file
4
python/ql/examples/qlpack.lock.yml
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
dependencies: {}
|
||||
compiled: false
|
||||
lockVersion: 1.0.0
|
||||
@@ -1,3 +1,4 @@
|
||||
name: codeql-python-examples
|
||||
version: 0.0.0
|
||||
libraryPathDependencies: codeql-python
|
||||
name: codeql/python-examples
|
||||
version: 0.0.2
|
||||
dependencies:
|
||||
codeql/python-all: "*"
|
||||
|
||||
4
python/ql/lib/qlpack.lock.yml
Normal file
4
python/ql/lib/qlpack.lock.yml
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
dependencies: {}
|
||||
compiled: false
|
||||
lockVersion: 1.0.0
|
||||
7
python/ql/lib/qlpack.yml
Normal file
7
python/ql/lib/qlpack.yml
Normal file
@@ -0,0 +1,7 @@
|
||||
name: codeql/python-all
|
||||
version: 0.0.2
|
||||
dbscheme: semmlecode.python.dbscheme
|
||||
extractor: python
|
||||
library: true
|
||||
dependencies:
|
||||
codeql/python-upgrades: 0.0.2
|
||||
@@ -55,7 +55,7 @@ module API {
|
||||
/**
|
||||
* Gets a call to the function represented by this API component.
|
||||
*/
|
||||
DataFlow::CallCfgNode getACall() { result = getReturn().getAnImmediateUse() }
|
||||
DataFlow::CallCfgNode getACall() { result = this.getReturn().getAnImmediateUse() }
|
||||
|
||||
/**
|
||||
* Gets a node representing member `m` of this API component.
|
||||
@@ -67,21 +67,21 @@ module API {
|
||||
*/
|
||||
bindingset[m]
|
||||
bindingset[result]
|
||||
Node getMember(string m) { result = getASuccessor(Label::member(m)) }
|
||||
Node getMember(string m) { result = this.getASuccessor(Label::member(m)) }
|
||||
|
||||
/**
|
||||
* Gets a node representing a member of this API component where the name of the member is
|
||||
* not known statically.
|
||||
*/
|
||||
Node getUnknownMember() { result = getASuccessor(Label::unknownMember()) }
|
||||
Node getUnknownMember() { result = this.getASuccessor(Label::unknownMember()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing a member of this API component where the name of the member may
|
||||
* or may not be known statically.
|
||||
*/
|
||||
Node getAMember() {
|
||||
result = getASuccessor(Label::member(_)) or
|
||||
result = getUnknownMember()
|
||||
result = this.getASuccessor(Label::member(_)) or
|
||||
result = this.getUnknownMember()
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -90,18 +90,25 @@ module API {
|
||||
* This predicate may have multiple results when there are multiple invocations of this API component.
|
||||
* Consider using `getACall()` if there is a need to distinguish between individual calls.
|
||||
*/
|
||||
Node getReturn() { result = getASuccessor(Label::return()) }
|
||||
Node getReturn() { result = this.getASuccessor(Label::return()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing a subclass of the class represented by this node.
|
||||
*/
|
||||
Node getASubclass() { result = getASuccessor(Label::subclass()) }
|
||||
Node getASubclass() { result = this.getASuccessor(Label::subclass()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing the result from awaiting this node.
|
||||
*/
|
||||
Node getAwaited() { result = this.getASuccessor(Label::await()) }
|
||||
|
||||
/**
|
||||
* Gets a string representation of the lexicographically least among all shortest access paths
|
||||
* from the root to this node.
|
||||
*/
|
||||
string getPath() { result = min(string p | p = getAPath(Impl::distanceFromRoot(this)) | p) }
|
||||
string getPath() {
|
||||
result = min(string p | p = this.getAPath(Impl::distanceFromRoot(this)) | p)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node such that there is an edge in the API graph between this node and the other
|
||||
@@ -119,13 +126,13 @@ module API {
|
||||
* Gets a node such that there is an edge in the API graph between this node and the other
|
||||
* one.
|
||||
*/
|
||||
Node getAPredecessor() { result = getAPredecessor(_) }
|
||||
Node getAPredecessor() { result = this.getAPredecessor(_) }
|
||||
|
||||
/**
|
||||
* Gets a node such that there is an edge in the API graph between that other node and
|
||||
* this one.
|
||||
*/
|
||||
Node getASuccessor() { result = getASuccessor(_) }
|
||||
Node getASuccessor() { result = this.getASuccessor(_) }
|
||||
|
||||
/**
|
||||
* Gets the data-flow node that gives rise to this node, if any.
|
||||
@@ -137,16 +144,16 @@ module API {
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://help.semmle.com/QL/learn-ql/locations.html).
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
getInducingNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
this.getInducingNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
or
|
||||
// For nodes that do not have a meaningful location, `path` is the empty string and all other
|
||||
// parameters are zero.
|
||||
not exists(getInducingNode()) and
|
||||
not exists(this.getInducingNode()) and
|
||||
filepath = "" and
|
||||
startline = 0 and
|
||||
startcolumn = 0 and
|
||||
@@ -197,7 +204,7 @@ module API {
|
||||
or
|
||||
this = Impl::MkModuleImport(_) and type = "ModuleImport "
|
||||
|
|
||||
result = type + getPath()
|
||||
result = type + this.getPath()
|
||||
or
|
||||
not exists(this.getPath()) and result = type + "with no path"
|
||||
)
|
||||
@@ -419,13 +426,8 @@ module API {
|
||||
* a value in the module `m`.
|
||||
*/
|
||||
private predicate possible_builtin_defined_in_module(string name, Module m) {
|
||||
exists(NameNode n |
|
||||
not exists(LocalVariable v | n.defines(v)) and
|
||||
n.isStore() and
|
||||
name = n.getId() and
|
||||
name = getBuiltInName() and
|
||||
m = n.getEnclosingModule()
|
||||
)
|
||||
global_name_defined_in_module(name, m) and
|
||||
name = getBuiltInName()
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -440,6 +442,51 @@ module API {
|
||||
m = n.getEnclosingModule()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
|
||||
* built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
|
||||
*/
|
||||
private predicate name_possibly_defined_in_import_star(NameNode n, string name, Scope s) {
|
||||
n.isLoad() and
|
||||
name = n.getId() and
|
||||
// Not already defined in an enclosing scope.
|
||||
not exists(LocalVariable v |
|
||||
v.getId() = name and v.getScope() = n.getScope().getEnclosingScope*()
|
||||
) and
|
||||
not name = getBuiltInName() and
|
||||
s = n.getScope().getEnclosingScope*() and
|
||||
exists(potential_import_star_base(s)) and
|
||||
not global_name_defined_in_module(name, n.getEnclosingModule())
|
||||
}
|
||||
|
||||
/** Holds if a global variable called `name` is assigned a value in the module `m`. */
|
||||
private predicate global_name_defined_in_module(string name, Module m) {
|
||||
exists(NameNode n |
|
||||
not exists(LocalVariable v | n.defines(v)) and
|
||||
n.isStore() and
|
||||
name = n.getId() and
|
||||
m = n.getEnclosingModule()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the API graph node for all modules imported with `from ... import *` inside the scope `s`.
|
||||
*
|
||||
* For example, given
|
||||
*
|
||||
* `from foo.bar import *`
|
||||
*
|
||||
* this would be the API graph node with the path
|
||||
*
|
||||
* `moduleImport("foo").getMember("bar")`
|
||||
*/
|
||||
private TApiNode potential_import_star_base(Scope s) {
|
||||
exists(DataFlow::Node ref |
|
||||
ref.asCfgNode() = any(ImportStarNode n | n.getScope() = s).getModule() and
|
||||
use(result, ref)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
|
||||
* `lbl` in the API graph.
|
||||
@@ -469,11 +516,28 @@ module API {
|
||||
exists(DataFlow::Node superclass | pred.flowsTo(superclass) |
|
||||
ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
|
||||
)
|
||||
or
|
||||
// awaiting
|
||||
exists(Await await, DataFlow::Node awaitedValue |
|
||||
lbl = Label::await() and
|
||||
ref.asExpr() = await and
|
||||
await.getValue() = awaitedValue.asExpr() and
|
||||
pred.flowsTo(awaitedValue)
|
||||
)
|
||||
)
|
||||
or
|
||||
// Built-ins, treated as members of the module `builtins`
|
||||
base = MkModuleImport("builtins") and
|
||||
lbl = Label::member(any(string name | ref = likely_builtin(name)))
|
||||
or
|
||||
// Unknown variables that may belong to a module imported with `import *`
|
||||
exists(Scope s |
|
||||
base = potential_import_star_base(s) and
|
||||
lbl =
|
||||
Label::member(any(string name |
|
||||
name_possibly_defined_in_import_star(ref.asCfgNode(), name, s)
|
||||
))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -499,7 +563,7 @@ module API {
|
||||
*
|
||||
* The flow from `src` to that node may be inter-procedural.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode trackUseNode(
|
||||
private DataFlow::TypeTrackingNode trackUseNode(
|
||||
DataFlow::LocalSourceNode src, DataFlow::TypeTracker t
|
||||
) {
|
||||
t.start() and
|
||||
@@ -517,7 +581,6 @@ module API {
|
||||
cached
|
||||
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
|
||||
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
|
||||
// We exclude module variable nodes, as these do not correspond to real uses.
|
||||
not result instanceof DataFlow::ModuleVariableNode
|
||||
}
|
||||
|
||||
@@ -585,5 +648,9 @@ private module Label {
|
||||
/** Gets the `return` edge label. */
|
||||
string return() { result = "getReturn()" }
|
||||
|
||||
/** Gets the `subclass` edge label. */
|
||||
string subclass() { result = "getASubclass()" }
|
||||
|
||||
/** Gets the `await` edge label. */
|
||||
string await() { result = "getAwaited()" }
|
||||
}
|
||||
@@ -49,12 +49,15 @@ abstract class AstNode extends AstNode_ {
|
||||
/** Whether this contains `inner` syntactically */
|
||||
predicate contains(AstNode inner) { this.getAChildNode+() = inner }
|
||||
|
||||
/** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
|
||||
predicate containsInScope(AstNode inner) {
|
||||
pragma[noinline]
|
||||
private predicate containsInScope(AstNode inner, Scope scope) {
|
||||
this.contains(inner) and
|
||||
this.getScope() = inner.getScope() and
|
||||
not inner instanceof Scope
|
||||
not inner instanceof Scope and
|
||||
scope = this.getScope()
|
||||
}
|
||||
|
||||
/** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
|
||||
predicate containsInScope(AstNode inner) { this.containsInScope(inner, inner.getScope()) }
|
||||
}
|
||||
|
||||
/* Parents */
|
||||
@@ -67,7 +67,7 @@ class CommentBlock extends @py_comment {
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
@@ -4,7 +4,7 @@
|
||||
* provide concrete subclasses.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
@@ -72,6 +72,39 @@ module FileSystemAccess {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow node that writes data to the file system access.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `FileSystemWriteAccess::Range` instead.
|
||||
*/
|
||||
class FileSystemWriteAccess extends FileSystemAccess {
|
||||
override FileSystemWriteAccess::Range range;
|
||||
|
||||
/**
|
||||
* Gets a node that represents data to be written to the file system (possibly with
|
||||
* some transformation happening before it is written, like JSON encoding).
|
||||
*/
|
||||
DataFlow::Node getADataNode() { result = range.getADataNode() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new file system writes. */
|
||||
module FileSystemWriteAccess {
|
||||
/**
|
||||
* A data flow node that writes data to the file system access.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `FileSystemWriteAccess` instead.
|
||||
*/
|
||||
abstract class Range extends FileSystemAccess::Range {
|
||||
/**
|
||||
* Gets a node that represents data to be written to the file system (possibly with
|
||||
* some transformation happening before it is written, like JSON encoding).
|
||||
*/
|
||||
abstract DataFlow::Node getADataNode();
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides classes for modeling path-related APIs. */
|
||||
module Path {
|
||||
/**
|
||||
@@ -235,6 +268,35 @@ private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaint
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that logs data.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `Logging::Range` instead.
|
||||
*/
|
||||
class Logging extends DataFlow::Node {
|
||||
Logging::Range range;
|
||||
|
||||
Logging() { this = range }
|
||||
|
||||
/** Gets an input that is logged. */
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new logging mechanisms. */
|
||||
module Logging {
|
||||
/**
|
||||
* A data-flow node that logs data.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `Logging` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets an input that is logged. */
|
||||
abstract DataFlow::Node getAnInput();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that dynamically executes Python code.
|
||||
*
|
||||
@@ -293,6 +355,136 @@ module SqlExecution {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RegexExecution::Range` instead.
|
||||
*/
|
||||
class RegexExecution extends DataFlow::Node {
|
||||
RegexExecution::Range range;
|
||||
|
||||
RegexExecution() { this = range }
|
||||
|
||||
/** Gets the data flow node for the regex being executed by this node. */
|
||||
DataFlow::Node getRegex() { result = range.getRegex() }
|
||||
|
||||
/** Gets a dataflow node for the string to be searched or matched against. */
|
||||
DataFlow::Node getString() { result = range.getString() }
|
||||
|
||||
/**
|
||||
* Gets the name of this regex execution, typically the name of an executing method.
|
||||
* This is used for nice alert messages and should include the module if possible.
|
||||
*/
|
||||
string getName() { result = range.getName() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling new regular-expression execution APIs. */
|
||||
module RegexExecution {
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `RegexExecution` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the data flow node for the regex being executed by this node. */
|
||||
abstract DataFlow::Node getRegex();
|
||||
|
||||
/** Gets a dataflow node for the string to be searched or matched against. */
|
||||
abstract DataFlow::Node getString();
|
||||
|
||||
/**
|
||||
* Gets the name of this regex execution, typically the name of an executing method.
|
||||
* This is used for nice alert messages and should include the module if possible.
|
||||
*/
|
||||
abstract string getName();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that escapes meta-characters, which could be used to prevent
|
||||
* injection attacks.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `Escaping::Range` instead.
|
||||
*/
|
||||
class Escaping extends DataFlow::Node {
|
||||
Escaping::Range range;
|
||||
|
||||
Escaping() {
|
||||
this = range and
|
||||
// escapes that don't have _both_ input/output defined are not valid
|
||||
exists(range.getAnInput()) and
|
||||
exists(range.getOutput())
|
||||
}
|
||||
|
||||
/** Gets an input that will be escaped. */
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
|
||||
/** Gets the output that contains the escaped data. */
|
||||
DataFlow::Node getOutput() { result = range.getOutput() }
|
||||
|
||||
/**
|
||||
* Gets the context that this function escapes for, such as `html`, or `url`.
|
||||
*/
|
||||
string getKind() { result = range.getKind() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new escaping APIs. */
|
||||
module Escaping {
|
||||
/**
|
||||
* A data-flow node that escapes meta-characters, which could be used to prevent
|
||||
* injection attacks.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `Escaping` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets an input that will be escaped. */
|
||||
abstract DataFlow::Node getAnInput();
|
||||
|
||||
/** Gets the output that contains the escaped data. */
|
||||
abstract DataFlow::Node getOutput();
|
||||
|
||||
/**
|
||||
* Gets the context that this function escapes for.
|
||||
*
|
||||
* While kinds are represented as strings, this should not be relied upon. Use the
|
||||
* predicates in the `Escaping` module, such as `getHtmlKind`.
|
||||
*/
|
||||
abstract string getKind();
|
||||
}
|
||||
|
||||
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
|
||||
string getHtmlKind() { result = "html" }
|
||||
|
||||
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
|
||||
string getRegexKind() { result = "regex" }
|
||||
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
|
||||
//
|
||||
// Technically it claims to escape for both HTML and XML, but for now we don't have
|
||||
// anything that relies on XML escaping, so I'm going to defer deciding whether they
|
||||
// should be the same kind, or whether they deserve to be treated differently.
|
||||
}
|
||||
|
||||
/**
|
||||
* An escape of a string so it can be safely included in
|
||||
* the body of an HTML element, for example, replacing `{}` in
|
||||
* `<p>{}</p>`.
|
||||
*/
|
||||
class HtmlEscaping extends Escaping {
|
||||
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
|
||||
}
|
||||
|
||||
/**
|
||||
* An escape of a string so it can be safely included in
|
||||
* the body of a regex.
|
||||
*/
|
||||
class RegexEscaping extends Escaping {
|
||||
RegexEscaping() { range.getKind() = Escaping::getRegexKind() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling HTTP-related APIs. */
|
||||
module HTTP {
|
||||
import semmle.python.web.HttpConstants
|
||||
@@ -345,7 +537,7 @@ module HTTP {
|
||||
/** Gets the URL pattern for this route, if it can be statically determined. */
|
||||
string getUrlPattern() {
|
||||
exists(StrConst str |
|
||||
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getUrlPatternArg()) and
|
||||
this.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(str) and
|
||||
result = str.getText()
|
||||
)
|
||||
}
|
||||
@@ -478,9 +670,7 @@ module HTTP {
|
||||
/** Gets the mimetype of this HTTP response, if it can be statically determined. */
|
||||
string getMimetype() {
|
||||
exists(StrConst str |
|
||||
DataFlow::exprNode(str)
|
||||
.(DataFlow::LocalSourceNode)
|
||||
.flowsTo(this.getMimetypeOrContentTypeArg()) and
|
||||
this.getMimetypeOrContentTypeArg().getALocalSource() = DataFlow::exprNode(str) and
|
||||
result = str.getText().splitAt(";", 0)
|
||||
)
|
||||
or
|
||||
@@ -524,6 +714,62 @@ module HTTP {
|
||||
abstract DataFlow::Node getRedirectLocation();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that sets a cookie in an HTTP response.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `HTTP::CookieWrite::Range` instead.
|
||||
*/
|
||||
class CookieWrite extends DataFlow::Node {
|
||||
CookieWrite::Range range;
|
||||
|
||||
CookieWrite() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the argument, if any, specifying the raw cookie header.
|
||||
*/
|
||||
DataFlow::Node getHeaderArg() { result = range.getHeaderArg() }
|
||||
|
||||
/**
|
||||
* Gets the argument, if any, specifying the cookie name.
|
||||
*/
|
||||
DataFlow::Node getNameArg() { result = range.getNameArg() }
|
||||
|
||||
/**
|
||||
* Gets the argument, if any, specifying the cookie value.
|
||||
*/
|
||||
DataFlow::Node getValueArg() { result = range.getValueArg() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new cookie writes on HTTP responses. */
|
||||
module CookieWrite {
|
||||
/**
|
||||
* A data-flow node that sets a cookie in an HTTP response.
|
||||
*
|
||||
* Note: we don't require that this redirect must be sent to a client (a kind of
|
||||
* "if a tree falls in a forest and nobody hears it" situation).
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `HttpResponse` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument, if any, specifying the raw cookie header.
|
||||
*/
|
||||
abstract DataFlow::Node getHeaderArg();
|
||||
|
||||
/**
|
||||
* Gets the argument, if any, specifying the cookie name.
|
||||
*/
|
||||
abstract DataFlow::Node getNameArg();
|
||||
|
||||
/**
|
||||
* Gets the argument, if any, specifying the cookie value.
|
||||
*/
|
||||
abstract DataFlow::Node getValueArg();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -570,7 +816,7 @@ module Cryptography {
|
||||
/** Provides classes for modeling new key-pair generation APIs. */
|
||||
module KeyGeneration {
|
||||
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
|
||||
private DataFlow::LocalSourceNode keysizeBacktracker(
|
||||
private DataFlow::TypeTrackingNode keysizeBacktracker(
|
||||
DataFlow::TypeBackTracker t, DataFlow::Node arg
|
||||
) {
|
||||
t.start() and
|
||||
@@ -17,7 +17,7 @@ class Expr extends Expr_, AstNode {
|
||||
* Whether this expression defines variable `v`
|
||||
* If doing dataflow, then consider using SsaVariable.getDefinition() for more precision.
|
||||
*/
|
||||
predicate defines(Variable v) { this.getASubExpression+().defines(v) }
|
||||
predicate defines(Variable v) { this.getASubExpression().defines(v) }
|
||||
|
||||
/** Whether this expression may have a side effect (as determined purely from its syntax) */
|
||||
predicate hasSideEffects() {
|
||||
@@ -240,7 +240,7 @@ class Call extends Call_ {
|
||||
/** Gets the tuple (*) argument of this call, provided there is exactly one. */
|
||||
Expr getStarArg() {
|
||||
count(this.getStarargs()) < 2 and
|
||||
result = getStarargs()
|
||||
result = this.getStarargs()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
import python
|
||||
|
||||
/** A file */
|
||||
class File extends Container {
|
||||
File() { files(this, _, _, _, _) }
|
||||
|
||||
class File extends Container, @file {
|
||||
/** DEPRECATED: Use `getAbsolutePath` instead. */
|
||||
deprecated override string getName() { result = this.getAbsolutePath() }
|
||||
|
||||
@@ -15,7 +13,7 @@ class File extends Container {
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
@@ -34,9 +32,7 @@ class File extends Container {
|
||||
}
|
||||
|
||||
/** Gets a short name for this file (just the file name) */
|
||||
string getShortName() {
|
||||
exists(string simple, string ext | files(this, _, simple, ext, _) | result = simple + ext)
|
||||
}
|
||||
string getShortName() { result = this.getBaseName() }
|
||||
|
||||
private int lastLine() {
|
||||
result = max(int i | exists(Location l | l.getFile() = this and l.getEndLine() = i))
|
||||
@@ -55,7 +51,7 @@ class File extends Container {
|
||||
)
|
||||
}
|
||||
|
||||
override string getAbsolutePath() { files(this, result, _, _, _) }
|
||||
override string getAbsolutePath() { files(this, result) }
|
||||
|
||||
/** Gets the URL of this file. */
|
||||
override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" }
|
||||
@@ -89,7 +85,15 @@ class File extends Container {
|
||||
i.getTest().(Compare).compares(name, op, main) and
|
||||
name.getId() = "__name__" and
|
||||
main.getText() = "__main__"
|
||||
)
|
||||
) and
|
||||
// Exclude files named `__main__.py`. These are often _not_ meant to be run directly, but
|
||||
// contain this construct anyway.
|
||||
//
|
||||
// Their presence in a package (say, `foo`) means one can execute the package directly using
|
||||
// `python -m foo` (which will run the `foo/__main__.py` file). Since being an entry point for
|
||||
// execution means treating imports as absolute, this causes trouble, since when run with
|
||||
// `python -m`, the interpreter uses the usual package semantics.
|
||||
not this.getShortName() = "__main__.py"
|
||||
or
|
||||
// The file contains a `#!` line referencing the python interpreter
|
||||
exists(Comment c |
|
||||
@@ -110,21 +114,16 @@ private predicate occupied_line(File f, int n) {
|
||||
}
|
||||
|
||||
/** A folder (directory) */
|
||||
class Folder extends Container {
|
||||
Folder() { folders(this, _, _) }
|
||||
|
||||
class Folder extends Container, @folder {
|
||||
/** DEPRECATED: Use `getAbsolutePath` instead. */
|
||||
deprecated override string getName() { result = this.getAbsolutePath() }
|
||||
|
||||
/** DEPRECATED: Use `getBaseName` instead. */
|
||||
deprecated string getSimple() { folders(this, _, result) }
|
||||
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
@@ -136,7 +135,7 @@ class Folder extends Container {
|
||||
endcolumn = 0
|
||||
}
|
||||
|
||||
override string getAbsolutePath() { folders(this, result, _) }
|
||||
override string getAbsolutePath() { folders(this, result) }
|
||||
|
||||
/** Gets the URL of this folder. */
|
||||
override string getURL() { result = "folder://" + this.getAbsolutePath() }
|
||||
@@ -257,7 +256,7 @@ abstract class Container extends @container {
|
||||
* </table>
|
||||
*/
|
||||
string getBaseName() {
|
||||
result = getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
|
||||
result = this.getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -283,7 +282,9 @@ abstract class Container extends @container {
|
||||
* <tr><td>"/tmp/x.tar.gz"</td><td>"gz"</td></tr>
|
||||
* </table>
|
||||
*/
|
||||
string getExtension() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3) }
|
||||
string getExtension() {
|
||||
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the stem of this container, that is, the prefix of its base name up to
|
||||
@@ -302,7 +303,9 @@ abstract class Container extends @container {
|
||||
* <tr><td>"/tmp/x.tar.gz"</td><td>"x.tar"</td></tr>
|
||||
* </table>
|
||||
*/
|
||||
string getStem() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1) }
|
||||
string getStem() {
|
||||
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1)
|
||||
}
|
||||
|
||||
File getFile(string baseName) {
|
||||
result = this.getAFile() and
|
||||
@@ -324,7 +327,7 @@ abstract class Container extends @container {
|
||||
/**
|
||||
* Gets a URL representing the location of this container.
|
||||
*
|
||||
* For more information see [Providing URLs](https://help.semmle.com/QL/learn-ql/ql/locations.html#providing-urls).
|
||||
* For more information see [Providing URLs](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls).
|
||||
*/
|
||||
abstract string getURL();
|
||||
|
||||
@@ -430,7 +433,7 @@ class Location extends @location {
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
@@ -458,7 +461,7 @@ class Line extends @py_line {
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
@@ -653,6 +653,8 @@ class DefinitionNode extends ControlFlowNode {
|
||||
DefinitionNode() {
|
||||
exists(Assign a | a.getATarget().getAFlowNode() = this)
|
||||
or
|
||||
exists(AnnAssign a | a.getTarget().getAFlowNode() = this and exists(a.getValue()))
|
||||
or
|
||||
exists(Alias a | a.getAsname().getAFlowNode() = this)
|
||||
or
|
||||
augstore(_, this)
|
||||
@@ -795,6 +797,9 @@ private AstNode assigned_value(Expr lhs) {
|
||||
/* lhs = result */
|
||||
exists(Assign a | a.getATarget() = lhs and result = a.getValue())
|
||||
or
|
||||
/* lhs : annotation = result */
|
||||
exists(AnnAssign a | a.getTarget() = lhs and result = a.getValue())
|
||||
or
|
||||
/* import result as lhs */
|
||||
exists(Alias a | a.getAsname() = lhs and result = a.getValue())
|
||||
or
|
||||
@@ -846,9 +851,9 @@ class ForNode extends ControlFlowNode {
|
||||
|
||||
/** Holds if this `for` statement causes iteration over `sequence` storing each step of the iteration in `target` */
|
||||
predicate iterates(ControlFlowNode target, ControlFlowNode sequence) {
|
||||
sequence = getSequence() and
|
||||
target = possibleTarget() and
|
||||
not target = unrolledSuffix().possibleTarget()
|
||||
sequence = this.getSequence() and
|
||||
target = this.possibleTarget() and
|
||||
not target = this.unrolledSuffix().possibleTarget()
|
||||
}
|
||||
|
||||
/** Gets the sequence node for this `for` statement. */
|
||||
@@ -1106,7 +1111,7 @@ class BasicBlock extends @py_flow_node {
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
@@ -4,23 +4,33 @@
|
||||
|
||||
// If you add modeling of a new framework/library, remember to add it it to the docs in
|
||||
// `docs/codeql/support/reusables/frameworks.rst`
|
||||
private import semmle.python.frameworks.Aioch
|
||||
private import semmle.python.frameworks.Aiohttp
|
||||
private import semmle.python.frameworks.ClickhouseDriver
|
||||
private import semmle.python.frameworks.Cryptodome
|
||||
private import semmle.python.frameworks.Cryptography
|
||||
private import semmle.python.frameworks.Dill
|
||||
private import semmle.python.frameworks.Django
|
||||
private import semmle.python.frameworks.Fabric
|
||||
private import semmle.python.frameworks.Flask
|
||||
private import semmle.python.frameworks.FlaskSqlAlchemy
|
||||
private import semmle.python.frameworks.Idna
|
||||
private import semmle.python.frameworks.Invoke
|
||||
private import semmle.python.frameworks.Jmespath
|
||||
private import semmle.python.frameworks.MarkupSafe
|
||||
private import semmle.python.frameworks.Multidict
|
||||
private import semmle.python.frameworks.Mysql
|
||||
private import semmle.python.frameworks.MySQLdb
|
||||
private import semmle.python.frameworks.Peewee
|
||||
private import semmle.python.frameworks.Psycopg2
|
||||
private import semmle.python.frameworks.PyMySQL
|
||||
private import semmle.python.frameworks.Rsa
|
||||
private import semmle.python.frameworks.RuamelYaml
|
||||
private import semmle.python.frameworks.Simplejson
|
||||
private import semmle.python.frameworks.SqlAlchemy
|
||||
private import semmle.python.frameworks.Stdlib
|
||||
private import semmle.python.frameworks.Tornado
|
||||
private import semmle.python.frameworks.Twisted
|
||||
private import semmle.python.frameworks.Ujson
|
||||
private import semmle.python.frameworks.Yaml
|
||||
private import semmle.python.frameworks.Yarl
|
||||
@@ -58,6 +58,7 @@ class Function extends Function_, Scope, AstNode {
|
||||
/** Gets the name of the nth argument (for simple arguments) */
|
||||
string getArgName(int index) { result = this.getArg(index).(Name).getId() }
|
||||
|
||||
/** Gets the parameter of this function with the name `name`. */
|
||||
Parameter getArgByName(string name) {
|
||||
(
|
||||
result = this.getAnArg()
|
||||
@@ -9,6 +9,7 @@ class ConditionBlock extends BasicBlock {
|
||||
}
|
||||
|
||||
/** Basic blocks controlled by this condition, i.e. those BBs for which the condition is testIsTrue */
|
||||
pragma[nomagic]
|
||||
predicate controls(BasicBlock controlled, boolean testIsTrue) {
|
||||
/*
|
||||
* For this block to control the block 'controlled' with 'testIsTrue' the following must be true:
|
||||
@@ -31,7 +31,7 @@ class ImportExpr extends ImportExpr_ {
|
||||
// relative imports are no longer allowed in Python 3
|
||||
major_version() < 3 and
|
||||
// and can be explicitly turned off in later versions of Python 2
|
||||
not getEnclosingModule().hasFromFuture("absolute_import")
|
||||
not this.getEnclosingModule().hasFromFuture("absolute_import")
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -53,8 +53,8 @@ class ImportExpr extends ImportExpr_ {
|
||||
* the name of the topmost module that will be imported.
|
||||
*/
|
||||
private string relativeTopName() {
|
||||
getLevel() = -1 and
|
||||
result = basePackageName(1) + "." + this.getTopName() and
|
||||
this.getLevel() = -1 and
|
||||
result = this.basePackageName(1) + "." + this.getTopName() and
|
||||
valid_module_name(result)
|
||||
}
|
||||
|
||||
@@ -62,7 +62,7 @@ class ImportExpr extends ImportExpr_ {
|
||||
if this.getLevel() <= 0
|
||||
then result = this.getTopName()
|
||||
else (
|
||||
result = basePackageName(this.getLevel()) and
|
||||
result = this.basePackageName(this.getLevel()) and
|
||||
valid_module_name(result)
|
||||
)
|
||||
}
|
||||
@@ -73,17 +73,17 @@ class ImportExpr extends ImportExpr_ {
|
||||
* which may not be the name of the module.
|
||||
*/
|
||||
string bottomModuleName() {
|
||||
result = relativeTopName() + this.remainderOfName()
|
||||
result = this.relativeTopName() + this.remainderOfName()
|
||||
or
|
||||
not exists(relativeTopName()) and
|
||||
not exists(this.relativeTopName()) and
|
||||
result = this.qualifiedTopName() + this.remainderOfName()
|
||||
}
|
||||
|
||||
/** Gets the name of topmost module or package being imported */
|
||||
string topModuleName() {
|
||||
result = relativeTopName()
|
||||
result = this.relativeTopName()
|
||||
or
|
||||
not exists(relativeTopName()) and
|
||||
not exists(this.relativeTopName()) and
|
||||
result = this.qualifiedTopName()
|
||||
}
|
||||
|
||||
@@ -94,7 +94,7 @@ class ImportExpr extends ImportExpr_ {
|
||||
*/
|
||||
string getImportedModuleName() {
|
||||
exists(string bottomName | bottomName = this.bottomModuleName() |
|
||||
if this.isTop() then result = topModuleName() else result = bottomName
|
||||
if this.isTop() then result = this.topModuleName() else result = bottomName
|
||||
)
|
||||
}
|
||||
|
||||
@@ -86,13 +86,13 @@ class Module extends Module_, Scope, AstNode {
|
||||
/** Gets the package containing this module (or parent package if this is a package) */
|
||||
Module getPackage() {
|
||||
this.getName().matches("%.%") and
|
||||
result.getName() = getName().regexpReplaceAll("\\.[^.]*$", "")
|
||||
result.getName() = this.getName().regexpReplaceAll("\\.[^.]*$", "")
|
||||
}
|
||||
|
||||
/** Gets the name of the package containing this module */
|
||||
string getPackageName() {
|
||||
this.getName().matches("%.%") and
|
||||
result = getName().regexpReplaceAll("\\.[^.]*$", "")
|
||||
result = this.getName().regexpReplaceAll("\\.[^.]*$", "")
|
||||
}
|
||||
|
||||
/** Gets the metrics for this module */
|
||||
@@ -7,6 +7,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.RegexTreeView
|
||||
|
||||
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()
|
||||
|
||||
@@ -51,8 +52,10 @@ private newtype TPrintAstNode =
|
||||
TStmtListNode(StmtList list) {
|
||||
shouldPrint(list.getAnItem(), _) and
|
||||
not list = any(Module mod).getBody() and
|
||||
not forall(AstNode child | child = list.getAnItem() | isNotNeeded(child)) and
|
||||
exists(list.getAnItem())
|
||||
not forall(AstNode child | child = list.getAnItem() | isNotNeeded(child))
|
||||
} or
|
||||
TRegExpTermNode(RegExpTerm term) {
|
||||
exists(StrConst str | term.getRootTerm() = getParsedRegExp(str) and shouldPrint(str, _))
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -419,6 +422,42 @@ class ParameterNode extends AstElementNode {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A print node for a `StrConst`.
|
||||
*
|
||||
* The string has a child, if the child is used as a regular expression,
|
||||
* which is the root of the regular expression.
|
||||
*/
|
||||
class StrConstNode extends AstElementNode {
|
||||
override StrConst element;
|
||||
|
||||
override PrintAstNode getChild(int childIndex) {
|
||||
childIndex = 0 and result.(RegExpTermNode).getTerm() = getParsedRegExp(element)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A print node for a regular expression term.
|
||||
*/
|
||||
class RegExpTermNode extends TRegExpTermNode, PrintAstNode {
|
||||
RegExpTerm term;
|
||||
|
||||
RegExpTermNode() { this = TRegExpTermNode(term) }
|
||||
|
||||
/** Gets the `RegExpTerm` for this node. */
|
||||
RegExpTerm getTerm() { result = term }
|
||||
|
||||
override PrintAstNode getChild(int childIndex) {
|
||||
result.(RegExpTermNode).getTerm() = term.getChild(childIndex)
|
||||
}
|
||||
|
||||
override string toString() {
|
||||
result = "[" + strictconcat(term.getPrimaryQLClass(), " | ") + "] " + term.toString()
|
||||
}
|
||||
|
||||
override Location getLocation() { result = term.getLocation() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the `i`th child from `node` ordered by location.
|
||||
*/
|
||||
@@ -447,7 +486,7 @@ private module PrettyPrinting {
|
||||
string getQlClass(AstNode a) {
|
||||
shouldPrint(a, _) and
|
||||
(
|
||||
not exists(getQlCustomClass(a)) and result = a.toString()
|
||||
not exists(getQlCustomClass(a)) and result = strictconcat(a.toString(), " | ")
|
||||
or
|
||||
result = strictconcat(getQlCustomClass(a), " | ")
|
||||
)
|
||||
998
python/ql/lib/semmle/python/RegexTreeView.qll
Normal file
998
python/ql/lib/semmle/python/RegexTreeView.qll
Normal file
@@ -0,0 +1,998 @@
|
||||
/** Provides a class hierarchy corresponding to a parse tree of regular expressions. */
|
||||
|
||||
import python
|
||||
private import semmle.python.regex
|
||||
|
||||
/**
|
||||
* An element containing a regular expression term, that is, either
|
||||
* a string literal (parsed as a regular expression)
|
||||
* or another regular expression term.
|
||||
*
|
||||
* For sequences and alternations, we require at least one child.
|
||||
* Otherwise, we wish to represent the term differently.
|
||||
* This avoids multiple representations of the same term.
|
||||
*/
|
||||
newtype TRegExpParent =
|
||||
/** A string literal used as a regular expression */
|
||||
TRegExpLiteral(Regex re) or
|
||||
/** A quantified term */
|
||||
TRegExpQuantifier(Regex re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
|
||||
/** A sequence term */
|
||||
TRegExpSequence(Regex re, int start, int end) {
|
||||
re.sequence(start, end) and
|
||||
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
|
||||
} or
|
||||
/** An alternation term */
|
||||
TRegExpAlt(Regex re, int start, int end) {
|
||||
re.alternation(start, end) and
|
||||
exists(int part_end |
|
||||
re.alternationOption(start, end, start, part_end) and
|
||||
part_end < end
|
||||
) // if an alternation does not have more than one element, it should be treated as that element instead.
|
||||
} or
|
||||
/** A character class term */
|
||||
TRegExpCharacterClass(Regex re, int start, int end) { re.charSet(start, end) } or
|
||||
/** A character range term */
|
||||
TRegExpCharacterRange(Regex re, int start, int end) { re.charRange(_, start, _, _, end) } or
|
||||
/** A group term */
|
||||
TRegExpGroup(Regex re, int start, int end) { re.group(start, end) } or
|
||||
/** A special character */
|
||||
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
|
||||
/** A normal character */
|
||||
TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
|
||||
/** A back reference */
|
||||
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
|
||||
|
||||
/**
|
||||
* An element containing a regular expression term, that is, either
|
||||
* a string literal (parsed as a regular expression)
|
||||
* or another regular expression term.
|
||||
*/
|
||||
class RegExpParent extends TRegExpParent {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = "RegExpParent" }
|
||||
|
||||
/** Gets the `i`th child term. */
|
||||
abstract RegExpTerm getChild(int i);
|
||||
|
||||
/** Gets a child term . */
|
||||
RegExpTerm getAChild() { result = this.getChild(_) }
|
||||
|
||||
/** Gets the number of child terms. */
|
||||
int getNumChild() { result = count(this.getAChild()) }
|
||||
|
||||
/** Gets the associated regex. */
|
||||
abstract Regex getRegex();
|
||||
}
|
||||
|
||||
/** A string literal used as a regular expression */
|
||||
class RegExpLiteral extends TRegExpLiteral, RegExpParent {
|
||||
Regex re;
|
||||
|
||||
RegExpLiteral() { this = TRegExpLiteral(re) }
|
||||
|
||||
override RegExpTerm getChild(int i) { i = 0 and result.getRegex() = re and result.isRootTerm() }
|
||||
|
||||
/** Holds if dot, `.`, matches all characters, including newlines. */
|
||||
predicate isDotAll() { re.getAMode() = "DOTALL" }
|
||||
|
||||
/** Holds if this regex matching is case-insensitive for this regex. */
|
||||
predicate isIgnoreCase() { re.getAMode() = "IGNORECASE" }
|
||||
|
||||
/** Get a string representing all modes for this regex. */
|
||||
string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }
|
||||
|
||||
override Regex getRegex() { result = re }
|
||||
|
||||
/** Gets the primary QL class for this regex. */
|
||||
string getPrimaryQLClass() { result = "RegExpLiteral" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A regular expression term, that is, a syntactic part of a regular expression.
|
||||
*/
|
||||
class RegExpTerm extends RegExpParent {
|
||||
Regex re;
|
||||
int start;
|
||||
int end;
|
||||
|
||||
RegExpTerm() {
|
||||
this = TRegExpAlt(re, start, end)
|
||||
or
|
||||
this = TRegExpBackRef(re, start, end)
|
||||
or
|
||||
this = TRegExpCharacterClass(re, start, end)
|
||||
or
|
||||
this = TRegExpCharacterRange(re, start, end)
|
||||
or
|
||||
this = TRegExpNormalChar(re, start, end)
|
||||
or
|
||||
this = TRegExpGroup(re, start, end)
|
||||
or
|
||||
this = TRegExpQuantifier(re, start, end)
|
||||
or
|
||||
this = TRegExpSequence(re, start, end)
|
||||
or
|
||||
this = TRegExpSpecialChar(re, start, end)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the outermost term of this regular expression.
|
||||
*/
|
||||
RegExpTerm getRootTerm() {
|
||||
this.isRootTerm() and result = this
|
||||
or
|
||||
result = this.getParent().(RegExpTerm).getRootTerm()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this term is part of a string literal
|
||||
* that is interpreted as a regular expression.
|
||||
*/
|
||||
predicate isUsedAsRegExp() { any() }
|
||||
|
||||
/**
|
||||
* Holds if this is the root term of a regular expression.
|
||||
*/
|
||||
predicate isRootTerm() { start = 0 and end = re.getText().length() }
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
result = this.(RegExpAlt).getChild(i)
|
||||
or
|
||||
result = this.(RegExpBackRef).getChild(i)
|
||||
or
|
||||
result = this.(RegExpCharacterClass).getChild(i)
|
||||
or
|
||||
result = this.(RegExpCharacterRange).getChild(i)
|
||||
or
|
||||
result = this.(RegExpNormalChar).getChild(i)
|
||||
or
|
||||
result = this.(RegExpGroup).getChild(i)
|
||||
or
|
||||
result = this.(RegExpQuantifier).getChild(i)
|
||||
or
|
||||
result = this.(RegExpSequence).getChild(i)
|
||||
or
|
||||
result = this.(RegExpSpecialChar).getChild(i)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the parent term of this regular expression term, or the
|
||||
* regular expression literal if this is the root term.
|
||||
*/
|
||||
RegExpParent getParent() { result.getAChild() = this }
|
||||
|
||||
override Regex getRegex() { result = re }
|
||||
|
||||
/** Gets the offset at which this term starts. */
|
||||
int getStart() { result = start }
|
||||
|
||||
/** Gets the offset at which this term ends. */
|
||||
int getEnd() { result = end }
|
||||
|
||||
override string toString() { result = re.getText().substring(start, end) }
|
||||
|
||||
/**
|
||||
* Gets the location of the surrounding regex, as locations inside the regex do not exist.
|
||||
* To get location information corresponding to the term inside the regex,
|
||||
* use `hasLocationInfo`.
|
||||
*/
|
||||
Location getLocation() { result = re.getLocation() }
|
||||
|
||||
/** Holds if this term is found at the specified location offsets. */
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
exists(int re_start, int re_end |
|
||||
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, re_end) and
|
||||
startcolumn = re_start + start + 4 and
|
||||
endcolumn = re_start + end + 3
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the file in which this term is found. */
|
||||
File getFile() { result = this.getLocation().getFile() }
|
||||
|
||||
/** Gets the raw source text of this term. */
|
||||
string getRawValue() { result = this.toString() }
|
||||
|
||||
/** Gets the string literal in which this term is found. */
|
||||
RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }
|
||||
|
||||
/** Gets the regular expression term that is matched (textually) before this one, if any. */
|
||||
RegExpTerm getPredecessor() {
|
||||
exists(RegExpTerm parent | parent = this.getParent() |
|
||||
result = parent.(RegExpSequence).previousElement(this)
|
||||
or
|
||||
not exists(parent.(RegExpSequence).previousElement(this)) and
|
||||
not parent instanceof RegExpSubPattern and
|
||||
result = parent.getPredecessor()
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the regular expression term that is matched (textually) after this one, if any. */
|
||||
RegExpTerm getSuccessor() {
|
||||
exists(RegExpTerm parent | parent = this.getParent() |
|
||||
result = parent.(RegExpSequence).nextElement(this)
|
||||
or
|
||||
not exists(parent.(RegExpSequence).nextElement(this)) and
|
||||
not parent instanceof RegExpSubPattern and
|
||||
result = parent.getSuccessor()
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the primary QL class for this term. */
|
||||
string getPrimaryQLClass() { result = "RegExpTerm" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A quantified regular expression term.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* ((ECMA|Java)[sS]cript)*
|
||||
* ```
|
||||
*/
|
||||
class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
|
||||
int part_end;
|
||||
boolean maybe_empty;
|
||||
boolean may_repeat_forever;
|
||||
|
||||
RegExpQuantifier() {
|
||||
this = TRegExpQuantifier(re, start, end) and
|
||||
re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever)
|
||||
}
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
i = 0 and
|
||||
result.getRegex() = re and
|
||||
result.getStart() = start and
|
||||
result.getEnd() = part_end
|
||||
}
|
||||
|
||||
/** Hols if this term may match an unlimited number of times. */
|
||||
predicate mayRepeatForever() { may_repeat_forever = true }
|
||||
|
||||
/** Gets the qualifier for this term. That is e.g "?" for "a?". */
|
||||
string getQualifier() { result = re.getText().substring(part_end, end) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpQuantifier" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A regular expression term that permits unlimited repetitions.
|
||||
*/
|
||||
class InfiniteRepetitionQuantifier extends RegExpQuantifier {
|
||||
InfiniteRepetitionQuantifier() { this.mayRepeatForever() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A star-quantified term.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* \w*
|
||||
* ```
|
||||
*/
|
||||
class RegExpStar extends InfiniteRepetitionQuantifier {
|
||||
RegExpStar() { this.getQualifier().charAt(0) = "*" }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpStar" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A plus-quantified term.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* \w+
|
||||
* ```
|
||||
*/
|
||||
class RegExpPlus extends InfiniteRepetitionQuantifier {
|
||||
RegExpPlus() { this.getQualifier().charAt(0) = "+" }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpPlus" }
|
||||
}
|
||||
|
||||
/**
|
||||
* An optional term.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* ;?
|
||||
* ```
|
||||
*/
|
||||
class RegExpOpt extends RegExpQuantifier {
|
||||
RegExpOpt() { this.getQualifier().charAt(0) = "?" }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpOpt" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A range-quantified term
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* \w{2,4}
|
||||
* \w{2,}
|
||||
* \w{2}
|
||||
* ```
|
||||
*/
|
||||
class RegExpRange extends RegExpQuantifier {
|
||||
string upper;
|
||||
string lower;
|
||||
|
||||
RegExpRange() { re.multiples(part_end, end, lower, upper) }
|
||||
|
||||
/** Gets the string defining the upper bound of this range, if any. */
|
||||
string getUpper() { result = upper }
|
||||
|
||||
/** Gets the string defining the lower bound of this range, if any. */
|
||||
string getLower() { result = lower }
|
||||
|
||||
/**
|
||||
* Gets the upper bound of the range, if any.
|
||||
*
|
||||
* If there is no upper bound, any number of repetitions is allowed.
|
||||
* For a term of the form `r{lo}`, both the lower and the upper bound
|
||||
* are `lo`.
|
||||
*/
|
||||
int getUpperBound() { result = this.getUpper().toInt() }
|
||||
|
||||
/** Gets the lower bound of the range. */
|
||||
int getLowerBound() { result = this.getLower().toInt() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpRange" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A sequence term.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* (ECMA|Java)Script
|
||||
* ```
|
||||
*
|
||||
* This is a sequence with the elements `(ECMA|Java)` and `Script`.
|
||||
*/
|
||||
class RegExpSequence extends RegExpTerm, TRegExpSequence {
|
||||
RegExpSequence() { this = TRegExpSequence(re, start, end) }
|
||||
|
||||
override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
|
||||
|
||||
/** Gets the element preceding `element` in this sequence. */
|
||||
RegExpTerm previousElement(RegExpTerm element) { element = this.nextElement(result) }
|
||||
|
||||
/** Gets the element following `element` in this sequence. */
|
||||
RegExpTerm nextElement(RegExpTerm element) {
|
||||
exists(int i |
|
||||
element = this.getChild(i) and
|
||||
result = this.getChild(i + 1)
|
||||
)
|
||||
}
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpSequence" }
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private int seqChildEnd(Regex re, int start, int end, int i) {
|
||||
result = seqChild(re, start, end, i).getEnd()
|
||||
}
|
||||
|
||||
// moved out so we can use it in the charpred
|
||||
private RegExpTerm seqChild(Regex re, int start, int end, int i) {
|
||||
re.sequence(start, end) and
|
||||
(
|
||||
i = 0 and
|
||||
result.getRegex() = re and
|
||||
result.getStart() = start and
|
||||
exists(int itemEnd |
|
||||
re.item(start, itemEnd) and
|
||||
result.getEnd() = itemEnd
|
||||
)
|
||||
or
|
||||
i > 0 and
|
||||
result.getRegex() = re and
|
||||
exists(int itemStart | itemStart = seqChildEnd(re, start, end, i - 1) |
|
||||
result.getStart() = itemStart and
|
||||
re.item(itemStart, result.getEnd())
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* An alternative term, that is, a term of the form `a|b`.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* ECMA|Java
|
||||
* ```
|
||||
*/
|
||||
class RegExpAlt extends RegExpTerm, TRegExpAlt {
|
||||
RegExpAlt() { this = TRegExpAlt(re, start, end) }
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
i = 0 and
|
||||
result.getRegex() = re and
|
||||
result.getStart() = start and
|
||||
exists(int part_end |
|
||||
re.alternationOption(start, end, start, part_end) and
|
||||
result.getEnd() = part_end
|
||||
)
|
||||
or
|
||||
i > 0 and
|
||||
result.getRegex() = re and
|
||||
exists(int part_start |
|
||||
part_start = this.getChild(i - 1).getEnd() + 1 // allow for the |
|
||||
|
|
||||
result.getStart() = part_start and
|
||||
re.alternationOption(start, end, part_start, result.getEnd())
|
||||
)
|
||||
}
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpAlt" }
|
||||
}
|
||||
|
||||
/**
|
||||
* An escaped regular expression term, that is, a regular expression
|
||||
* term starting with a backslash, which is not a backreference.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* \.
|
||||
* \w
|
||||
* ```
|
||||
*/
|
||||
class RegExpEscape extends RegExpNormalChar {
|
||||
RegExpEscape() { re.escapedCharacter(start, end) }
|
||||
|
||||
/**
|
||||
* Gets the name of the escaped; for example, `w` for `\w`.
|
||||
* TODO: Handle named escapes.
|
||||
*/
|
||||
override string getValue() {
|
||||
this.isIdentityEscape() and result = this.getUnescaped()
|
||||
or
|
||||
this.getUnescaped() = "n" and result = "\n"
|
||||
or
|
||||
this.getUnescaped() = "r" and result = "\r"
|
||||
or
|
||||
this.getUnescaped() = "t" and result = "\t"
|
||||
or
|
||||
// TODO: Find a way to include a formfeed character
|
||||
// this.getUnescaped() = "f" and result = ""
|
||||
// or
|
||||
this.isUnicode() and
|
||||
result = this.getUnicode()
|
||||
}
|
||||
|
||||
/** Holds if this terms name is given by the part following the escape character. */
|
||||
predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t", "f"] }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpEscape" }
|
||||
|
||||
/** Gets the part of the term following the escape character. That is e.g. "w" if the term is "\w". */
|
||||
private string getUnescaped() { result = this.getText().suffix(1) }
|
||||
|
||||
/**
|
||||
* Gets the text for this escape. That is e.g. "\w".
|
||||
*/
|
||||
private string getText() { result = re.getText().substring(start, end) }
|
||||
|
||||
/**
|
||||
* Holds if this is a unicode escape.
|
||||
*/
|
||||
private predicate isUnicode() { this.getText().prefix(2) = ["\\u", "\\U"] }
|
||||
|
||||
/**
|
||||
* Gets the unicode char for this escape.
|
||||
* E.g. for `\u0061` this returns "a".
|
||||
*/
|
||||
private string getUnicode() {
|
||||
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
|
||||
result = codepoint.toUnicode()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets int value for the `index`th char in the hex number of the unicode escape.
|
||||
* E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
|
||||
*/
|
||||
private int getHexValueFromUnicode(int index) {
|
||||
this.isUnicode() and
|
||||
exists(string hex, string char | hex = this.getText().suffix(2) |
|
||||
char = hex.charAt(index) and
|
||||
result = 16.pow(hex.length() - index - 1) * toHex(char)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the hex number for the `hex` char.
|
||||
*/
|
||||
private int toHex(string hex) {
|
||||
hex = [0 .. 9].toString() and
|
||||
result = hex.toInt()
|
||||
or
|
||||
result = 10 and hex = ["a", "A"]
|
||||
or
|
||||
result = 11 and hex = ["b", "B"]
|
||||
or
|
||||
result = 12 and hex = ["c", "C"]
|
||||
or
|
||||
result = 13 and hex = ["d", "D"]
|
||||
or
|
||||
result = 14 and hex = ["e", "E"]
|
||||
or
|
||||
result = 15 and hex = ["f", "F"]
|
||||
}
|
||||
|
||||
/**
|
||||
* A character class escape in a regular expression.
|
||||
* That is, an escaped charachter that denotes multiple characters.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* \w
|
||||
* \S
|
||||
* ```
|
||||
*/
|
||||
class RegExpCharacterClassEscape extends RegExpEscape {
|
||||
RegExpCharacterClassEscape() { this.getValue() in ["d", "D", "s", "S", "w", "W"] }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A character class in a regular expression.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* [a-z_]
|
||||
* [^<>&]
|
||||
* ```
|
||||
*/
|
||||
class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
|
||||
RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }
|
||||
|
||||
/** Holds if this character class is inverted, matching the opposite of its content. */
|
||||
predicate isInverted() { re.getChar(start + 1) = "^" }
|
||||
|
||||
/** Gets the `i`th char inside this charater class. */
|
||||
string getCharThing(int i) { result = re.getChar(i + start) }
|
||||
|
||||
/** Holds if this character class can match anything. */
|
||||
predicate isUniversalClass() {
|
||||
// [^]
|
||||
this.isInverted() and not exists(this.getAChild())
|
||||
or
|
||||
// [\w\W] and similar
|
||||
not this.isInverted() and
|
||||
exists(string cce1, string cce2 |
|
||||
cce1 = this.getAChild().(RegExpCharacterClassEscape).getValue() and
|
||||
cce2 = this.getAChild().(RegExpCharacterClassEscape).getValue()
|
||||
|
|
||||
cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
|
||||
)
|
||||
}
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
i = 0 and
|
||||
result.getRegex() = re and
|
||||
exists(int itemStart, int itemEnd |
|
||||
result.getStart() = itemStart and
|
||||
re.char_set_start(start, itemStart) and
|
||||
re.char_set_child(start, itemStart, itemEnd) and
|
||||
result.getEnd() = itemEnd
|
||||
)
|
||||
or
|
||||
i > 0 and
|
||||
result.getRegex() = re and
|
||||
exists(int itemStart | itemStart = this.getChild(i - 1).getEnd() |
|
||||
result.getStart() = itemStart and
|
||||
re.char_set_child(start, itemStart, result.getEnd())
|
||||
)
|
||||
}
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpCharacterClass" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A character range in a character class in a regular expression.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* a-z
|
||||
* ```
|
||||
*/
|
||||
class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
|
||||
int lower_end;
|
||||
int upper_start;
|
||||
|
||||
RegExpCharacterRange() {
|
||||
this = TRegExpCharacterRange(re, start, end) and
|
||||
re.charRange(_, start, lower_end, upper_start, end)
|
||||
}
|
||||
|
||||
/** Holds if this range goes from `lo` to `hi`, in effect is `lo-hi`. */
|
||||
predicate isRange(string lo, string hi) {
|
||||
lo = re.getText().substring(start, lower_end) and
|
||||
hi = re.getText().substring(upper_start, end)
|
||||
}
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
i = 0 and
|
||||
result.getRegex() = re and
|
||||
result.getStart() = start and
|
||||
result.getEnd() = lower_end
|
||||
or
|
||||
i = 1 and
|
||||
result.getRegex() = re and
|
||||
result.getStart() = upper_start and
|
||||
result.getEnd() = end
|
||||
}
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpCharacterRange" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A normal character in a regular expression, that is, a character
|
||||
* without special meaning. This includes escaped characters.
|
||||
*
|
||||
* Examples:
|
||||
* ```
|
||||
* t
|
||||
* \t
|
||||
* ```
|
||||
*/
|
||||
class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
|
||||
RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }
|
||||
|
||||
/**
|
||||
* Holds if this constant represents a valid Unicode character (as opposed
|
||||
* to a surrogate code point that does not correspond to a character by itself.)
|
||||
*/
|
||||
predicate isCharacter() { any() }
|
||||
|
||||
/** Gets the string representation of the char matched by this term. */
|
||||
string getValue() { result = re.getText().substring(start, end) }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpNormalChar" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A constant regular expression term, that is, a regular expression
|
||||
* term matching a single string. Currently, this will always be a single character.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* a
|
||||
* ```
|
||||
*/
|
||||
class RegExpConstant extends RegExpTerm {
|
||||
string value;
|
||||
|
||||
RegExpConstant() {
|
||||
this = TRegExpNormalChar(re, start, end) and
|
||||
not this instanceof RegExpCharacterClassEscape and
|
||||
// exclude chars in qualifiers
|
||||
// TODO: push this into regex library
|
||||
not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
|
||||
qstart <= start and end <= qend
|
||||
) and
|
||||
value = this.(RegExpNormalChar).getValue()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this constant represents a valid Unicode character (as opposed
|
||||
* to a surrogate code point that does not correspond to a character by itself.)
|
||||
*/
|
||||
predicate isCharacter() { any() }
|
||||
|
||||
/** Gets the string matched by this constant term. */
|
||||
string getValue() { result = value }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpConstant" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A grouped regular expression.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (ECMA|Java)
|
||||
* (?:ECMA|Java)
|
||||
* (?<quote>['"])
|
||||
* ```
|
||||
*/
|
||||
class RegExpGroup extends RegExpTerm, TRegExpGroup {
|
||||
RegExpGroup() { this = TRegExpGroup(re, start, end) }
|
||||
|
||||
/**
|
||||
* Gets the index of this capture group within the enclosing regular
|
||||
* expression literal.
|
||||
*
|
||||
* For example, in the regular expression `/((a?).)(?:b)/`, the
|
||||
* group `((a?).)` has index 1, the group `(a?)` nested inside it
|
||||
* has index 2, and the group `(?:b)` has no index, since it is
|
||||
* not a capture group.
|
||||
*/
|
||||
int getNumber() { result = re.getGroupNumber(start, end) }
|
||||
|
||||
/** Holds if this is a named capture group. */
|
||||
predicate isNamed() { exists(this.getName()) }
|
||||
|
||||
/** Gets the name of this capture group, if any. */
|
||||
string getName() { result = re.getGroupName(start, end) }
|
||||
|
||||
override RegExpTerm getChild(int i) {
|
||||
result.getRegex() = re and
|
||||
i = 0 and
|
||||
re.groupContents(start, end, result.getStart(), result.getEnd())
|
||||
}
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpGroup" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A special character in a regular expression.
|
||||
*
|
||||
* Examples:
|
||||
* ```
|
||||
* ^
|
||||
* $
|
||||
* .
|
||||
* ```
|
||||
*/
|
||||
class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
|
||||
string char;
|
||||
|
||||
RegExpSpecialChar() {
|
||||
this = TRegExpSpecialChar(re, start, end) and
|
||||
re.specialCharacter(start, end, char)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this constant represents a valid Unicode character (as opposed
|
||||
* to a surrogate code point that does not correspond to a character by itself.)
|
||||
*/
|
||||
predicate isCharacter() { any() }
|
||||
|
||||
/** Gets the char for this term. */
|
||||
string getChar() { result = char }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpSpecialChar" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A dot regular expression.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* .
|
||||
* ```
|
||||
*/
|
||||
class RegExpDot extends RegExpSpecialChar {
|
||||
RegExpDot() { this.getChar() = "." }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpDot" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A dollar assertion `$` matching the end of a line.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* $
|
||||
* ```
|
||||
*/
|
||||
class RegExpDollar extends RegExpSpecialChar {
|
||||
RegExpDollar() { this.getChar() = "$" }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpDollar" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A caret assertion `^` matching the beginning of a line.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ```
|
||||
* ^
|
||||
* ```
|
||||
*/
|
||||
class RegExpCaret extends RegExpSpecialChar {
|
||||
RegExpCaret() { this.getChar() = "^" }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpCaret" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A zero-width match, that is, either an empty group or an assertion.
|
||||
*
|
||||
* Examples:
|
||||
* ```
|
||||
* ()
|
||||
* (?=\w)
|
||||
* ```
|
||||
*/
|
||||
class RegExpZeroWidthMatch extends RegExpGroup {
|
||||
RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpZeroWidthMatch" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A zero-width lookahead or lookbehind assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?=\w)
|
||||
* (?!\n)
|
||||
* (?<=\.)
|
||||
* (?<!\\)
|
||||
* ```
|
||||
*/
|
||||
class RegExpSubPattern extends RegExpZeroWidthMatch {
|
||||
RegExpSubPattern() { not re.emptyGroup(start, end) }
|
||||
|
||||
/** Gets the lookahead term. */
|
||||
RegExpTerm getOperand() {
|
||||
exists(int in_start, int in_end | re.groupContents(start, end, in_start, in_end) |
|
||||
result.getRegex() = re and
|
||||
result.getStart() = in_start and
|
||||
result.getEnd() = in_end
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A zero-width lookahead assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?=\w)
|
||||
* (?!\n)
|
||||
* ```
|
||||
*/
|
||||
abstract class RegExpLookahead extends RegExpSubPattern { }
|
||||
|
||||
/**
|
||||
* A positive-lookahead assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?=\w)
|
||||
* ```
|
||||
*/
|
||||
class RegExpPositiveLookahead extends RegExpLookahead {
|
||||
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A negative-lookahead assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?!\n)
|
||||
* ```
|
||||
*/
|
||||
class RegExpNegativeLookahead extends RegExpLookahead {
|
||||
RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A zero-width lookbehind assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?<=\.)
|
||||
* (?<!\\)
|
||||
* ```
|
||||
*/
|
||||
abstract class RegExpLookbehind extends RegExpSubPattern { }
|
||||
|
||||
/**
|
||||
* A positive-lookbehind assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?<=\.)
|
||||
* ```
|
||||
*/
|
||||
class RegExpPositiveLookbehind extends RegExpLookbehind {
|
||||
RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A negative-lookbehind assertion.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* (?<!\\)
|
||||
* ```
|
||||
*/
|
||||
class RegExpNegativeLookbehind extends RegExpLookbehind {
|
||||
RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A back reference, that is, a term of the form `\i` or `\k<name>`
|
||||
* in a regular expression.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* ```
|
||||
* \1
|
||||
* (?P=quote)
|
||||
* ```
|
||||
*/
|
||||
class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
|
||||
RegExpBackRef() { this = TRegExpBackRef(re, start, end) }
|
||||
|
||||
/**
|
||||
* Gets the number of the capture group this back reference refers to, if any.
|
||||
*/
|
||||
int getNumber() { result = re.getBackrefNumber(start, end) }
|
||||
|
||||
/**
|
||||
* Gets the name of the capture group this back reference refers to, if any.
|
||||
*/
|
||||
string getName() { result = re.getBackrefName(start, end) }
|
||||
|
||||
/** Gets the capture group this back reference refers to. */
|
||||
RegExpGroup getGroup() {
|
||||
result.getLiteral() = this.getLiteral() and
|
||||
(
|
||||
result.getNumber() = this.getNumber() or
|
||||
result.getName() = this.getName()
|
||||
)
|
||||
}
|
||||
|
||||
override RegExpTerm getChild(int i) { none() }
|
||||
|
||||
override string getPrimaryQLClass() { result = "RegExpBackRef" }
|
||||
}
|
||||
|
||||
/** Gets the parse tree resulting from parsing `re`, if such has been constructed. */
|
||||
RegExpTerm getParsedRegExp(StrConst re) { result.getRegex() = re and result.isRootTerm() }
|
||||
@@ -153,6 +153,12 @@ class ExceptStmt extends ExceptStmt_ {
|
||||
override Stmt getASubStatement() { result = this.getAStmt() }
|
||||
|
||||
override Stmt getLastStatement() { result = this.getBody().getLastItem().getLastStatement() }
|
||||
|
||||
override Expr getType() {
|
||||
result = super.getType() and not result instanceof Tuple
|
||||
or
|
||||
result = super.getType().(Tuple).getAnElt()
|
||||
}
|
||||
}
|
||||
|
||||
/** An assert statement, such as `assert a == b, "A is not equal to b"` */
|
||||
@@ -15,68 +15,35 @@
|
||||
*/
|
||||
private module AlgorithmNames {
|
||||
predicate isStrongHashingAlgorithm(string name) {
|
||||
name = "DSA" or
|
||||
name = "ED25519" or
|
||||
name = "ES256" or
|
||||
name = "ECDSA256" or
|
||||
name = "ES384" or
|
||||
name = "ECDSA384" or
|
||||
name = "ES512" or
|
||||
name = "ECDSA512" or
|
||||
name = "SHA2" or
|
||||
name = "SHA224" or
|
||||
name = "SHA256" or
|
||||
name = "SHA384" or
|
||||
name = "SHA512" or
|
||||
name = "SHA3"
|
||||
name =
|
||||
[
|
||||
"DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2",
|
||||
"SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512"
|
||||
]
|
||||
}
|
||||
|
||||
predicate isWeakHashingAlgorithm(string name) {
|
||||
name = "HAVEL128" or
|
||||
name = "MD2" or
|
||||
name = "MD4" or
|
||||
name = "MD5" or
|
||||
name = "PANAMA" or
|
||||
name = "RIPEMD" or
|
||||
name = "RIPEMD128" or
|
||||
name = "RIPEMD256" or
|
||||
name = "RIPEMD160" or
|
||||
name = "RIPEMD320" or
|
||||
name = "SHA0" or
|
||||
name = "SHA1"
|
||||
name =
|
||||
[
|
||||
"HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", "RIPEMD160",
|
||||
"RIPEMD320", "SHA0", "SHA1"
|
||||
]
|
||||
}
|
||||
|
||||
predicate isStrongEncryptionAlgorithm(string name) {
|
||||
name = "AES" or
|
||||
name = "AES128" or
|
||||
name = "AES192" or
|
||||
name = "AES256" or
|
||||
name = "AES512" or
|
||||
name = "RSA" or
|
||||
name = "RABBIT" or
|
||||
name = "BLOWFISH"
|
||||
name = ["AES", "AES128", "AES192", "AES256", "AES512", "RSA", "RABBIT", "BLOWFISH"]
|
||||
}
|
||||
|
||||
predicate isWeakEncryptionAlgorithm(string name) {
|
||||
name = "DES" or
|
||||
name = "3DES" or
|
||||
name = "TRIPLEDES" or
|
||||
name = "TDEA" or
|
||||
name = "TRIPLEDEA" or
|
||||
name = "ARC2" or
|
||||
name = "RC2" or
|
||||
name = "ARC4" or
|
||||
name = "RC4" or
|
||||
name = "ARCFOUR" or
|
||||
name = "ARC5" or
|
||||
name = "RC5"
|
||||
name =
|
||||
[
|
||||
"DES", "3DES", "TRIPLEDES", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", "ARCFOUR",
|
||||
"ARC5", "RC5"
|
||||
]
|
||||
}
|
||||
|
||||
predicate isStrongPasswordHashingAlgorithm(string name) {
|
||||
name = "ARGON2" or
|
||||
name = "PBKDF2" or
|
||||
name = "BCRYPT" or
|
||||
name = "SCRYPT"
|
||||
name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"]
|
||||
}
|
||||
|
||||
predicate isWeakPasswordHashingAlgorithm(string name) { none() }
|
||||
@@ -55,12 +55,12 @@ private module SensitiveDataModeling {
|
||||
* Gets a reference to a function that is considered to be a sensitive source of
|
||||
* `classification`.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode sensitiveFunction(
|
||||
private DataFlow::TypeTrackingNode sensitiveFunction(
|
||||
DataFlow::TypeTracker t, SensitiveDataClassification classification
|
||||
) {
|
||||
t.start() and
|
||||
exists(Function f |
|
||||
nameIndicatesSensitiveData(f.getName(), classification) and
|
||||
f.getName() = sensitiveString(classification) and
|
||||
result.asExpr() = f.getDefinition()
|
||||
)
|
||||
or
|
||||
@@ -76,28 +76,16 @@ private module SensitiveDataModeling {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a string constant that, if used as the key in a lookup,
|
||||
* indicates the presence of sensitive data with `classification`.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode sensitiveLookupStringConst(
|
||||
DataFlow::TypeTracker t, SensitiveDataClassification classification
|
||||
) {
|
||||
t.start() and
|
||||
nameIndicatesSensitiveData(result.asExpr().(StrConst).getText(), classification)
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
result = sensitiveLookupStringConst(t2, classification).track(t2, t)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a string constant that, if used as the key in a lookup,
|
||||
* indicates the presence of sensitive data with `classification`.
|
||||
*
|
||||
* Also see `extraStepForCalls`.
|
||||
* Gets a reference (in local scope) to a string constant that, if used as the key in
|
||||
* a lookup, indicates the presence of sensitive data with `classification`.
|
||||
*/
|
||||
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
|
||||
sensitiveLookupStringConst(DataFlow::TypeTracker::end(), classification).flowsTo(result)
|
||||
// Note: If this is implemented with type-tracking, we will get cross-talk as
|
||||
// illustrated in python/ql/test/experimental/dataflow/sensitive-data/test.py
|
||||
exists(DataFlow::LocalSourceNode source |
|
||||
source.asExpr().(StrConst).getText() = sensitiveString(classification) and
|
||||
source.flowsTo(result)
|
||||
)
|
||||
}
|
||||
|
||||
/** A function call that is considered a source of sensitive data. */
|
||||
@@ -109,7 +97,7 @@ private module SensitiveDataModeling {
|
||||
or
|
||||
// to cover functions that we don't have the definition for, and where the
|
||||
// reference to the function has not already been marked as being sensitive
|
||||
nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
|
||||
this.getFunction().asCfgNode().(NameNode).getId() = sensitiveString(classification)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
@@ -118,8 +106,10 @@ private module SensitiveDataModeling {
|
||||
/**
|
||||
* Tracks any modeled source of sensitive data (with any classification),
|
||||
* to limit the scope of `extraStepForCalls`. See it's QLDoc for more context.
|
||||
*
|
||||
* Also see `extraStepForCalls`.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
|
||||
private DataFlow::TypeTrackingNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result instanceof SensitiveDataSource
|
||||
or
|
||||
@@ -129,6 +119,8 @@ private module SensitiveDataModeling {
|
||||
/**
|
||||
* Tracks any modeled source of sensitive data (with any classification),
|
||||
* to limit the scope of `extraStepForCalls`. See it's QLDoc for more context.
|
||||
*
|
||||
* Also see `extraStepForCalls`.
|
||||
*/
|
||||
private DataFlow::Node possibleSensitiveCallable() {
|
||||
possibleSensitiveCallable(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
@@ -172,6 +164,68 @@ private module SensitiveDataModeling {
|
||||
nodeFrom = possibleSensitiveCallable()
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveStrConstCandidate() {
|
||||
result = any(StrConst s | not s.isDocString()).getText() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveAttributeNameCandidate() {
|
||||
result = any(DataFlow::AttrRead a).getAttributeName() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveParameterNameCandidate() {
|
||||
result = any(Parameter p).getName() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveFunctionNameCandidate() {
|
||||
result = any(Function f).getName() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveNameCandidate() {
|
||||
result = any(Name n).getId() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
/**
|
||||
* This helper predicate serves to deduplicate the results of the preceding predicates. This
|
||||
* means that if, say, an attribute and a function parameter have the same name, then that name will
|
||||
* only be matched once, which greatly cuts down on the number of regexp matches that have to be
|
||||
* performed.
|
||||
*
|
||||
* Under normal circumstances, deduplication is only performed when a predicate is materialized, and
|
||||
* so to see the effect of this we must create a separate predicate that calculates the union of the
|
||||
* preceding predicates.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private string sensitiveStringCandidate() {
|
||||
result in [
|
||||
sensitiveNameCandidate(), sensitiveAttributeNameCandidate(),
|
||||
sensitiveParameterNameCandidate(), sensitiveFunctionNameCandidate(),
|
||||
sensitiveStrConstCandidate()
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns strings (primarily the names of various program entities) that may contain sensitive data
|
||||
* with the classification `classification`.
|
||||
*
|
||||
* This helper predicate ends up being very similar to `nameIndicatesSensitiveData`,
|
||||
* but is performance optimized to limit the number of regexp matches that have to be performed.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private string sensitiveString(SensitiveDataClassification classification) {
|
||||
result = sensitiveStringCandidate() and
|
||||
result.regexpMatch(maybeSensitiveRegexp(classification))
|
||||
}
|
||||
|
||||
/**
|
||||
* Any kind of variable assignment (also including with/for) where the name indicates
|
||||
* it contains sensitive data.
|
||||
@@ -190,7 +244,7 @@ private module SensitiveDataModeling {
|
||||
|
||||
SensitiveVariableAssignment() {
|
||||
exists(DefinitionNode def |
|
||||
nameIndicatesSensitiveData(def.(NameNode).getId(), classification) and
|
||||
def.(NameNode).getId() = sensitiveString(classification) and
|
||||
(
|
||||
this.asCfgNode() = def.getValue()
|
||||
or
|
||||
@@ -201,7 +255,7 @@ private module SensitiveDataModeling {
|
||||
)
|
||||
or
|
||||
exists(With with |
|
||||
nameIndicatesSensitiveData(with.getOptionalVars().(Name).getId(), classification) and
|
||||
with.getOptionalVars().(Name).getId() = sensitiveString(classification) and
|
||||
this.asExpr() = with.getContextExpr()
|
||||
)
|
||||
}
|
||||
@@ -217,7 +271,7 @@ private module SensitiveDataModeling {
|
||||
// Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
|
||||
// I considered excluding any `from ... import something_sensitive`, but then realized that
|
||||
// we should flag up `form ... import password as ...` as a password
|
||||
nameIndicatesSensitiveData(this.(DataFlow::AttrRead).getAttributeName(), classification)
|
||||
this.(DataFlow::AttrRead).getAttributeName() = sensitiveString(classification)
|
||||
or
|
||||
// Things like `getattr(foo, <reference-to-string>)`
|
||||
this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
|
||||
@@ -243,7 +297,7 @@ private module SensitiveDataModeling {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveGetCall() {
|
||||
this.getFunction().asCfgNode().(AttrNode).getName() = "get" and
|
||||
this.getFunction().(DataFlow::AttrRef).getAttributeName() = "get" and
|
||||
this.getArg(0) = sensitiveLookupStringConst(classification)
|
||||
}
|
||||
|
||||
@@ -254,9 +308,7 @@ private module SensitiveDataModeling {
|
||||
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveParameter() {
|
||||
nameIndicatesSensitiveData(this.getParameter().getName(), classification)
|
||||
}
|
||||
SensitiveParameter() { this.getParameter().getName() = sensitiveString(classification) }
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
@@ -23,7 +23,7 @@ class OptionalAttributeName = Internal::OptionalContentName;
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for tracking some type `myType`:
|
||||
* ```ql
|
||||
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
|
||||
* DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
@@ -17,6 +17,13 @@ abstract class AttrRef extends Node {
|
||||
*/
|
||||
abstract Node getObject();
|
||||
|
||||
/**
|
||||
* Holds if this data flow node accesses attribute named `attrName` on object `object`.
|
||||
*/
|
||||
predicate accesses(Node object, string attrName) {
|
||||
this.getObject() = object and this.getAttributeName() = attrName
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the expression node that defines the attribute being accessed, if any. This is
|
||||
* usually an identifier or literal.
|
||||
@@ -191,7 +198,7 @@ private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode {
|
||||
* - Dynamic attribute reads using `getattr`: `getattr(object, attr)`
|
||||
* - Qualified imports: `from module import attr as name`
|
||||
*/
|
||||
abstract class AttrRead extends AttrRef, Node { }
|
||||
abstract class AttrRead extends AttrRef, Node, LocalSourceNode { }
|
||||
|
||||
/** A simple attribute read, e.g. `object.attr` */
|
||||
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -724,7 +724,6 @@ private module Cached {
|
||||
Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
|
||||
) {
|
||||
storeStep(node1, c, node2) and
|
||||
read(_, c, _) and
|
||||
contentType = getNodeDataFlowType(node1) and
|
||||
containerType = getNodeDataFlowType(node2)
|
||||
or
|
||||
@@ -787,16 +786,24 @@ private module Cached {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the call context `call` either improves virtual dispatch in
|
||||
* `callable` or if it allows us to prune unreachable nodes in `callable`.
|
||||
* Holds if the call context `call` improves virtual dispatch in `callable`.
|
||||
*/
|
||||
cached
|
||||
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
|
||||
predicate recordDataFlowCallSiteDispatch(DataFlowCall call, DataFlowCallable callable) {
|
||||
reducedViableImplInCallContext(_, callable, call)
|
||||
or
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the call context `call` allows us to prune unreachable nodes in `callable`.
|
||||
*/
|
||||
cached
|
||||
predicate recordDataFlowCallSiteUnreachable(DataFlowCall call, DataFlowCallable callable) {
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call))
|
||||
}
|
||||
|
||||
cached
|
||||
predicate allowParameterReturnInSelfCached(ParamNode p) { allowParameterReturnInSelf(p) }
|
||||
|
||||
cached
|
||||
newtype TCallContext =
|
||||
TAnyCallContext() or
|
||||
@@ -847,6 +854,15 @@ private module Cached {
|
||||
TAccessPathFrontSome(AccessPathFront apf)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the call context `call` either improves virtual dispatch in
|
||||
* `callable` or if it allows us to prune unreachable nodes in `callable`.
|
||||
*/
|
||||
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
|
||||
recordDataFlowCallSiteDispatch(call, callable) or
|
||||
recordDataFlowCallSiteUnreachable(call, callable)
|
||||
}
|
||||
|
||||
/**
|
||||
* A `Node` at which a cast can occur such that the type should be checked.
|
||||
*/
|
||||
@@ -924,7 +940,7 @@ class CallContextSpecificCall extends CallContextCall, TSpecificCall {
|
||||
}
|
||||
|
||||
override predicate relevantFor(DataFlowCallable callable) {
|
||||
recordDataFlowCallSite(getCall(), callable)
|
||||
recordDataFlowCallSite(this.getCall(), callable)
|
||||
}
|
||||
|
||||
override predicate matchesCall(DataFlowCall call) { call = this.getCall() }
|
||||
@@ -1118,6 +1134,44 @@ ReturnPosition getReturnPosition(ReturnNodeExt ret) {
|
||||
result = getReturnPosition0(ret, ret.getKind())
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether `inner` can return to `call` in the call context `innercc`.
|
||||
* Assumes a context of `inner = viableCallableExt(call)`.
|
||||
*/
|
||||
bindingset[innercc, inner, call]
|
||||
predicate checkCallContextReturn(CallContext innercc, DataFlowCallable inner, DataFlowCall call) {
|
||||
innercc instanceof CallContextAny
|
||||
or
|
||||
exists(DataFlowCallable c0, DataFlowCall call0 |
|
||||
callEnclosingCallable(call0, inner) and
|
||||
innercc = TReturn(c0, call0) and
|
||||
c0 = prunedViableImplInCallContextReverse(call0, call)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether `call` can resolve to `calltarget` in the call context `cc`.
|
||||
* Assumes a context of `calltarget = viableCallableExt(call)`.
|
||||
*/
|
||||
bindingset[cc, call, calltarget]
|
||||
predicate checkCallContextCall(CallContext cc, DataFlowCall call, DataFlowCallable calltarget) {
|
||||
exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
|
||||
if reducedViableImplInCallContext(call, _, ctx)
|
||||
then calltarget = prunedViableImplInCallContext(call, ctx)
|
||||
else any()
|
||||
)
|
||||
or
|
||||
cc instanceof CallContextSomeCall
|
||||
or
|
||||
cc instanceof CallContextAny
|
||||
or
|
||||
cc instanceof CallContextReturn
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves a return from `callable` in `cc` to `call`. This is equivalent to
|
||||
* `callable = viableCallableExt(call) and checkCallContextReturn(cc, callable, call)`.
|
||||
*/
|
||||
bindingset[cc, callable]
|
||||
predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
|
||||
cc instanceof CallContextAny and callable = viableCallableExt(call)
|
||||
@@ -1129,6 +1183,10 @@ predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves a call from `call` in `cc` to `result`. This is equivalent to
|
||||
* `result = viableCallableExt(call) and checkCallContextCall(cc, call, result)`.
|
||||
*/
|
||||
bindingset[call, cc]
|
||||
DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
|
||||
exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
|
||||
@@ -1181,6 +1239,13 @@ class TypedContent extends MkTypedContent {
|
||||
|
||||
/** Gets a textual representation of this content. */
|
||||
string toString() { result = c.toString() }
|
||||
|
||||
/**
|
||||
* Holds if access paths with this `TypedContent` at their head always should
|
||||
* be tracked at high precision. This disables adaptive access path precision
|
||||
* for such access paths.
|
||||
*/
|
||||
predicate forceHighPrecision() { forceHighPrecision(c) }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1195,7 +1260,7 @@ abstract class AccessPathFront extends TAccessPathFront {
|
||||
|
||||
TypedContent getHead() { this = TFrontHead(result) }
|
||||
|
||||
predicate isClearedAt(Node n) { clearsContentCached(n, getHead().getContent()) }
|
||||
predicate isClearedAt(Node n) { clearsContentCached(n, this.getHead().getContent()) }
|
||||
}
|
||||
|
||||
class AccessPathFrontNil extends AccessPathFront, TFrontNil {
|
||||
@@ -168,7 +168,14 @@ module Consistency {
|
||||
msg = "ArgumentNode is missing PostUpdateNode."
|
||||
}
|
||||
|
||||
query predicate postWithInFlow(PostUpdateNode n, string msg) {
|
||||
// This predicate helps the compiler forget that in some languages
|
||||
// it is impossible for a `PostUpdateNode` to be the target of
|
||||
// `simpleLocalFlowStep`.
|
||||
private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
|
||||
|
||||
query predicate postWithInFlow(Node n, string msg) {
|
||||
isPostUpdateNode(n) and
|
||||
not clearsContent(n, _) and
|
||||
simpleLocalFlowStep(_, n) and
|
||||
msg = "PostUpdateNode should not be the target of local flow."
|
||||
}
|
||||
@@ -152,6 +152,7 @@ class DataFlowExpr = Expr;
|
||||
* Flow comes from definitions, uses and refinements.
|
||||
*/
|
||||
// TODO: Consider constraining `nodeFrom` and `nodeTo` to be in the same scope.
|
||||
// If they have different enclosing callables, we get consistency errors.
|
||||
module EssaFlow {
|
||||
predicate essaFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// Definition
|
||||
@@ -200,6 +201,9 @@ module EssaFlow {
|
||||
// If expressions
|
||||
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
|
||||
or
|
||||
// boolean inline expressions such as `x or y` or `x and y`
|
||||
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(BoolExprNode).getAnOperand()
|
||||
or
|
||||
// Flow inside an unpacking assignment
|
||||
iterableUnpackingFlowStep(nodeFrom, nodeTo)
|
||||
or
|
||||
@@ -225,35 +229,60 @@ module EssaFlow {
|
||||
//--------
|
||||
/**
|
||||
* This is the local flow predicate that is used as a building block in global
|
||||
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
|
||||
* excludes SSA flow through instance fields.
|
||||
* data flow.
|
||||
*
|
||||
* Local flow can happen either at import time, when the module is initialised
|
||||
* or at runtime when callables in the module are called.
|
||||
*/
|
||||
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// If there is ESSA-flow out of a node `node`, we want flow
|
||||
// If there is local flow out of a node `node`, we want flow
|
||||
// both out of `node` and any post-update node of `node`.
|
||||
exists(Node node |
|
||||
EssaFlow::essaFlowStep(node, nodeTo) and
|
||||
nodeFrom = update(node) and
|
||||
(
|
||||
not node instanceof EssaNode or
|
||||
not nodeTo instanceof EssaNode or
|
||||
localEssaStep(node, nodeTo)
|
||||
importTimeLocalFlowStep(node, nodeTo) or
|
||||
runtimeLocalFlowStep(node, nodeTo)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is an Essa flow step from `nodeFrom` to `nodeTo` that does not switch between
|
||||
* local and global SSA variables.
|
||||
* Holds if `node` is found at the top level of a module.
|
||||
*/
|
||||
private predicate localEssaStep(EssaNode nodeFrom, EssaNode nodeTo) {
|
||||
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
|
||||
(
|
||||
nodeFrom.getVar() instanceof GlobalSsaVariable and
|
||||
nodeTo.getVar() instanceof GlobalSsaVariable
|
||||
or
|
||||
not nodeFrom.getVar() instanceof GlobalSsaVariable and
|
||||
not nodeTo.getVar() instanceof GlobalSsaVariable
|
||||
pragma[inline]
|
||||
predicate isTopLevel(Node node) { node.getScope() instanceof Module }
|
||||
|
||||
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at import time. */
|
||||
predicate importTimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// As a proxy for whether statements can be executed at import time,
|
||||
// we check if they appear at the top level.
|
||||
// This will miss statements inside functions called from the top level.
|
||||
isTopLevel(nodeFrom) and
|
||||
isTopLevel(nodeTo) and
|
||||
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at runtime. */
|
||||
predicate runtimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// Anything not at the top level can be executed at runtime.
|
||||
not isTopLevel(nodeFrom) and
|
||||
not isTopLevel(nodeTo) and
|
||||
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/** `ModuleVariable`s are accessed via jump steps at runtime. */
|
||||
predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
|
||||
// Module variable read
|
||||
nodeFrom.(ModuleVariableNode).getARead() = nodeTo
|
||||
or
|
||||
// Module variable write
|
||||
nodeFrom = nodeTo.(ModuleVariableNode).getAWrite()
|
||||
or
|
||||
// Setting the possible values of the variable at the end of import time
|
||||
exists(SsaVariable def |
|
||||
def = any(SsaVariable var).getAnUltimateDefinition() and
|
||||
def.getDefinition() = nodeFrom.asCfgNode() and
|
||||
def.getVariable() = nodeTo.(ModuleVariableNode).getVariable()
|
||||
)
|
||||
}
|
||||
|
||||
@@ -581,11 +610,11 @@ class DataFlowLambda extends DataFlowCallable, TLambda {
|
||||
|
||||
override string toString() { result = lambda.toString() }
|
||||
|
||||
override CallNode getACall() { result = getCallableValue().getACall() }
|
||||
override CallNode getACall() { result = this.getCallableValue().getACall() }
|
||||
|
||||
override Scope getScope() { result = lambda.getEvaluatingScope() }
|
||||
|
||||
override NameNode getParameter(int n) { result = getParameter(getCallableValue(), n) }
|
||||
override NameNode getParameter(int n) { result = getParameter(this.getCallableValue(), n) }
|
||||
|
||||
override string getName() { result = "Lambda callable" }
|
||||
|
||||
@@ -857,11 +886,7 @@ string ppReprType(DataFlowType t) { none() }
|
||||
* taken into account.
|
||||
*/
|
||||
predicate jumpStep(Node nodeFrom, Node nodeTo) {
|
||||
// Module variable read
|
||||
nodeFrom.(ModuleVariableNode).getARead() = nodeTo
|
||||
or
|
||||
// Module variable write
|
||||
nodeFrom = nodeTo.(ModuleVariableNode).getAWrite()
|
||||
runtimeJumpStep(nodeFrom, nodeTo)
|
||||
or
|
||||
// Read of module attribute:
|
||||
exists(AttrRead r, ModuleValue mv |
|
||||
@@ -869,6 +894,9 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
|
||||
module_export(mv.getScope(), r.getAttributeName(), nodeFrom) and
|
||||
nodeTo = r
|
||||
)
|
||||
or
|
||||
// Default value for parameter flows to that parameter
|
||||
defaultValueFlowStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1033,6 +1061,19 @@ predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node
|
||||
)
|
||||
}
|
||||
|
||||
predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
|
||||
exists(Function f, Parameter p, ParameterDefinition def |
|
||||
// `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
|
||||
p = f.getArgByName(_) and
|
||||
nodeFrom.asExpr() = p.getDefault() and
|
||||
// The following expresses
|
||||
// nodeTo.(ParameterNode).getParameter() = p
|
||||
// without non-monotonic recursion
|
||||
def.getParameter() = p and
|
||||
nodeTo.getNode() = def.getDefiningNode()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
|
||||
*/
|
||||
@@ -1604,6 +1645,12 @@ predicate isImmutableOrUnobservable(Node n) { none() }
|
||||
|
||||
int accessPathLimit() { result = 5 }
|
||||
|
||||
/**
|
||||
* Holds if access paths with `c` at their head always should be tracked at high
|
||||
* precision. This disables adaptive access path precision for such access paths.
|
||||
*/
|
||||
predicate forceHighPrecision(Content c) { none() }
|
||||
|
||||
/** Holds if `n` should be hidden from path explanations. */
|
||||
predicate nodeIsHidden(Node n) { none() }
|
||||
|
||||
@@ -1617,3 +1664,12 @@ predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { no
|
||||
|
||||
/** Extra data-flow steps needed for lambda flow analysis. */
|
||||
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
|
||||
|
||||
/**
|
||||
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
|
||||
* side-effect, resulting in a summary from `p` to itself.
|
||||
*
|
||||
* One example would be to allow flow like `p.foo = p.bar;`, which is disallowed
|
||||
* by default as a heuristic.
|
||||
*/
|
||||
predicate allowParameterReturnInSelf(ParameterNode p) { none() }
|
||||
@@ -102,7 +102,7 @@ class Node extends TNode {
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
@@ -180,6 +180,45 @@ class CallCfgNode extends CfgNode, LocalSourceNode {
|
||||
Node getArgByName(string name) { result.asCfgNode() = node.getArgByName(name) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node corresponding to a method call, that is `foo.bar(...)`.
|
||||
*
|
||||
* Also covers the case where the method lookup is done separately from the call itself, as in
|
||||
* `temp = foo.bar; temp(...)`. Note that this is only tracked through local scope.
|
||||
*/
|
||||
class MethodCallNode extends CallCfgNode {
|
||||
AttrRead method_lookup;
|
||||
|
||||
MethodCallNode() { method_lookup = this.getFunction().getALocalSource() }
|
||||
|
||||
/**
|
||||
* Gets the name of the method being invoked (the `bar` in `foo.bar(...)`) if it can be determined.
|
||||
*
|
||||
* Note that this method may have multiple results if a single call node represents calls to
|
||||
* multiple different objects and methods. If you want to link up objects and method names
|
||||
* accurately, use the `calls` method instead.
|
||||
*/
|
||||
string getMethodName() { result = method_lookup.getAttributeName() }
|
||||
|
||||
/**
|
||||
* Gets the data-flow node corresponding to the object receiving this call. That is, the `foo` in
|
||||
* `foo.bar(...)`.
|
||||
*
|
||||
* Note that this method may have multiple results if a single call node represents calls to
|
||||
* multiple different objects and methods. If you want to link up objects and method names
|
||||
* accurately, use the `calls` method instead.
|
||||
*/
|
||||
Node getObject() { result = method_lookup.getObject() }
|
||||
|
||||
/** Holds if this data-flow node calls method `methodName` on the object node `object`. */
|
||||
predicate calls(Node object, string methodName) {
|
||||
// As `getObject` and `getMethodName` may both have multiple results, we must look up the object
|
||||
// and method name directly on `method_lookup`.
|
||||
object = method_lookup.getObject() and
|
||||
methodName = method_lookup.getAttributeName()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An expression, viewed as a node in a data flow graph.
|
||||
*
|
||||
@@ -293,7 +332,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
|
||||
override Scope getScope() { result = mod }
|
||||
|
||||
override string toString() {
|
||||
result = "ModuleVariableNode for " + var.toString() + " in " + mod.toString()
|
||||
result = "ModuleVariableNode for " + mod.getName() + "." + var.getId()
|
||||
}
|
||||
|
||||
/** Gets the module in which this variable appears. */
|
||||
@@ -18,6 +18,10 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
|
||||
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
|
||||
|
||||
/**
|
||||
* DEPRECATED. Use the API graphs library (`semmle.python.ApiGraphs`) instead.
|
||||
*
|
||||
* For a drop-in replacement, use `API::moduleImport(name).getAUse()`.
|
||||
*
|
||||
* Gets a `Node` that refers to the module referenced by `name`.
|
||||
* Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
|
||||
* reference to the module `pkg`.
|
||||
@@ -37,7 +41,7 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
|
||||
* `mypkg/foo.py` but the variable `foo` containing `42` -- however, `import mypkg.foo` will always cause `mypkg.foo`
|
||||
* to refer to the module.
|
||||
*/
|
||||
Node importNode(string name) {
|
||||
deprecated Node importNode(string name) {
|
||||
exists(Variable var, Import imp, Alias alias |
|
||||
alias = imp.getAName() and
|
||||
alias.getAsname() = var.getAStore() and
|
||||
@@ -33,15 +33,20 @@ private import DataFlowPrivate
|
||||
class LocalSourceNode extends Node {
|
||||
cached
|
||||
LocalSourceNode() {
|
||||
not simpleLocalFlowStep(_, this) and
|
||||
// Currently, we create synthetic post-update nodes for
|
||||
// - arguments to calls that may modify said argument
|
||||
// - direct reads a writes of object attributes
|
||||
// Both of these preserve the identity of the underlying pointer, and hence we exclude these as
|
||||
// local source nodes.
|
||||
// We do, however, allow the post-update nodes that arise from object creation (which are non-synthetic).
|
||||
not this instanceof SyntheticPostUpdateNode
|
||||
this instanceof ExprNode and
|
||||
not simpleLocalFlowStep(_, this)
|
||||
or
|
||||
// We include all module variable nodes, as these act as stepping stones between writes and
|
||||
// reads of global variables. Without them, type tracking based on `LocalSourceNode`s would be
|
||||
// unable to track across global variables.
|
||||
//
|
||||
// Once the `track` and `backtrack` methods have been fully deprecated, this disjunct can be
|
||||
// removed, and the entire class can extend `ExprNode`. At that point, `TypeTrackingNode` should
|
||||
// be used for type tracking instead of `LocalSourceNode`.
|
||||
this instanceof ModuleVariableNode
|
||||
or
|
||||
// We explicitly include any read of a global variable, as some of these may have local flow going
|
||||
// into them.
|
||||
this = any(ModuleVariableNode mvn).getARead()
|
||||
}
|
||||
|
||||
@@ -57,7 +62,12 @@ class LocalSourceNode extends Node {
|
||||
/**
|
||||
* Gets a read of attribute `attrName` on this node.
|
||||
*/
|
||||
AttrRead getAnAttributeRead(string attrName) { result = getAnAttributeReference(attrName) }
|
||||
AttrRead getAnAttributeRead(string attrName) { result = this.getAnAttributeReference(attrName) }
|
||||
|
||||
/**
|
||||
* Gets a write of attribute `attrName` on this node.
|
||||
*/
|
||||
AttrWrite getAnAttributeWrite(string attrName) { result = this.getAnAttributeReference(attrName) }
|
||||
|
||||
/**
|
||||
* Gets a reference (read or write) of any attribute on this node.
|
||||
@@ -71,13 +81,28 @@ class LocalSourceNode extends Node {
|
||||
/**
|
||||
* Gets a read of any attribute on this node.
|
||||
*/
|
||||
AttrRead getAnAttributeRead() { result = getAnAttributeReference() }
|
||||
AttrRead getAnAttributeRead() { result = this.getAnAttributeReference() }
|
||||
|
||||
/**
|
||||
* Gets a write of any attribute on this node.
|
||||
*/
|
||||
AttrWrite getAnAttributeWrite() { result = this.getAnAttributeReference() }
|
||||
|
||||
/**
|
||||
* Gets a call to this node.
|
||||
*/
|
||||
CallCfgNode getACall() { Cached::call(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a call to the method `methodName` on this node.
|
||||
*
|
||||
* Includes both calls that have the syntactic shape of a method call (as in `obj.m(...)`), and
|
||||
* calls where the callee undergoes some additional local data flow (as in `tmp = obj.m; m(...)`).
|
||||
*/
|
||||
MethodCallNode getAMethodCall(string methodName) {
|
||||
result = this.getAnAttributeRead(methodName).getACall()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node that this node may flow to using one heap and/or interprocedural step.
|
||||
*
|
||||
@@ -95,6 +120,53 @@ class LocalSourceNode extends Node {
|
||||
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A node that can be used for type tracking or type back-tracking.
|
||||
*
|
||||
* All steps made during type tracking should be between instances of this class.
|
||||
*/
|
||||
class TypeTrackingNode = LocalSourceNode;
|
||||
|
||||
/** Temporary holding ground for the `TypeTrackingNode` class. */
|
||||
private module FutureWork {
|
||||
class FutureTypeTrackingNode extends Node {
|
||||
FutureTypeTrackingNode() {
|
||||
this instanceof LocalSourceNode
|
||||
or
|
||||
this instanceof ModuleVariableNode
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this node can flow to `nodeTo` in one or more local flow steps.
|
||||
*
|
||||
* For `ModuleVariableNode`s, the only "local" step is to the node itself.
|
||||
* For `LocalSourceNode`s, this is the usual notion of local flow.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate flowsTo(Node node) {
|
||||
this instanceof ModuleVariableNode and this = node
|
||||
or
|
||||
this.(LocalSourceNode).flowsTo(node)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node that this node may flow to using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTrackingNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a node that may flow into this one using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeBackTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTrackingNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
|
||||
}
|
||||
}
|
||||
|
||||
cached
|
||||
private module Cached {
|
||||
/**
|
||||
@@ -107,11 +179,21 @@ private module Cached {
|
||||
source = sink
|
||||
or
|
||||
exists(Node second |
|
||||
simpleLocalFlowStep(source, second) and
|
||||
simpleLocalFlowStep*(second, sink)
|
||||
localSourceFlowStep(source, second) and
|
||||
localSourceFlowStep*(second, sink)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper predicate for `hasLocalSource`. Removes any steps go to module variable reads, as these
|
||||
* are already local source nodes in their own right.
|
||||
*/
|
||||
cached
|
||||
private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo) and
|
||||
not nodeTo = any(ModuleVariableNode v).getARead()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `base` flows to the base of `ref` and `ref` has attribute name `attr`.
|
||||
*/
|
||||
@@ -1,6 +1,20 @@
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Provides helper predicates for pretty-printing `DataFlow::Node`s.
|
||||
*
|
||||
* Since these have not been performance optimized, please only use them for
|
||||
* debug-queries or in tests.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Gets the pretty-printed version of the Expr `e`.
|
||||
*/
|
||||
string prettyExpr(Expr e) {
|
||||
not e instanceof Num and
|
||||
not e instanceof StrConst and
|
||||
@@ -27,7 +41,9 @@ string prettyExpr(Expr e) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets pretty-printed version of the DataFlow::Node `node`
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Gets the pretty-printed version of the DataFlow::Node `node`
|
||||
*/
|
||||
bindingset[node]
|
||||
string prettyNode(DataFlow::Node node) {
|
||||
@@ -35,12 +51,13 @@ string prettyNode(DataFlow::Node node) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets pretty-printed version of the DataFlow::Node `node`, that is suitable for use
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Gets the pretty-printed version of the DataFlow::Node `node`, that is suitable for use
|
||||
* with `TestUtilities.InlineExpectationsTest` (that is, no spaces unless required).
|
||||
*/
|
||||
bindingset[node]
|
||||
string prettyNodeForInlineTest(DataFlow::Node node) {
|
||||
exists(node.asExpr()) and
|
||||
result = prettyExpr(node.asExpr())
|
||||
or
|
||||
exists(Expr e | e = node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() |
|
||||
@@ -1,7 +1,8 @@
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
|
||||
private import semmle.python.dataflow.new.internal.TaintTrackingPublic
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Holds if `node` should be a sanitizer in all global taint flow configurations
|
||||
@@ -9,6 +10,13 @@ private import semmle.python.dataflow.new.internal.TaintTrackingPublic
|
||||
*/
|
||||
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
|
||||
|
||||
/**
|
||||
* Holds if default `TaintTracking::Configuration`s should allow implicit reads
|
||||
* of `c` at sinks and inputs to additional taint steps.
|
||||
*/
|
||||
bindingset[node]
|
||||
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
|
||||
|
||||
private module Cached {
|
||||
/**
|
||||
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
|
||||
@@ -38,9 +46,13 @@ private module Cached {
|
||||
or
|
||||
copyStep(nodeFrom, nodeTo)
|
||||
or
|
||||
forStep(nodeFrom, nodeTo)
|
||||
DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
|
||||
or
|
||||
unpackingAssignmentStep(nodeFrom, nodeTo)
|
||||
DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
|
||||
or
|
||||
DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
|
||||
or
|
||||
awaitStep(nodeFrom, nodeTo)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,13 +87,13 @@ predicate subscriptStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
|
||||
*/
|
||||
predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
|
||||
// transforming something tainted into a string will make the string tainted
|
||||
exists(CallNode call | call = nodeTo.getNode() |
|
||||
call.getFunction().(NameNode).getId() in ["str", "bytes", "unicode"] and
|
||||
exists(DataFlow::CallCfgNode call | call = nodeTo |
|
||||
(
|
||||
nodeFrom.getNode() = call.getArg(0)
|
||||
call = API::builtin(["str", "bytes", "unicode"]).getACall()
|
||||
or
|
||||
nodeFrom.getNode() = call.getArgByName("object")
|
||||
)
|
||||
call.getFunction().asCfgNode().(NameNode).getId() in ["str", "bytes", "unicode"]
|
||||
) and
|
||||
nodeFrom in [call.getArg(0), call.getArgByName("object")]
|
||||
)
|
||||
or
|
||||
// String methods. Note that this doesn't recognize `meth = "foo".upper; meth()`
|
||||
@@ -148,39 +160,37 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
|
||||
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
|
||||
// construction by literal
|
||||
// TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
|
||||
storeStep(nodeFrom, _, nodeTo)
|
||||
DataFlowPrivate::storeStep(nodeFrom, _, nodeTo)
|
||||
or
|
||||
// constructor call
|
||||
exists(CallNode call | call = nodeTo.asCfgNode() |
|
||||
call.getFunction().(NameNode).getId() in [
|
||||
"list", "set", "frozenset", "dict", "defaultdict", "tuple"
|
||||
] and
|
||||
call.getArg(0) = nodeFrom.getNode()
|
||||
exists(DataFlow::CallCfgNode call | call = nodeTo |
|
||||
call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and
|
||||
call.getArg(0) = nodeFrom
|
||||
// TODO: Properly handle defaultdict/namedtuple
|
||||
)
|
||||
or
|
||||
// functions operating on collections
|
||||
exists(CallNode call | call = nodeTo.asCfgNode() |
|
||||
call.getFunction().(NameNode).getId() in ["sorted", "reversed", "iter", "next"] and
|
||||
call.getArg(0) = nodeFrom.getNode()
|
||||
exists(DataFlow::CallCfgNode call | call = nodeTo |
|
||||
call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and
|
||||
call.getArg(0) = nodeFrom
|
||||
)
|
||||
or
|
||||
// methods
|
||||
exists(CallNode call, string name | call = nodeTo.asCfgNode() |
|
||||
name in [
|
||||
exists(DataFlow::MethodCallNode call, string methodName | call = nodeTo |
|
||||
methodName in [
|
||||
// general
|
||||
"copy", "pop",
|
||||
// dict
|
||||
"values", "items", "get", "popitem"
|
||||
] and
|
||||
call.getFunction().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
|
||||
call.calls(nodeFrom, methodName)
|
||||
)
|
||||
or
|
||||
// list.append, set.add
|
||||
exists(CallNode call, string name |
|
||||
name in ["append", "add"] and
|
||||
call.getFunction().(AttrNode).getObject(name) =
|
||||
nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode().asCfgNode() and
|
||||
call.getArg(0) = nodeFrom.getNode()
|
||||
exists(DataFlow::MethodCallNode call, DataFlow::Node obj |
|
||||
call.calls(obj, ["append", "add"]) and
|
||||
obj = nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() and
|
||||
call.getArg(0) = nodeFrom
|
||||
)
|
||||
}
|
||||
|
||||
@@ -188,38 +198,16 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
|
||||
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying.
|
||||
*/
|
||||
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
|
||||
exists(CallNode call | call = nodeTo.getNode() |
|
||||
// Fully qualified: copy.copy, copy.deepcopy
|
||||
(
|
||||
call.getFunction().(NameNode).getId() in ["copy", "deepcopy"]
|
||||
or
|
||||
call.getFunction().(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy"
|
||||
) and
|
||||
call.getArg(0) = nodeFrom.getNode()
|
||||
exists(DataFlow::CallCfgNode call | call = nodeTo |
|
||||
call = API::moduleImport("copy").getMember(["copy", "deepcopy"]).getACall() and
|
||||
call.getArg(0) = nodeFrom
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
|
||||
* for example `for x in xs`, or `for x,y in points`.
|
||||
* Holds if taint can flow from `nodeFrom` to `nodeTo` with an `await`-step,
|
||||
* such that the whole expression `await x` is tainted if `x` is tainted.
|
||||
*/
|
||||
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
|
||||
exists(EssaNodeDefinition defn, For for |
|
||||
for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
|
||||
nodeTo.getVar() = defn and
|
||||
nodeFrom.asExpr() = for.getIter()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
|
||||
* Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
|
||||
*/
|
||||
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
|
||||
// `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
|
||||
exists(MultiAssignmentDefinition defn, Assign assign |
|
||||
assign.getATarget().contains(defn.getDefiningNode().getNode()) and
|
||||
nodeTo.getVar() = defn and
|
||||
nodeFrom.asExpr() = assign.getValue()
|
||||
)
|
||||
predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
nodeTo.asExpr().(Await).getValue() = nodeFrom.asExpr()
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user