Merge remote-tracking branch 'origin/main' into jty/python/emailInjection

This commit is contained in:
jorgectf
2021-10-28 13:26:57 +02:00
7961 changed files with 793007 additions and 274834 deletions

View File

@@ -106,7 +106,7 @@
"prefix": "type tracking",
"body": [
"/** Gets a reference to ${3:a thing}. */",
"private DataFlow::LocalSourceNode ${1:myType}(DataFlow::TypeTracker t) {",
"private DataFlow::TypeTrackingNode ${1:myType}(DataFlow::TypeTracker t) {",
" t.start() and",
" result = ${2:value}",
" or",
@@ -152,4 +152,102 @@
]
},
"Type tracking class": {
"scope": "ql",
"prefix": "type tracking class",
"body": [
"/**",
" * Provides models for the `${TM_SELECTED_TEXT}` class",
" *",
" * See ${1:https://apiref (TODO)}.",
" */",
"module ${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/} {",
" /** Gets a reference to the `${TM_SELECTED_TEXT}` class. */",
" private API::Node classRef() {",
" result = API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")",
" }",
"",
" /**",
" * A source of instances of `${TM_SELECTED_TEXT}`, extend this class to model new instances.",
" *",
" * This can include instantiations of the class, return values from function",
" * calls, or a special parameter that will be set when functions are called by an external",
" * library.",
" *",
" * Use the predicate `${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/}::instance()` to get references to instances of `${TM_SELECTED_TEXT}`.",
" */",
" abstract class InstanceSource extends DataFlow::LocalSourceNode { }",
"",
" /** A direct instantiation of `${TM_SELECTED_TEXT}`. */",
" private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {",
" ClassInstantiation() { this = classRef().getACall() }",
" }",
"",
" /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
" private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {",
" t.start() and",
" result instanceof InstanceSource",
" or",
" exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))",
" }",
"",
" /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
" DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }",
"",
" /**",
" * Taint propagation for `${TM_SELECTED_TEXT}`.",
" */",
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
" InstanceTaintSteps() { this = \"${TM_SELECTED_TEXT}\" }",
" ",
" override DataFlow::Node getInstance() { result = instance() }",
" ",
" override string getAttributeName() { none() }",
" ",
" override string getMethodName() { none() }",
" ",
" override string getAsyncMethodName() { none() }",
" }",
"",
" /**",
" * Extra taint propagation for `${TM_SELECTED_TEXT}`, not covered by `InstanceTaintSteps`.",
" */",
" private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {",
" override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {",
" // TODO",
" none()",
" }",
" }",
"}",
],
"description": "Type tracking class (select full class path before inserting)",
},
"foo": {
"scope": "ql",
"prefix": "foo",
"body": [
" /**",
" * Taint propagation for `$1`.",
" */",
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
" InstanceTaintSteps() { this = \"$1\" }",
"",
" override DataFlow::Node getInstance() { result = instance() }",
"",
" override string getAttributeName() { none() }",
"",
" override string getMethodName() { none() }",
"",
" override string getAsyncMethodName() { none() }",
" }",
],
},
"API graph .getMember chain": {
"scope": "ql",
"prefix": "api graph .getMember chain",
"body": [
"API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")"
],
"description": "API graph .getMember chain (select full path before inserting)",
},
}

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* API graph nodes now contain a `getAwaited()` member predicate, for getting the result of awaiting an item, such as `await foo`.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added model of SQL execution in `clickhouse-driver` and `aioch` PyPI packages, resulting in additional sinks for the SQL Injection query (`py/sql-injection`). This modeling was originally [submitted as a contribution by @japroc](https://github.com/github/codeql/pull/5889).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of sources/sinks when using `twisted` to create web servers.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `jmespath`.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `rsa`.

View File

@@ -0,0 +1,5 @@
lgtm,codescanning
* A new class `DataFlow::MethodCallNode` extends `DataFlow::CallCfgNode` with convenient methods for
accessing the receiver and method name of a method call.
* The `LocalSourceNode` class now has a `getAMethodCall` method, with which one can easily access
method calls with the given node as a receiver.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `MarkupSafe`.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added `HTTP::Server::CookieWrite` concept for statements that sets a cookie in an HTTP response, along with modeling of this in supported web frameworks (aiohttp/flask/django/tornado/twisted).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* The DataFlow libraries have been augmented with support for `Configuration`-specific in-place read steps at, for example, sinks and custom taint steps. This means that it is now possible to specify sinks that accept flow with non-empty access paths.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of raw SQL execution from the PyPI package `peewee`.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* The `track` and `backtrack` methods on `LocalSourceNode` are in the process of being deprecated. When using type trackers, the corresponding methods on `TypeTrackingNode` should be used instead.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Changed the way to provide extra sources/sinks for `@kind path-problem` queries, to avoid a potential performance problem due to re-evaluation of data-flow configurations. Please use the new `<query>Customization.qll` files and extend their classes instead (such as extending the `Sink` class from `python/ql/src/semmle/python/security/dataflow/SqlInjectionCustomizations.qll`). This is relevant for the queries: `py/sql-injection`, `py/code-injection`, `py/command-line-injection`, `py/reflective-xss`, `py/url-redirection`, `py/unsafe-deserialization`, `py/stack-trace-exposure`, `py/path-injection`.

View File

@@ -0,0 +1,4 @@
lgtm,codescanning
* The `importNode` predicate from the data-flow library has been deprecated. In its place, we
recommend using the API graphs library, accessible via `import semmle.python.ApiGraphs`.

View File

@@ -0,0 +1,3 @@
lgtm,codescanning
* Added an experimental _Inefficient regular expression_ (`py/redos`) query, which is already available in JavaScript.
* Added an experimental _Polynomial regular expression used on uncontrolled data_ (`py/polynomial-redos`), which is already available in JavaScript.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Updated _Modification of parameter with default_ (`py/modification-of-default-value`) query to use the new data flow library instead of the old taint tracking library and to remove the use of points-to analysis. You may see differences in the results found by the query, but overall this change should result in a more robust and accurate analysis.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of SQL execution in the `Flask-SQLAlchemy` PyPI package, resulting in additional sinks for the SQL Injection query (`py/sql-injection`).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of SQL execution in the `SQLAlchemy` PyPI package, resulting in additional sinks for the SQL Injection query (`py/sql-injection`). This modeling was originally [submitted as a contribution by @mrthankyou](https://github.com/github/codeql/pull/5680).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Expanded the query _SQL query built from user-controlled sources_ (`py/sql-injection`) to alert if user-input is added to a TextClause from SQLAlchemy, since that can lead to SQL injection.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Function parameters with default values will now see flow from those values.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* The query "Regular expression injection" (`py/regex-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @jorgectf](https://github.com/github/codeql/pull/5442).

View File

@@ -0,0 +1,3 @@
lgtm,codescanning
* Added data-flow from both `x` and `y` to `x or y` and `x and y`, as a slight over-approximation of what is described in the
[Python Language Reference](https://docs.python.org/3/reference/expressions.html#boolean-operations).

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Improved modeling of decoding through pickle related functions (which can lead to code execution), resulting in additional sinks for the _Deserializing untrusted input_ query (`py/unsafe-deserialization`). Now we fully support `pickle.load`, `pickle.loads`, `pickle.Unpickler`, `marshal.load`, `marshal.loads`, `dill.load`, `dill.loads`, `shelve.open`.

View File

@@ -0,0 +1,2 @@
codescanning
* Problems with extraction that in most cases won't completely break the analysis are now reported as warnings rather than errors.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the `ruamel.yaml` PyPI package, resulting in additional sinks for the _Deserializing untrusted input_ (`py/unsafe-deserialization`) query (since `ruamel.yaml.load` can lead to code execution).

View File

@@ -0,0 +1,4 @@
---
dependencies: {}
compiled: false
lockVersion: 1.0.0

View File

@@ -1,3 +1,4 @@
name: codeql-python-examples
version: 0.0.0
libraryPathDependencies: codeql-python
name: codeql/python-examples
version: 0.0.2
dependencies:
codeql/python-all: "*"

View File

@@ -0,0 +1,4 @@
---
dependencies: {}
compiled: false
lockVersion: 1.0.0

7
python/ql/lib/qlpack.yml Normal file
View File

@@ -0,0 +1,7 @@
name: codeql/python-all
version: 0.0.2
dbscheme: semmlecode.python.dbscheme
extractor: python
library: true
dependencies:
codeql/python-upgrades: 0.0.2

View File

@@ -55,7 +55,7 @@ module API {
/**
* Gets a call to the function represented by this API component.
*/
DataFlow::CallCfgNode getACall() { result = getReturn().getAnImmediateUse() }
DataFlow::CallCfgNode getACall() { result = this.getReturn().getAnImmediateUse() }
/**
* Gets a node representing member `m` of this API component.
@@ -67,21 +67,21 @@ module API {
*/
bindingset[m]
bindingset[result]
Node getMember(string m) { result = getASuccessor(Label::member(m)) }
Node getMember(string m) { result = this.getASuccessor(Label::member(m)) }
/**
* Gets a node representing a member of this API component where the name of the member is
* not known statically.
*/
Node getUnknownMember() { result = getASuccessor(Label::unknownMember()) }
Node getUnknownMember() { result = this.getASuccessor(Label::unknownMember()) }
/**
* Gets a node representing a member of this API component where the name of the member may
* or may not be known statically.
*/
Node getAMember() {
result = getASuccessor(Label::member(_)) or
result = getUnknownMember()
result = this.getASuccessor(Label::member(_)) or
result = this.getUnknownMember()
}
/**
@@ -90,18 +90,25 @@ module API {
* This predicate may have multiple results when there are multiple invocations of this API component.
* Consider using `getACall()` if there is a need to distinguish between individual calls.
*/
Node getReturn() { result = getASuccessor(Label::return()) }
Node getReturn() { result = this.getASuccessor(Label::return()) }
/**
* Gets a node representing a subclass of the class represented by this node.
*/
Node getASubclass() { result = getASuccessor(Label::subclass()) }
Node getASubclass() { result = this.getASuccessor(Label::subclass()) }
/**
* Gets a node representing the result from awaiting this node.
*/
Node getAwaited() { result = this.getASuccessor(Label::await()) }
/**
* Gets a string representation of the lexicographically least among all shortest access paths
* from the root to this node.
*/
string getPath() { result = min(string p | p = getAPath(Impl::distanceFromRoot(this)) | p) }
string getPath() {
result = min(string p | p = this.getAPath(Impl::distanceFromRoot(this)) | p)
}
/**
* Gets a node such that there is an edge in the API graph between this node and the other
@@ -119,13 +126,13 @@ module API {
* Gets a node such that there is an edge in the API graph between this node and the other
* one.
*/
Node getAPredecessor() { result = getAPredecessor(_) }
Node getAPredecessor() { result = this.getAPredecessor(_) }
/**
* Gets a node such that there is an edge in the API graph between that other node and
* this one.
*/
Node getASuccessor() { result = getASuccessor(_) }
Node getASuccessor() { result = this.getASuccessor(_) }
/**
* Gets the data-flow node that gives rise to this node, if any.
@@ -137,16 +144,16 @@ module API {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
getInducingNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
this.getInducingNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
or
// For nodes that do not have a meaningful location, `path` is the empty string and all other
// parameters are zero.
not exists(getInducingNode()) and
not exists(this.getInducingNode()) and
filepath = "" and
startline = 0 and
startcolumn = 0 and
@@ -197,7 +204,7 @@ module API {
or
this = Impl::MkModuleImport(_) and type = "ModuleImport "
|
result = type + getPath()
result = type + this.getPath()
or
not exists(this.getPath()) and result = type + "with no path"
)
@@ -419,13 +426,8 @@ module API {
* a value in the module `m`.
*/
private predicate possible_builtin_defined_in_module(string name, Module m) {
exists(NameNode n |
not exists(LocalVariable v | n.defines(v)) and
n.isStore() and
name = n.getId() and
name = getBuiltInName() and
m = n.getEnclosingModule()
)
global_name_defined_in_module(name, m) and
name = getBuiltInName()
}
/**
@@ -440,6 +442,51 @@ module API {
m = n.getEnclosingModule()
}
/**
* Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
* built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
*/
private predicate name_possibly_defined_in_import_star(NameNode n, string name, Scope s) {
n.isLoad() and
name = n.getId() and
// Not already defined in an enclosing scope.
not exists(LocalVariable v |
v.getId() = name and v.getScope() = n.getScope().getEnclosingScope*()
) and
not name = getBuiltInName() and
s = n.getScope().getEnclosingScope*() and
exists(potential_import_star_base(s)) and
not global_name_defined_in_module(name, n.getEnclosingModule())
}
/** Holds if a global variable called `name` is assigned a value in the module `m`. */
private predicate global_name_defined_in_module(string name, Module m) {
exists(NameNode n |
not exists(LocalVariable v | n.defines(v)) and
n.isStore() and
name = n.getId() and
m = n.getEnclosingModule()
)
}
/**
* Gets the API graph node for all modules imported with `from ... import *` inside the scope `s`.
*
* For example, given
*
* `from foo.bar import *`
*
* this would be the API graph node with the path
*
* `moduleImport("foo").getMember("bar")`
*/
private TApiNode potential_import_star_base(Scope s) {
exists(DataFlow::Node ref |
ref.asCfgNode() = any(ImportStarNode n | n.getScope() = s).getModule() and
use(result, ref)
)
}
/**
* Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
* `lbl` in the API graph.
@@ -469,11 +516,28 @@ module API {
exists(DataFlow::Node superclass | pred.flowsTo(superclass) |
ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
)
or
// awaiting
exists(Await await, DataFlow::Node awaitedValue |
lbl = Label::await() and
ref.asExpr() = await and
await.getValue() = awaitedValue.asExpr() and
pred.flowsTo(awaitedValue)
)
)
or
// Built-ins, treated as members of the module `builtins`
base = MkModuleImport("builtins") and
lbl = Label::member(any(string name | ref = likely_builtin(name)))
or
// Unknown variables that may belong to a module imported with `import *`
exists(Scope s |
base = potential_import_star_base(s) and
lbl =
Label::member(any(string name |
name_possibly_defined_in_import_star(ref.asCfgNode(), name, s)
))
)
}
/**
@@ -499,7 +563,7 @@ module API {
*
* The flow from `src` to that node may be inter-procedural.
*/
private DataFlow::LocalSourceNode trackUseNode(
private DataFlow::TypeTrackingNode trackUseNode(
DataFlow::LocalSourceNode src, DataFlow::TypeTracker t
) {
t.start() and
@@ -517,7 +581,6 @@ module API {
cached
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
// We exclude module variable nodes, as these do not correspond to real uses.
not result instanceof DataFlow::ModuleVariableNode
}
@@ -585,5 +648,9 @@ private module Label {
/** Gets the `return` edge label. */
string return() { result = "getReturn()" }
/** Gets the `subclass` edge label. */
string subclass() { result = "getASubclass()" }
/** Gets the `await` edge label. */
string await() { result = "getAwaited()" }
}

View File

@@ -49,12 +49,15 @@ abstract class AstNode extends AstNode_ {
/** Whether this contains `inner` syntactically */
predicate contains(AstNode inner) { this.getAChildNode+() = inner }
/** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
predicate containsInScope(AstNode inner) {
pragma[noinline]
private predicate containsInScope(AstNode inner, Scope scope) {
this.contains(inner) and
this.getScope() = inner.getScope() and
not inner instanceof Scope
not inner instanceof Scope and
scope = this.getScope()
}
/** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
predicate containsInScope(AstNode inner) { this.containsInScope(inner, inner.getScope()) }
}
/* Parents */

View File

@@ -67,7 +67,7 @@ class CommentBlock extends @py_comment {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -4,7 +4,7 @@
* provide concrete subclasses.
*/
import python
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
@@ -72,6 +72,39 @@ module FileSystemAccess {
}
}
/**
* A data flow node that writes data to the file system access.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `FileSystemWriteAccess::Range` instead.
*/
class FileSystemWriteAccess extends FileSystemAccess {
override FileSystemWriteAccess::Range range;
/**
* Gets a node that represents data to be written to the file system (possibly with
* some transformation happening before it is written, like JSON encoding).
*/
DataFlow::Node getADataNode() { result = range.getADataNode() }
}
/** Provides a class for modeling new file system writes. */
module FileSystemWriteAccess {
/**
* A data flow node that writes data to the file system access.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `FileSystemWriteAccess` instead.
*/
abstract class Range extends FileSystemAccess::Range {
/**
* Gets a node that represents data to be written to the file system (possibly with
* some transformation happening before it is written, like JSON encoding).
*/
abstract DataFlow::Node getADataNode();
}
}
/** Provides classes for modeling path-related APIs. */
module Path {
/**
@@ -235,6 +268,35 @@ private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaint
}
}
/**
* A data-flow node that logs data.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Logging::Range` instead.
*/
class Logging extends DataFlow::Node {
Logging::Range range;
Logging() { this = range }
/** Gets an input that is logged. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides a class for modeling new logging mechanisms. */
module Logging {
/**
* A data-flow node that logs data.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Logging` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets an input that is logged. */
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that dynamically executes Python code.
*
@@ -293,6 +355,136 @@ module SqlExecution {
}
}
/**
* A data-flow node that executes a regular expression.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexExecution::Range` instead.
*/
class RegexExecution extends DataFlow::Node {
RegexExecution::Range range;
RegexExecution() { this = range }
/** Gets the data flow node for the regex being executed by this node. */
DataFlow::Node getRegex() { result = range.getRegex() }
/** Gets a dataflow node for the string to be searched or matched against. */
DataFlow::Node getString() { result = range.getString() }
/**
* Gets the name of this regex execution, typically the name of an executing method.
* This is used for nice alert messages and should include the module if possible.
*/
string getName() { result = range.getName() }
}
/** Provides classes for modeling new regular-expression execution APIs. */
module RegexExecution {
/**
* A data-flow node that executes a regular expression.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the data flow node for the regex being executed by this node. */
abstract DataFlow::Node getRegex();
/** Gets a dataflow node for the string to be searched or matched against. */
abstract DataFlow::Node getString();
/**
* Gets the name of this regex execution, typically the name of an executing method.
* This is used for nice alert messages and should include the module if possible.
*/
abstract string getName();
}
}
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Escaping::Range` instead.
*/
class Escaping extends DataFlow::Node {
Escaping::Range range;
Escaping() {
this = range and
// escapes that don't have _both_ input/output defined are not valid
exists(range.getAnInput()) and
exists(range.getOutput())
}
/** Gets an input that will be escaped. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
/** Gets the output that contains the escaped data. */
DataFlow::Node getOutput() { result = range.getOutput() }
/**
* Gets the context that this function escapes for, such as `html`, or `url`.
*/
string getKind() { result = range.getKind() }
}
/** Provides a class for modeling new escaping APIs. */
module Escaping {
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Escaping` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets an input that will be escaped. */
abstract DataFlow::Node getAnInput();
/** Gets the output that contains the escaped data. */
abstract DataFlow::Node getOutput();
/**
* Gets the context that this function escapes for.
*
* While kinds are represented as strings, this should not be relied upon. Use the
* predicates in the `Escaping` module, such as `getHtmlKind`.
*/
abstract string getKind();
}
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
string getHtmlKind() { result = "html" }
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
string getRegexKind() { result = "regex" }
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
//
// Technically it claims to escape for both HTML and XML, but for now we don't have
// anything that relies on XML escaping, so I'm going to defer deciding whether they
// should be the same kind, or whether they deserve to be treated differently.
}
/**
* An escape of a string so it can be safely included in
* the body of an HTML element, for example, replacing `{}` in
* `<p>{}</p>`.
*/
class HtmlEscaping extends Escaping {
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
}
/**
* An escape of a string so it can be safely included in
* the body of a regex.
*/
class RegexEscaping extends Escaping {
RegexEscaping() { range.getKind() = Escaping::getRegexKind() }
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
import semmle.python.web.HttpConstants
@@ -345,7 +537,7 @@ module HTTP {
/** Gets the URL pattern for this route, if it can be statically determined. */
string getUrlPattern() {
exists(StrConst str |
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getUrlPatternArg()) and
this.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(str) and
result = str.getText()
)
}
@@ -478,9 +670,7 @@ module HTTP {
/** Gets the mimetype of this HTTP response, if it can be statically determined. */
string getMimetype() {
exists(StrConst str |
DataFlow::exprNode(str)
.(DataFlow::LocalSourceNode)
.flowsTo(this.getMimetypeOrContentTypeArg()) and
this.getMimetypeOrContentTypeArg().getALocalSource() = DataFlow::exprNode(str) and
result = str.getText().splitAt(";", 0)
)
or
@@ -524,6 +714,62 @@ module HTTP {
abstract DataFlow::Node getRedirectLocation();
}
}
/**
* A data-flow node that sets a cookie in an HTTP response.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `HTTP::CookieWrite::Range` instead.
*/
class CookieWrite extends DataFlow::Node {
CookieWrite::Range range;
CookieWrite() { this = range }
/**
* Gets the argument, if any, specifying the raw cookie header.
*/
DataFlow::Node getHeaderArg() { result = range.getHeaderArg() }
/**
* Gets the argument, if any, specifying the cookie name.
*/
DataFlow::Node getNameArg() { result = range.getNameArg() }
/**
* Gets the argument, if any, specifying the cookie value.
*/
DataFlow::Node getValueArg() { result = range.getValueArg() }
}
/** Provides a class for modeling new cookie writes on HTTP responses. */
module CookieWrite {
/**
* A data-flow node that sets a cookie in an HTTP response.
*
* Note: we don't require that this redirect must be sent to a client (a kind of
* "if a tree falls in a forest and nobody hears it" situation).
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `HttpResponse` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument, if any, specifying the raw cookie header.
*/
abstract DataFlow::Node getHeaderArg();
/**
* Gets the argument, if any, specifying the cookie name.
*/
abstract DataFlow::Node getNameArg();
/**
* Gets the argument, if any, specifying the cookie value.
*/
abstract DataFlow::Node getValueArg();
}
}
}
}
@@ -570,7 +816,7 @@ module Cryptography {
/** Provides classes for modeling new key-pair generation APIs. */
module KeyGeneration {
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
private DataFlow::LocalSourceNode keysizeBacktracker(
private DataFlow::TypeTrackingNode keysizeBacktracker(
DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
t.start() and

View File

@@ -17,7 +17,7 @@ class Expr extends Expr_, AstNode {
* Whether this expression defines variable `v`
* If doing dataflow, then consider using SsaVariable.getDefinition() for more precision.
*/
predicate defines(Variable v) { this.getASubExpression+().defines(v) }
predicate defines(Variable v) { this.getASubExpression().defines(v) }
/** Whether this expression may have a side effect (as determined purely from its syntax) */
predicate hasSideEffects() {
@@ -240,7 +240,7 @@ class Call extends Call_ {
/** Gets the tuple (*) argument of this call, provided there is exactly one. */
Expr getStarArg() {
count(this.getStarargs()) < 2 and
result = getStarargs()
result = this.getStarargs()
}
}

View File

@@ -1,9 +1,7 @@
import python
/** A file */
class File extends Container {
File() { files(this, _, _, _, _) }
class File extends Container, @file {
/** DEPRECATED: Use `getAbsolutePath` instead. */
deprecated override string getName() { result = this.getAbsolutePath() }
@@ -15,7 +13,7 @@ class File extends Container {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -34,9 +32,7 @@ class File extends Container {
}
/** Gets a short name for this file (just the file name) */
string getShortName() {
exists(string simple, string ext | files(this, _, simple, ext, _) | result = simple + ext)
}
string getShortName() { result = this.getBaseName() }
private int lastLine() {
result = max(int i | exists(Location l | l.getFile() = this and l.getEndLine() = i))
@@ -55,7 +51,7 @@ class File extends Container {
)
}
override string getAbsolutePath() { files(this, result, _, _, _) }
override string getAbsolutePath() { files(this, result) }
/** Gets the URL of this file. */
override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" }
@@ -89,7 +85,15 @@ class File extends Container {
i.getTest().(Compare).compares(name, op, main) and
name.getId() = "__name__" and
main.getText() = "__main__"
)
) and
// Exclude files named `__main__.py`. These are often _not_ meant to be run directly, but
// contain this construct anyway.
//
// Their presence in a package (say, `foo`) means one can execute the package directly using
// `python -m foo` (which will run the `foo/__main__.py` file). Since being an entry point for
// execution means treating imports as absolute, this causes trouble, since when run with
// `python -m`, the interpreter uses the usual package semantics.
not this.getShortName() = "__main__.py"
or
// The file contains a `#!` line referencing the python interpreter
exists(Comment c |
@@ -110,21 +114,16 @@ private predicate occupied_line(File f, int n) {
}
/** A folder (directory) */
class Folder extends Container {
Folder() { folders(this, _, _) }
class Folder extends Container, @folder {
/** DEPRECATED: Use `getAbsolutePath` instead. */
deprecated override string getName() { result = this.getAbsolutePath() }
/** DEPRECATED: Use `getBaseName` instead. */
deprecated string getSimple() { folders(this, _, result) }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -136,7 +135,7 @@ class Folder extends Container {
endcolumn = 0
}
override string getAbsolutePath() { folders(this, result, _) }
override string getAbsolutePath() { folders(this, result) }
/** Gets the URL of this folder. */
override string getURL() { result = "folder://" + this.getAbsolutePath() }
@@ -257,7 +256,7 @@ abstract class Container extends @container {
* </table>
*/
string getBaseName() {
result = getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
result = this.getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
}
/**
@@ -283,7 +282,9 @@ abstract class Container extends @container {
* <tr><td>"/tmp/x.tar.gz"</td><td>"gz"</td></tr>
* </table>
*/
string getExtension() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3) }
string getExtension() {
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3)
}
/**
* Gets the stem of this container, that is, the prefix of its base name up to
@@ -302,7 +303,9 @@ abstract class Container extends @container {
* <tr><td>"/tmp/x.tar.gz"</td><td>"x.tar"</td></tr>
* </table>
*/
string getStem() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1) }
string getStem() {
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1)
}
File getFile(string baseName) {
result = this.getAFile() and
@@ -324,7 +327,7 @@ abstract class Container extends @container {
/**
* Gets a URL representing the location of this container.
*
* For more information see [Providing URLs](https://help.semmle.com/QL/learn-ql/ql/locations.html#providing-urls).
* For more information see [Providing URLs](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls).
*/
abstract string getURL();
@@ -430,7 +433,7 @@ class Location extends @location {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -458,7 +461,7 @@ class Line extends @py_line {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -653,6 +653,8 @@ class DefinitionNode extends ControlFlowNode {
DefinitionNode() {
exists(Assign a | a.getATarget().getAFlowNode() = this)
or
exists(AnnAssign a | a.getTarget().getAFlowNode() = this and exists(a.getValue()))
or
exists(Alias a | a.getAsname().getAFlowNode() = this)
or
augstore(_, this)
@@ -795,6 +797,9 @@ private AstNode assigned_value(Expr lhs) {
/* lhs = result */
exists(Assign a | a.getATarget() = lhs and result = a.getValue())
or
/* lhs : annotation = result */
exists(AnnAssign a | a.getTarget() = lhs and result = a.getValue())
or
/* import result as lhs */
exists(Alias a | a.getAsname() = lhs and result = a.getValue())
or
@@ -846,9 +851,9 @@ class ForNode extends ControlFlowNode {
/** Holds if this `for` statement causes iteration over `sequence` storing each step of the iteration in `target` */
predicate iterates(ControlFlowNode target, ControlFlowNode sequence) {
sequence = getSequence() and
target = possibleTarget() and
not target = unrolledSuffix().possibleTarget()
sequence = this.getSequence() and
target = this.possibleTarget() and
not target = this.unrolledSuffix().possibleTarget()
}
/** Gets the sequence node for this `for` statement. */
@@ -1106,7 +1111,7 @@ class BasicBlock extends @py_flow_node {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -4,23 +4,33 @@
// If you add modeling of a new framework/library, remember to add it it to the docs in
// `docs/codeql/support/reusables/frameworks.rst`
private import semmle.python.frameworks.Aioch
private import semmle.python.frameworks.Aiohttp
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Cryptodome
private import semmle.python.frameworks.Cryptography
private import semmle.python.frameworks.Dill
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.FlaskSqlAlchemy
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.Jmespath
private import semmle.python.frameworks.MarkupSafe
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.RuamelYaml
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Twisted
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Yaml
private import semmle.python.frameworks.Yarl

View File

@@ -58,6 +58,7 @@ class Function extends Function_, Scope, AstNode {
/** Gets the name of the nth argument (for simple arguments) */
string getArgName(int index) { result = this.getArg(index).(Name).getId() }
/** Gets the parameter of this function with the name `name`. */
Parameter getArgByName(string name) {
(
result = this.getAnArg()

View File

@@ -9,6 +9,7 @@ class ConditionBlock extends BasicBlock {
}
/** Basic blocks controlled by this condition, i.e. those BBs for which the condition is testIsTrue */
pragma[nomagic]
predicate controls(BasicBlock controlled, boolean testIsTrue) {
/*
* For this block to control the block 'controlled' with 'testIsTrue' the following must be true:

View File

@@ -31,7 +31,7 @@ class ImportExpr extends ImportExpr_ {
// relative imports are no longer allowed in Python 3
major_version() < 3 and
// and can be explicitly turned off in later versions of Python 2
not getEnclosingModule().hasFromFuture("absolute_import")
not this.getEnclosingModule().hasFromFuture("absolute_import")
}
/**
@@ -53,8 +53,8 @@ class ImportExpr extends ImportExpr_ {
* the name of the topmost module that will be imported.
*/
private string relativeTopName() {
getLevel() = -1 and
result = basePackageName(1) + "." + this.getTopName() and
this.getLevel() = -1 and
result = this.basePackageName(1) + "." + this.getTopName() and
valid_module_name(result)
}
@@ -62,7 +62,7 @@ class ImportExpr extends ImportExpr_ {
if this.getLevel() <= 0
then result = this.getTopName()
else (
result = basePackageName(this.getLevel()) and
result = this.basePackageName(this.getLevel()) and
valid_module_name(result)
)
}
@@ -73,17 +73,17 @@ class ImportExpr extends ImportExpr_ {
* which may not be the name of the module.
*/
string bottomModuleName() {
result = relativeTopName() + this.remainderOfName()
result = this.relativeTopName() + this.remainderOfName()
or
not exists(relativeTopName()) and
not exists(this.relativeTopName()) and
result = this.qualifiedTopName() + this.remainderOfName()
}
/** Gets the name of topmost module or package being imported */
string topModuleName() {
result = relativeTopName()
result = this.relativeTopName()
or
not exists(relativeTopName()) and
not exists(this.relativeTopName()) and
result = this.qualifiedTopName()
}
@@ -94,7 +94,7 @@ class ImportExpr extends ImportExpr_ {
*/
string getImportedModuleName() {
exists(string bottomName | bottomName = this.bottomModuleName() |
if this.isTop() then result = topModuleName() else result = bottomName
if this.isTop() then result = this.topModuleName() else result = bottomName
)
}

View File

@@ -86,13 +86,13 @@ class Module extends Module_, Scope, AstNode {
/** Gets the package containing this module (or parent package if this is a package) */
Module getPackage() {
this.getName().matches("%.%") and
result.getName() = getName().regexpReplaceAll("\\.[^.]*$", "")
result.getName() = this.getName().regexpReplaceAll("\\.[^.]*$", "")
}
/** Gets the name of the package containing this module */
string getPackageName() {
this.getName().matches("%.%") and
result = getName().regexpReplaceAll("\\.[^.]*$", "")
result = this.getName().regexpReplaceAll("\\.[^.]*$", "")
}
/** Gets the metrics for this module */

View File

@@ -7,6 +7,7 @@
*/
import python
import semmle.python.RegexTreeView
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()
@@ -51,8 +52,10 @@ private newtype TPrintAstNode =
TStmtListNode(StmtList list) {
shouldPrint(list.getAnItem(), _) and
not list = any(Module mod).getBody() and
not forall(AstNode child | child = list.getAnItem() | isNotNeeded(child)) and
exists(list.getAnItem())
not forall(AstNode child | child = list.getAnItem() | isNotNeeded(child))
} or
TRegExpTermNode(RegExpTerm term) {
exists(StrConst str | term.getRootTerm() = getParsedRegExp(str) and shouldPrint(str, _))
}
/**
@@ -419,6 +422,42 @@ class ParameterNode extends AstElementNode {
}
}
/**
* A print node for a `StrConst`.
*
* The string has a child, if the child is used as a regular expression,
* which is the root of the regular expression.
*/
class StrConstNode extends AstElementNode {
override StrConst element;
override PrintAstNode getChild(int childIndex) {
childIndex = 0 and result.(RegExpTermNode).getTerm() = getParsedRegExp(element)
}
}
/**
* A print node for a regular expression term.
*/
class RegExpTermNode extends TRegExpTermNode, PrintAstNode {
RegExpTerm term;
RegExpTermNode() { this = TRegExpTermNode(term) }
/** Gets the `RegExpTerm` for this node. */
RegExpTerm getTerm() { result = term }
override PrintAstNode getChild(int childIndex) {
result.(RegExpTermNode).getTerm() = term.getChild(childIndex)
}
override string toString() {
result = "[" + strictconcat(term.getPrimaryQLClass(), " | ") + "] " + term.toString()
}
override Location getLocation() { result = term.getLocation() }
}
/**
* Gets the `i`th child from `node` ordered by location.
*/
@@ -447,7 +486,7 @@ private module PrettyPrinting {
string getQlClass(AstNode a) {
shouldPrint(a, _) and
(
not exists(getQlCustomClass(a)) and result = a.toString()
not exists(getQlCustomClass(a)) and result = strictconcat(a.toString(), " | ")
or
result = strictconcat(getQlCustomClass(a), " | ")
)

View File

@@ -0,0 +1,998 @@
/** Provides a class hierarchy corresponding to a parse tree of regular expressions. */
import python
private import semmle.python.regex
/**
* An element containing a regular expression term, that is, either
* a string literal (parsed as a regular expression)
* or another regular expression term.
*
* For sequences and alternations, we require at least one child.
* Otherwise, we wish to represent the term differently.
* This avoids multiple representations of the same term.
*/
newtype TRegExpParent =
/** A string literal used as a regular expression */
TRegExpLiteral(Regex re) or
/** A quantified term */
TRegExpQuantifier(Regex re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
/** A sequence term */
TRegExpSequence(Regex re, int start, int end) {
re.sequence(start, end) and
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
} or
/** An alternation term */
TRegExpAlt(Regex re, int start, int end) {
re.alternation(start, end) and
exists(int part_end |
re.alternationOption(start, end, start, part_end) and
part_end < end
) // if an alternation does not have more than one element, it should be treated as that element instead.
} or
/** A character class term */
TRegExpCharacterClass(Regex re, int start, int end) { re.charSet(start, end) } or
/** A character range term */
TRegExpCharacterRange(Regex re, int start, int end) { re.charRange(_, start, _, _, end) } or
/** A group term */
TRegExpGroup(Regex re, int start, int end) { re.group(start, end) } or
/** A special character */
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
/** A normal character */
TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
/** A back reference */
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
/**
* An element containing a regular expression term, that is, either
* a string literal (parsed as a regular expression)
* or another regular expression term.
*/
class RegExpParent extends TRegExpParent {
/** Gets a textual representation of this element. */
string toString() { result = "RegExpParent" }
/** Gets the `i`th child term. */
abstract RegExpTerm getChild(int i);
/** Gets a child term . */
RegExpTerm getAChild() { result = this.getChild(_) }
/** Gets the number of child terms. */
int getNumChild() { result = count(this.getAChild()) }
/** Gets the associated regex. */
abstract Regex getRegex();
}
/** A string literal used as a regular expression */
class RegExpLiteral extends TRegExpLiteral, RegExpParent {
Regex re;
RegExpLiteral() { this = TRegExpLiteral(re) }
override RegExpTerm getChild(int i) { i = 0 and result.getRegex() = re and result.isRootTerm() }
/** Holds if dot, `.`, matches all characters, including newlines. */
predicate isDotAll() { re.getAMode() = "DOTALL" }
/** Holds if this regex matching is case-insensitive for this regex. */
predicate isIgnoreCase() { re.getAMode() = "IGNORECASE" }
/** Get a string representing all modes for this regex. */
string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }
override Regex getRegex() { result = re }
/** Gets the primary QL class for this regex. */
string getPrimaryQLClass() { result = "RegExpLiteral" }
}
/**
* A regular expression term, that is, a syntactic part of a regular expression.
*/
class RegExpTerm extends RegExpParent {
Regex re;
int start;
int end;
RegExpTerm() {
this = TRegExpAlt(re, start, end)
or
this = TRegExpBackRef(re, start, end)
or
this = TRegExpCharacterClass(re, start, end)
or
this = TRegExpCharacterRange(re, start, end)
or
this = TRegExpNormalChar(re, start, end)
or
this = TRegExpGroup(re, start, end)
or
this = TRegExpQuantifier(re, start, end)
or
this = TRegExpSequence(re, start, end)
or
this = TRegExpSpecialChar(re, start, end)
}
/**
* Gets the outermost term of this regular expression.
*/
RegExpTerm getRootTerm() {
this.isRootTerm() and result = this
or
result = this.getParent().(RegExpTerm).getRootTerm()
}
/**
* Holds if this term is part of a string literal
* that is interpreted as a regular expression.
*/
predicate isUsedAsRegExp() { any() }
/**
* Holds if this is the root term of a regular expression.
*/
predicate isRootTerm() { start = 0 and end = re.getText().length() }
override RegExpTerm getChild(int i) {
result = this.(RegExpAlt).getChild(i)
or
result = this.(RegExpBackRef).getChild(i)
or
result = this.(RegExpCharacterClass).getChild(i)
or
result = this.(RegExpCharacterRange).getChild(i)
or
result = this.(RegExpNormalChar).getChild(i)
or
result = this.(RegExpGroup).getChild(i)
or
result = this.(RegExpQuantifier).getChild(i)
or
result = this.(RegExpSequence).getChild(i)
or
result = this.(RegExpSpecialChar).getChild(i)
}
/**
* Gets the parent term of this regular expression term, or the
* regular expression literal if this is the root term.
*/
RegExpParent getParent() { result.getAChild() = this }
override Regex getRegex() { result = re }
/** Gets the offset at which this term starts. */
int getStart() { result = start }
/** Gets the offset at which this term ends. */
int getEnd() { result = end }
override string toString() { result = re.getText().substring(start, end) }
/**
* Gets the location of the surrounding regex, as locations inside the regex do not exist.
* To get location information corresponding to the term inside the regex,
* use `hasLocationInfo`.
*/
Location getLocation() { result = re.getLocation() }
/** Holds if this term is found at the specified location offsets. */
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
exists(int re_start, int re_end |
re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, re_end) and
startcolumn = re_start + start + 4 and
endcolumn = re_start + end + 3
)
}
/** Gets the file in which this term is found. */
File getFile() { result = this.getLocation().getFile() }
/** Gets the raw source text of this term. */
string getRawValue() { result = this.toString() }
/** Gets the string literal in which this term is found. */
RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }
/** Gets the regular expression term that is matched (textually) before this one, if any. */
RegExpTerm getPredecessor() {
exists(RegExpTerm parent | parent = this.getParent() |
result = parent.(RegExpSequence).previousElement(this)
or
not exists(parent.(RegExpSequence).previousElement(this)) and
not parent instanceof RegExpSubPattern and
result = parent.getPredecessor()
)
}
/** Gets the regular expression term that is matched (textually) after this one, if any. */
RegExpTerm getSuccessor() {
exists(RegExpTerm parent | parent = this.getParent() |
result = parent.(RegExpSequence).nextElement(this)
or
not exists(parent.(RegExpSequence).nextElement(this)) and
not parent instanceof RegExpSubPattern and
result = parent.getSuccessor()
)
}
/** Gets the primary QL class for this term. */
string getPrimaryQLClass() { result = "RegExpTerm" }
}
/**
* A quantified regular expression term.
*
* Example:
*
* ```
* ((ECMA|Java)[sS]cript)*
* ```
*/
class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
int part_end;
boolean maybe_empty;
boolean may_repeat_forever;
RegExpQuantifier() {
this = TRegExpQuantifier(re, start, end) and
re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever)
}
override RegExpTerm getChild(int i) {
i = 0 and
result.getRegex() = re and
result.getStart() = start and
result.getEnd() = part_end
}
/** Hols if this term may match an unlimited number of times. */
predicate mayRepeatForever() { may_repeat_forever = true }
/** Gets the qualifier for this term. That is e.g "?" for "a?". */
string getQualifier() { result = re.getText().substring(part_end, end) }
override string getPrimaryQLClass() { result = "RegExpQuantifier" }
}
/**
* A regular expression term that permits unlimited repetitions.
*/
class InfiniteRepetitionQuantifier extends RegExpQuantifier {
InfiniteRepetitionQuantifier() { this.mayRepeatForever() }
}
/**
* A star-quantified term.
*
* Example:
*
* ```
* \w*
* ```
*/
class RegExpStar extends InfiniteRepetitionQuantifier {
RegExpStar() { this.getQualifier().charAt(0) = "*" }
override string getPrimaryQLClass() { result = "RegExpStar" }
}
/**
* A plus-quantified term.
*
* Example:
*
* ```
* \w+
* ```
*/
class RegExpPlus extends InfiniteRepetitionQuantifier {
RegExpPlus() { this.getQualifier().charAt(0) = "+" }
override string getPrimaryQLClass() { result = "RegExpPlus" }
}
/**
* An optional term.
*
* Example:
*
* ```
* ;?
* ```
*/
class RegExpOpt extends RegExpQuantifier {
RegExpOpt() { this.getQualifier().charAt(0) = "?" }
override string getPrimaryQLClass() { result = "RegExpOpt" }
}
/**
* A range-quantified term
*
* Examples:
*
* ```
* \w{2,4}
* \w{2,}
* \w{2}
* ```
*/
class RegExpRange extends RegExpQuantifier {
string upper;
string lower;
RegExpRange() { re.multiples(part_end, end, lower, upper) }
/** Gets the string defining the upper bound of this range, if any. */
string getUpper() { result = upper }
/** Gets the string defining the lower bound of this range, if any. */
string getLower() { result = lower }
/**
* Gets the upper bound of the range, if any.
*
* If there is no upper bound, any number of repetitions is allowed.
* For a term of the form `r{lo}`, both the lower and the upper bound
* are `lo`.
*/
int getUpperBound() { result = this.getUpper().toInt() }
/** Gets the lower bound of the range. */
int getLowerBound() { result = this.getLower().toInt() }
override string getPrimaryQLClass() { result = "RegExpRange" }
}
/**
* A sequence term.
*
* Example:
*
* ```
* (ECMA|Java)Script
* ```
*
* This is a sequence with the elements `(ECMA|Java)` and `Script`.
*/
class RegExpSequence extends RegExpTerm, TRegExpSequence {
RegExpSequence() { this = TRegExpSequence(re, start, end) }
override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
/** Gets the element preceding `element` in this sequence. */
RegExpTerm previousElement(RegExpTerm element) { element = this.nextElement(result) }
/** Gets the element following `element` in this sequence. */
RegExpTerm nextElement(RegExpTerm element) {
exists(int i |
element = this.getChild(i) and
result = this.getChild(i + 1)
)
}
override string getPrimaryQLClass() { result = "RegExpSequence" }
}
pragma[nomagic]
private int seqChildEnd(Regex re, int start, int end, int i) {
result = seqChild(re, start, end, i).getEnd()
}
// moved out so we can use it in the charpred
private RegExpTerm seqChild(Regex re, int start, int end, int i) {
re.sequence(start, end) and
(
i = 0 and
result.getRegex() = re and
result.getStart() = start and
exists(int itemEnd |
re.item(start, itemEnd) and
result.getEnd() = itemEnd
)
or
i > 0 and
result.getRegex() = re and
exists(int itemStart | itemStart = seqChildEnd(re, start, end, i - 1) |
result.getStart() = itemStart and
re.item(itemStart, result.getEnd())
)
)
}
/**
* An alternative term, that is, a term of the form `a|b`.
*
* Example:
*
* ```
* ECMA|Java
* ```
*/
class RegExpAlt extends RegExpTerm, TRegExpAlt {
RegExpAlt() { this = TRegExpAlt(re, start, end) }
override RegExpTerm getChild(int i) {
i = 0 and
result.getRegex() = re and
result.getStart() = start and
exists(int part_end |
re.alternationOption(start, end, start, part_end) and
result.getEnd() = part_end
)
or
i > 0 and
result.getRegex() = re and
exists(int part_start |
part_start = this.getChild(i - 1).getEnd() + 1 // allow for the |
|
result.getStart() = part_start and
re.alternationOption(start, end, part_start, result.getEnd())
)
}
override string getPrimaryQLClass() { result = "RegExpAlt" }
}
/**
* An escaped regular expression term, that is, a regular expression
* term starting with a backslash, which is not a backreference.
*
* Example:
*
* ```
* \.
* \w
* ```
*/
class RegExpEscape extends RegExpNormalChar {
RegExpEscape() { re.escapedCharacter(start, end) }
/**
* Gets the name of the escaped; for example, `w` for `\w`.
* TODO: Handle named escapes.
*/
override string getValue() {
this.isIdentityEscape() and result = this.getUnescaped()
or
this.getUnescaped() = "n" and result = "\n"
or
this.getUnescaped() = "r" and result = "\r"
or
this.getUnescaped() = "t" and result = "\t"
or
// TODO: Find a way to include a formfeed character
// this.getUnescaped() = "f" and result = " "
// or
this.isUnicode() and
result = this.getUnicode()
}
/** Holds if this terms name is given by the part following the escape character. */
predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t", "f"] }
override string getPrimaryQLClass() { result = "RegExpEscape" }
/** Gets the part of the term following the escape character. That is e.g. "w" if the term is "\w". */
private string getUnescaped() { result = this.getText().suffix(1) }
/**
* Gets the text for this escape. That is e.g. "\w".
*/
private string getText() { result = re.getText().substring(start, end) }
/**
* Holds if this is a unicode escape.
*/
private predicate isUnicode() { this.getText().prefix(2) = ["\\u", "\\U"] }
/**
* Gets the unicode char for this escape.
* E.g. for `\u0061` this returns "a".
*/
private string getUnicode() {
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
result = codepoint.toUnicode()
)
}
/**
* Gets int value for the `index`th char in the hex number of the unicode escape.
* E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
*/
private int getHexValueFromUnicode(int index) {
this.isUnicode() and
exists(string hex, string char | hex = this.getText().suffix(2) |
char = hex.charAt(index) and
result = 16.pow(hex.length() - index - 1) * toHex(char)
)
}
}
/**
* Gets the hex number for the `hex` char.
*/
private int toHex(string hex) {
hex = [0 .. 9].toString() and
result = hex.toInt()
or
result = 10 and hex = ["a", "A"]
or
result = 11 and hex = ["b", "B"]
or
result = 12 and hex = ["c", "C"]
or
result = 13 and hex = ["d", "D"]
or
result = 14 and hex = ["e", "E"]
or
result = 15 and hex = ["f", "F"]
}
/**
* A character class escape in a regular expression.
* That is, an escaped charachter that denotes multiple characters.
*
* Examples:
*
* ```
* \w
* \S
* ```
*/
class RegExpCharacterClassEscape extends RegExpEscape {
RegExpCharacterClassEscape() { this.getValue() in ["d", "D", "s", "S", "w", "W"] }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
}
/**
* A character class in a regular expression.
*
* Examples:
*
* ```
* [a-z_]
* [^<>&]
* ```
*/
class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }
/** Holds if this character class is inverted, matching the opposite of its content. */
predicate isInverted() { re.getChar(start + 1) = "^" }
/** Gets the `i`th char inside this charater class. */
string getCharThing(int i) { result = re.getChar(i + start) }
/** Holds if this character class can match anything. */
predicate isUniversalClass() {
// [^]
this.isInverted() and not exists(this.getAChild())
or
// [\w\W] and similar
not this.isInverted() and
exists(string cce1, string cce2 |
cce1 = this.getAChild().(RegExpCharacterClassEscape).getValue() and
cce2 = this.getAChild().(RegExpCharacterClassEscape).getValue()
|
cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
)
}
override RegExpTerm getChild(int i) {
i = 0 and
result.getRegex() = re and
exists(int itemStart, int itemEnd |
result.getStart() = itemStart and
re.char_set_start(start, itemStart) and
re.char_set_child(start, itemStart, itemEnd) and
result.getEnd() = itemEnd
)
or
i > 0 and
result.getRegex() = re and
exists(int itemStart | itemStart = this.getChild(i - 1).getEnd() |
result.getStart() = itemStart and
re.char_set_child(start, itemStart, result.getEnd())
)
}
override string getPrimaryQLClass() { result = "RegExpCharacterClass" }
}
/**
* A character range in a character class in a regular expression.
*
* Example:
*
* ```
* a-z
* ```
*/
class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
int lower_end;
int upper_start;
RegExpCharacterRange() {
this = TRegExpCharacterRange(re, start, end) and
re.charRange(_, start, lower_end, upper_start, end)
}
/** Holds if this range goes from `lo` to `hi`, in effect is `lo-hi`. */
predicate isRange(string lo, string hi) {
lo = re.getText().substring(start, lower_end) and
hi = re.getText().substring(upper_start, end)
}
override RegExpTerm getChild(int i) {
i = 0 and
result.getRegex() = re and
result.getStart() = start and
result.getEnd() = lower_end
or
i = 1 and
result.getRegex() = re and
result.getStart() = upper_start and
result.getEnd() = end
}
override string getPrimaryQLClass() { result = "RegExpCharacterRange" }
}
/**
* A normal character in a regular expression, that is, a character
* without special meaning. This includes escaped characters.
*
* Examples:
* ```
* t
* \t
* ```
*/
class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }
/**
* Holds if this constant represents a valid Unicode character (as opposed
* to a surrogate code point that does not correspond to a character by itself.)
*/
predicate isCharacter() { any() }
/** Gets the string representation of the char matched by this term. */
string getValue() { result = re.getText().substring(start, end) }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpNormalChar" }
}
/**
* A constant regular expression term, that is, a regular expression
* term matching a single string. Currently, this will always be a single character.
*
* Example:
*
* ```
* a
* ```
*/
class RegExpConstant extends RegExpTerm {
string value;
RegExpConstant() {
this = TRegExpNormalChar(re, start, end) and
not this instanceof RegExpCharacterClassEscape and
// exclude chars in qualifiers
// TODO: push this into regex library
not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
qstart <= start and end <= qend
) and
value = this.(RegExpNormalChar).getValue()
}
/**
* Holds if this constant represents a valid Unicode character (as opposed
* to a surrogate code point that does not correspond to a character by itself.)
*/
predicate isCharacter() { any() }
/** Gets the string matched by this constant term. */
string getValue() { result = value }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpConstant" }
}
/**
* A grouped regular expression.
*
* Examples:
*
* ```
* (ECMA|Java)
* (?:ECMA|Java)
* (?<quote>['"])
* ```
*/
class RegExpGroup extends RegExpTerm, TRegExpGroup {
RegExpGroup() { this = TRegExpGroup(re, start, end) }
/**
* Gets the index of this capture group within the enclosing regular
* expression literal.
*
* For example, in the regular expression `/((a?).)(?:b)/`, the
* group `((a?).)` has index 1, the group `(a?)` nested inside it
* has index 2, and the group `(?:b)` has no index, since it is
* not a capture group.
*/
int getNumber() { result = re.getGroupNumber(start, end) }
/** Holds if this is a named capture group. */
predicate isNamed() { exists(this.getName()) }
/** Gets the name of this capture group, if any. */
string getName() { result = re.getGroupName(start, end) }
override RegExpTerm getChild(int i) {
result.getRegex() = re and
i = 0 and
re.groupContents(start, end, result.getStart(), result.getEnd())
}
override string getPrimaryQLClass() { result = "RegExpGroup" }
}
/**
* A special character in a regular expression.
*
* Examples:
* ```
* ^
* $
* .
* ```
*/
class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
string char;
RegExpSpecialChar() {
this = TRegExpSpecialChar(re, start, end) and
re.specialCharacter(start, end, char)
}
/**
* Holds if this constant represents a valid Unicode character (as opposed
* to a surrogate code point that does not correspond to a character by itself.)
*/
predicate isCharacter() { any() }
/** Gets the char for this term. */
string getChar() { result = char }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpSpecialChar" }
}
/**
* A dot regular expression.
*
* Example:
*
* ```
* .
* ```
*/
class RegExpDot extends RegExpSpecialChar {
RegExpDot() { this.getChar() = "." }
override string getPrimaryQLClass() { result = "RegExpDot" }
}
/**
* A dollar assertion `$` matching the end of a line.
*
* Example:
*
* ```
* $
* ```
*/
class RegExpDollar extends RegExpSpecialChar {
RegExpDollar() { this.getChar() = "$" }
override string getPrimaryQLClass() { result = "RegExpDollar" }
}
/**
* A caret assertion `^` matching the beginning of a line.
*
* Example:
*
* ```
* ^
* ```
*/
class RegExpCaret extends RegExpSpecialChar {
RegExpCaret() { this.getChar() = "^" }
override string getPrimaryQLClass() { result = "RegExpCaret" }
}
/**
* A zero-width match, that is, either an empty group or an assertion.
*
* Examples:
* ```
* ()
* (?=\w)
* ```
*/
class RegExpZeroWidthMatch extends RegExpGroup {
RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpZeroWidthMatch" }
}
/**
* A zero-width lookahead or lookbehind assertion.
*
* Examples:
*
* ```
* (?=\w)
* (?!\n)
* (?<=\.)
* (?<!\\)
* ```
*/
class RegExpSubPattern extends RegExpZeroWidthMatch {
RegExpSubPattern() { not re.emptyGroup(start, end) }
/** Gets the lookahead term. */
RegExpTerm getOperand() {
exists(int in_start, int in_end | re.groupContents(start, end, in_start, in_end) |
result.getRegex() = re and
result.getStart() = in_start and
result.getEnd() = in_end
)
}
}
/**
* A zero-width lookahead assertion.
*
* Examples:
*
* ```
* (?=\w)
* (?!\n)
* ```
*/
abstract class RegExpLookahead extends RegExpSubPattern { }
/**
* A positive-lookahead assertion.
*
* Examples:
*
* ```
* (?=\w)
* ```
*/
class RegExpPositiveLookahead extends RegExpLookahead {
RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" }
}
/**
* A negative-lookahead assertion.
*
* Examples:
*
* ```
* (?!\n)
* ```
*/
class RegExpNegativeLookahead extends RegExpLookahead {
RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }
override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" }
}
/**
* A zero-width lookbehind assertion.
*
* Examples:
*
* ```
* (?<=\.)
* (?<!\\)
* ```
*/
abstract class RegExpLookbehind extends RegExpSubPattern { }
/**
* A positive-lookbehind assertion.
*
* Examples:
*
* ```
* (?<=\.)
* ```
*/
class RegExpPositiveLookbehind extends RegExpLookbehind {
RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }
override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" }
}
/**
* A negative-lookbehind assertion.
*
* Examples:
*
* ```
* (?<!\\)
* ```
*/
class RegExpNegativeLookbehind extends RegExpLookbehind {
RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }
override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" }
}
/**
* A back reference, that is, a term of the form `\i` or `\k<name>`
* in a regular expression.
*
* Examples:
*
* ```
* \1
* (?P=quote)
* ```
*/
class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
RegExpBackRef() { this = TRegExpBackRef(re, start, end) }
/**
* Gets the number of the capture group this back reference refers to, if any.
*/
int getNumber() { result = re.getBackrefNumber(start, end) }
/**
* Gets the name of the capture group this back reference refers to, if any.
*/
string getName() { result = re.getBackrefName(start, end) }
/** Gets the capture group this back reference refers to. */
RegExpGroup getGroup() {
result.getLiteral() = this.getLiteral() and
(
result.getNumber() = this.getNumber() or
result.getName() = this.getName()
)
}
override RegExpTerm getChild(int i) { none() }
override string getPrimaryQLClass() { result = "RegExpBackRef" }
}
/** Gets the parse tree resulting from parsing `re`, if such has been constructed. */
RegExpTerm getParsedRegExp(StrConst re) { result.getRegex() = re and result.isRootTerm() }

View File

@@ -153,6 +153,12 @@ class ExceptStmt extends ExceptStmt_ {
override Stmt getASubStatement() { result = this.getAStmt() }
override Stmt getLastStatement() { result = this.getBody().getLastItem().getLastStatement() }
override Expr getType() {
result = super.getType() and not result instanceof Tuple
or
result = super.getType().(Tuple).getAnElt()
}
}
/** An assert statement, such as `assert a == b, "A is not equal to b"` */

View File

@@ -15,68 +15,35 @@
*/
private module AlgorithmNames {
predicate isStrongHashingAlgorithm(string name) {
name = "DSA" or
name = "ED25519" or
name = "ES256" or
name = "ECDSA256" or
name = "ES384" or
name = "ECDSA384" or
name = "ES512" or
name = "ECDSA512" or
name = "SHA2" or
name = "SHA224" or
name = "SHA256" or
name = "SHA384" or
name = "SHA512" or
name = "SHA3"
name =
[
"DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2",
"SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512"
]
}
predicate isWeakHashingAlgorithm(string name) {
name = "HAVEL128" or
name = "MD2" or
name = "MD4" or
name = "MD5" or
name = "PANAMA" or
name = "RIPEMD" or
name = "RIPEMD128" or
name = "RIPEMD256" or
name = "RIPEMD160" or
name = "RIPEMD320" or
name = "SHA0" or
name = "SHA1"
name =
[
"HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", "RIPEMD160",
"RIPEMD320", "SHA0", "SHA1"
]
}
predicate isStrongEncryptionAlgorithm(string name) {
name = "AES" or
name = "AES128" or
name = "AES192" or
name = "AES256" or
name = "AES512" or
name = "RSA" or
name = "RABBIT" or
name = "BLOWFISH"
name = ["AES", "AES128", "AES192", "AES256", "AES512", "RSA", "RABBIT", "BLOWFISH"]
}
predicate isWeakEncryptionAlgorithm(string name) {
name = "DES" or
name = "3DES" or
name = "TRIPLEDES" or
name = "TDEA" or
name = "TRIPLEDEA" or
name = "ARC2" or
name = "RC2" or
name = "ARC4" or
name = "RC4" or
name = "ARCFOUR" or
name = "ARC5" or
name = "RC5"
name =
[
"DES", "3DES", "TRIPLEDES", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", "ARCFOUR",
"ARC5", "RC5"
]
}
predicate isStrongPasswordHashingAlgorithm(string name) {
name = "ARGON2" or
name = "PBKDF2" or
name = "BCRYPT" or
name = "SCRYPT"
name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"]
}
predicate isWeakPasswordHashingAlgorithm(string name) { none() }

View File

@@ -55,12 +55,12 @@ private module SensitiveDataModeling {
* Gets a reference to a function that is considered to be a sensitive source of
* `classification`.
*/
private DataFlow::LocalSourceNode sensitiveFunction(
private DataFlow::TypeTrackingNode sensitiveFunction(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
exists(Function f |
nameIndicatesSensitiveData(f.getName(), classification) and
f.getName() = sensitiveString(classification) and
result.asExpr() = f.getDefinition()
)
or
@@ -76,28 +76,16 @@ private module SensitiveDataModeling {
}
/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*/
private DataFlow::LocalSourceNode sensitiveLookupStringConst(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
nameIndicatesSensitiveData(result.asExpr().(StrConst).getText(), classification)
or
exists(DataFlow::TypeTracker t2 |
result = sensitiveLookupStringConst(t2, classification).track(t2, t)
)
}
/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*
* Also see `extraStepForCalls`.
* Gets a reference (in local scope) to a string constant that, if used as the key in
* a lookup, indicates the presence of sensitive data with `classification`.
*/
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
sensitiveLookupStringConst(DataFlow::TypeTracker::end(), classification).flowsTo(result)
// Note: If this is implemented with type-tracking, we will get cross-talk as
// illustrated in python/ql/test/experimental/dataflow/sensitive-data/test.py
exists(DataFlow::LocalSourceNode source |
source.asExpr().(StrConst).getText() = sensitiveString(classification) and
source.flowsTo(result)
)
}
/** A function call that is considered a source of sensitive data. */
@@ -109,7 +97,7 @@ private module SensitiveDataModeling {
or
// to cover functions that we don't have the definition for, and where the
// reference to the function has not already been marked as being sensitive
nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
this.getFunction().asCfgNode().(NameNode).getId() = sensitiveString(classification)
}
override SensitiveDataClassification getClassification() { result = classification }
@@ -118,8 +106,10 @@ private module SensitiveDataModeling {
/**
* Tracks any modeled source of sensitive data (with any classification),
* to limit the scope of `extraStepForCalls`. See it's QLDoc for more context.
*
* Also see `extraStepForCalls`.
*/
private DataFlow::LocalSourceNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
t.start() and
result instanceof SensitiveDataSource
or
@@ -129,6 +119,8 @@ private module SensitiveDataModeling {
/**
* Tracks any modeled source of sensitive data (with any classification),
* to limit the scope of `extraStepForCalls`. See it's QLDoc for more context.
*
* Also see `extraStepForCalls`.
*/
private DataFlow::Node possibleSensitiveCallable() {
possibleSensitiveCallable(DataFlow::TypeTracker::end()).flowsTo(result)
@@ -172,6 +164,68 @@ private module SensitiveDataModeling {
nodeFrom = possibleSensitiveCallable()
}
pragma[nomagic]
private string sensitiveStrConstCandidate() {
result = any(StrConst s | not s.isDocString()).getText() and
not result.regexpMatch(notSensitiveRegexp())
}
pragma[nomagic]
private string sensitiveAttributeNameCandidate() {
result = any(DataFlow::AttrRead a).getAttributeName() and
not result.regexpMatch(notSensitiveRegexp())
}
pragma[nomagic]
private string sensitiveParameterNameCandidate() {
result = any(Parameter p).getName() and
not result.regexpMatch(notSensitiveRegexp())
}
pragma[nomagic]
private string sensitiveFunctionNameCandidate() {
result = any(Function f).getName() and
not result.regexpMatch(notSensitiveRegexp())
}
pragma[nomagic]
private string sensitiveNameCandidate() {
result = any(Name n).getId() and
not result.regexpMatch(notSensitiveRegexp())
}
/**
* This helper predicate serves to deduplicate the results of the preceding predicates. This
* means that if, say, an attribute and a function parameter have the same name, then that name will
* only be matched once, which greatly cuts down on the number of regexp matches that have to be
* performed.
*
* Under normal circumstances, deduplication is only performed when a predicate is materialized, and
* so to see the effect of this we must create a separate predicate that calculates the union of the
* preceding predicates.
*/
pragma[nomagic]
private string sensitiveStringCandidate() {
result in [
sensitiveNameCandidate(), sensitiveAttributeNameCandidate(),
sensitiveParameterNameCandidate(), sensitiveFunctionNameCandidate(),
sensitiveStrConstCandidate()
]
}
/**
* Returns strings (primarily the names of various program entities) that may contain sensitive data
* with the classification `classification`.
*
* This helper predicate ends up being very similar to `nameIndicatesSensitiveData`,
* but is performance optimized to limit the number of regexp matches that have to be performed.
*/
pragma[nomagic]
private string sensitiveString(SensitiveDataClassification classification) {
result = sensitiveStringCandidate() and
result.regexpMatch(maybeSensitiveRegexp(classification))
}
/**
* Any kind of variable assignment (also including with/for) where the name indicates
* it contains sensitive data.
@@ -190,7 +244,7 @@ private module SensitiveDataModeling {
SensitiveVariableAssignment() {
exists(DefinitionNode def |
nameIndicatesSensitiveData(def.(NameNode).getId(), classification) and
def.(NameNode).getId() = sensitiveString(classification) and
(
this.asCfgNode() = def.getValue()
or
@@ -201,7 +255,7 @@ private module SensitiveDataModeling {
)
or
exists(With with |
nameIndicatesSensitiveData(with.getOptionalVars().(Name).getId(), classification) and
with.getOptionalVars().(Name).getId() = sensitiveString(classification) and
this.asExpr() = with.getContextExpr()
)
}
@@ -217,7 +271,7 @@ private module SensitiveDataModeling {
// Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
// I considered excluding any `from ... import something_sensitive`, but then realized that
// we should flag up `form ... import password as ...` as a password
nameIndicatesSensitiveData(this.(DataFlow::AttrRead).getAttributeName(), classification)
this.(DataFlow::AttrRead).getAttributeName() = sensitiveString(classification)
or
// Things like `getattr(foo, <reference-to-string>)`
this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
@@ -243,7 +297,7 @@ private module SensitiveDataModeling {
SensitiveDataClassification classification;
SensitiveGetCall() {
this.getFunction().asCfgNode().(AttrNode).getName() = "get" and
this.getFunction().(DataFlow::AttrRef).getAttributeName() = "get" and
this.getArg(0) = sensitiveLookupStringConst(classification)
}
@@ -254,9 +308,7 @@ private module SensitiveDataModeling {
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
SensitiveDataClassification classification;
SensitiveParameter() {
nameIndicatesSensitiveData(this.getParameter().getName(), classification)
}
SensitiveParameter() { this.getParameter().getName() = sensitiveString(classification) }
override SensitiveDataClassification getClassification() { result = classification }
}

View File

@@ -23,7 +23,7 @@ class OptionalAttributeName = Internal::OptionalContentName;
* It is recommended that all uses of this type are written in the following form,
* for tracking some type `myType`:
* ```ql
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
* DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or

View File

@@ -17,6 +17,13 @@ abstract class AttrRef extends Node {
*/
abstract Node getObject();
/**
* Holds if this data flow node accesses attribute named `attrName` on object `object`.
*/
predicate accesses(Node object, string attrName) {
this.getObject() = object and this.getAttributeName() = attrName
}
/**
* Gets the expression node that defines the attribute being accessed, if any. This is
* usually an identifier or literal.
@@ -191,7 +198,7 @@ private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode {
* - Dynamic attribute reads using `getattr`: `getattr(object, attr)`
* - Qualified imports: `from module import attr as name`
*/
abstract class AttrRead extends AttrRef, Node { }
abstract class AttrRead extends AttrRef, Node, LocalSourceNode { }
/** A simple attribute read, e.g. `object.attr` */
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {

View File

@@ -724,7 +724,6 @@ private module Cached {
Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
) {
storeStep(node1, c, node2) and
read(_, c, _) and
contentType = getNodeDataFlowType(node1) and
containerType = getNodeDataFlowType(node2)
or
@@ -787,16 +786,24 @@ private module Cached {
}
/**
* Holds if the call context `call` either improves virtual dispatch in
* `callable` or if it allows us to prune unreachable nodes in `callable`.
* Holds if the call context `call` improves virtual dispatch in `callable`.
*/
cached
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
predicate recordDataFlowCallSiteDispatch(DataFlowCall call, DataFlowCallable callable) {
reducedViableImplInCallContext(_, callable, call)
or
}
/**
* Holds if the call context `call` allows us to prune unreachable nodes in `callable`.
*/
cached
predicate recordDataFlowCallSiteUnreachable(DataFlowCall call, DataFlowCallable callable) {
exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call))
}
cached
predicate allowParameterReturnInSelfCached(ParamNode p) { allowParameterReturnInSelf(p) }
cached
newtype TCallContext =
TAnyCallContext() or
@@ -847,6 +854,15 @@ private module Cached {
TAccessPathFrontSome(AccessPathFront apf)
}
/**
* Holds if the call context `call` either improves virtual dispatch in
* `callable` or if it allows us to prune unreachable nodes in `callable`.
*/
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
recordDataFlowCallSiteDispatch(call, callable) or
recordDataFlowCallSiteUnreachable(call, callable)
}
/**
* A `Node` at which a cast can occur such that the type should be checked.
*/
@@ -924,7 +940,7 @@ class CallContextSpecificCall extends CallContextCall, TSpecificCall {
}
override predicate relevantFor(DataFlowCallable callable) {
recordDataFlowCallSite(getCall(), callable)
recordDataFlowCallSite(this.getCall(), callable)
}
override predicate matchesCall(DataFlowCall call) { call = this.getCall() }
@@ -1118,6 +1134,44 @@ ReturnPosition getReturnPosition(ReturnNodeExt ret) {
result = getReturnPosition0(ret, ret.getKind())
}
/**
* Checks whether `inner` can return to `call` in the call context `innercc`.
* Assumes a context of `inner = viableCallableExt(call)`.
*/
bindingset[innercc, inner, call]
predicate checkCallContextReturn(CallContext innercc, DataFlowCallable inner, DataFlowCall call) {
innercc instanceof CallContextAny
or
exists(DataFlowCallable c0, DataFlowCall call0 |
callEnclosingCallable(call0, inner) and
innercc = TReturn(c0, call0) and
c0 = prunedViableImplInCallContextReverse(call0, call)
)
}
/**
* Checks whether `call` can resolve to `calltarget` in the call context `cc`.
* Assumes a context of `calltarget = viableCallableExt(call)`.
*/
bindingset[cc, call, calltarget]
predicate checkCallContextCall(CallContext cc, DataFlowCall call, DataFlowCallable calltarget) {
exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
if reducedViableImplInCallContext(call, _, ctx)
then calltarget = prunedViableImplInCallContext(call, ctx)
else any()
)
or
cc instanceof CallContextSomeCall
or
cc instanceof CallContextAny
or
cc instanceof CallContextReturn
}
/**
* Resolves a return from `callable` in `cc` to `call`. This is equivalent to
* `callable = viableCallableExt(call) and checkCallContextReturn(cc, callable, call)`.
*/
bindingset[cc, callable]
predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
cc instanceof CallContextAny and callable = viableCallableExt(call)
@@ -1129,6 +1183,10 @@ predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall
)
}
/**
* Resolves a call from `call` in `cc` to `result`. This is equivalent to
* `result = viableCallableExt(call) and checkCallContextCall(cc, call, result)`.
*/
bindingset[call, cc]
DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
@@ -1181,6 +1239,13 @@ class TypedContent extends MkTypedContent {
/** Gets a textual representation of this content. */
string toString() { result = c.toString() }
/**
* Holds if access paths with this `TypedContent` at their head always should
* be tracked at high precision. This disables adaptive access path precision
* for such access paths.
*/
predicate forceHighPrecision() { forceHighPrecision(c) }
}
/**
@@ -1195,7 +1260,7 @@ abstract class AccessPathFront extends TAccessPathFront {
TypedContent getHead() { this = TFrontHead(result) }
predicate isClearedAt(Node n) { clearsContentCached(n, getHead().getContent()) }
predicate isClearedAt(Node n) { clearsContentCached(n, this.getHead().getContent()) }
}
class AccessPathFrontNil extends AccessPathFront, TFrontNil {

View File

@@ -168,7 +168,14 @@ module Consistency {
msg = "ArgumentNode is missing PostUpdateNode."
}
query predicate postWithInFlow(PostUpdateNode n, string msg) {
// This predicate helps the compiler forget that in some languages
// it is impossible for a `PostUpdateNode` to be the target of
// `simpleLocalFlowStep`.
private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
query predicate postWithInFlow(Node n, string msg) {
isPostUpdateNode(n) and
not clearsContent(n, _) and
simpleLocalFlowStep(_, n) and
msg = "PostUpdateNode should not be the target of local flow."
}

View File

@@ -152,6 +152,7 @@ class DataFlowExpr = Expr;
* Flow comes from definitions, uses and refinements.
*/
// TODO: Consider constraining `nodeFrom` and `nodeTo` to be in the same scope.
// If they have different enclosing callables, we get consistency errors.
module EssaFlow {
predicate essaFlowStep(Node nodeFrom, Node nodeTo) {
// Definition
@@ -200,6 +201,9 @@ module EssaFlow {
// If expressions
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
or
// boolean inline expressions such as `x or y` or `x and y`
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(BoolExprNode).getAnOperand()
or
// Flow inside an unpacking assignment
iterableUnpackingFlowStep(nodeFrom, nodeTo)
or
@@ -225,35 +229,60 @@ module EssaFlow {
//--------
/**
* This is the local flow predicate that is used as a building block in global
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
* excludes SSA flow through instance fields.
* data flow.
*
* Local flow can happen either at import time, when the module is initialised
* or at runtime when callables in the module are called.
*/
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
// If there is ESSA-flow out of a node `node`, we want flow
// If there is local flow out of a node `node`, we want flow
// both out of `node` and any post-update node of `node`.
exists(Node node |
EssaFlow::essaFlowStep(node, nodeTo) and
nodeFrom = update(node) and
(
not node instanceof EssaNode or
not nodeTo instanceof EssaNode or
localEssaStep(node, nodeTo)
importTimeLocalFlowStep(node, nodeTo) or
runtimeLocalFlowStep(node, nodeTo)
)
)
}
/**
* Holds if there is an Essa flow step from `nodeFrom` to `nodeTo` that does not switch between
* local and global SSA variables.
* Holds if `node` is found at the top level of a module.
*/
private predicate localEssaStep(EssaNode nodeFrom, EssaNode nodeTo) {
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
(
nodeFrom.getVar() instanceof GlobalSsaVariable and
nodeTo.getVar() instanceof GlobalSsaVariable
or
not nodeFrom.getVar() instanceof GlobalSsaVariable and
not nodeTo.getVar() instanceof GlobalSsaVariable
pragma[inline]
predicate isTopLevel(Node node) { node.getScope() instanceof Module }
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at import time. */
predicate importTimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
// As a proxy for whether statements can be executed at import time,
// we check if they appear at the top level.
// This will miss statements inside functions called from the top level.
isTopLevel(nodeFrom) and
isTopLevel(nodeTo) and
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
}
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at runtime. */
predicate runtimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
// Anything not at the top level can be executed at runtime.
not isTopLevel(nodeFrom) and
not isTopLevel(nodeTo) and
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
}
/** `ModuleVariable`s are accessed via jump steps at runtime. */
predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
// Module variable read
nodeFrom.(ModuleVariableNode).getARead() = nodeTo
or
// Module variable write
nodeFrom = nodeTo.(ModuleVariableNode).getAWrite()
or
// Setting the possible values of the variable at the end of import time
exists(SsaVariable def |
def = any(SsaVariable var).getAnUltimateDefinition() and
def.getDefinition() = nodeFrom.asCfgNode() and
def.getVariable() = nodeTo.(ModuleVariableNode).getVariable()
)
}
@@ -581,11 +610,11 @@ class DataFlowLambda extends DataFlowCallable, TLambda {
override string toString() { result = lambda.toString() }
override CallNode getACall() { result = getCallableValue().getACall() }
override CallNode getACall() { result = this.getCallableValue().getACall() }
override Scope getScope() { result = lambda.getEvaluatingScope() }
override NameNode getParameter(int n) { result = getParameter(getCallableValue(), n) }
override NameNode getParameter(int n) { result = getParameter(this.getCallableValue(), n) }
override string getName() { result = "Lambda callable" }
@@ -857,11 +886,7 @@ string ppReprType(DataFlowType t) { none() }
* taken into account.
*/
predicate jumpStep(Node nodeFrom, Node nodeTo) {
// Module variable read
nodeFrom.(ModuleVariableNode).getARead() = nodeTo
or
// Module variable write
nodeFrom = nodeTo.(ModuleVariableNode).getAWrite()
runtimeJumpStep(nodeFrom, nodeTo)
or
// Read of module attribute:
exists(AttrRead r, ModuleValue mv |
@@ -869,6 +894,9 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
module_export(mv.getScope(), r.getAttributeName(), nodeFrom) and
nodeTo = r
)
or
// Default value for parameter flows to that parameter
defaultValueFlowStep(nodeFrom, nodeTo)
}
/**
@@ -1033,6 +1061,19 @@ predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node
)
}
predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
exists(Function f, Parameter p, ParameterDefinition def |
// `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
p = f.getArgByName(_) and
nodeFrom.asExpr() = p.getDefault() and
// The following expresses
// nodeTo.(ParameterNode).getParameter() = p
// without non-monotonic recursion
def.getParameter() = p and
nodeTo.getNode() = def.getDefiningNode()
)
}
/**
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
*/
@@ -1604,6 +1645,12 @@ predicate isImmutableOrUnobservable(Node n) { none() }
int accessPathLimit() { result = 5 }
/**
* Holds if access paths with `c` at their head always should be tracked at high
* precision. This disables adaptive access path precision for such access paths.
*/
predicate forceHighPrecision(Content c) { none() }
/** Holds if `n` should be hidden from path explanations. */
predicate nodeIsHidden(Node n) { none() }
@@ -1617,3 +1664,12 @@ predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { no
/** Extra data-flow steps needed for lambda flow analysis. */
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.
*
* One example would be to allow flow like `p.foo = p.bar;`, which is disallowed
* by default as a heuristic.
*/
predicate allowParameterReturnInSelf(ParameterNode p) { none() }

View File

@@ -102,7 +102,7 @@ class Node extends TNode {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -180,6 +180,45 @@ class CallCfgNode extends CfgNode, LocalSourceNode {
Node getArgByName(string name) { result.asCfgNode() = node.getArgByName(name) }
}
/**
* A data-flow node corresponding to a method call, that is `foo.bar(...)`.
*
* Also covers the case where the method lookup is done separately from the call itself, as in
* `temp = foo.bar; temp(...)`. Note that this is only tracked through local scope.
*/
class MethodCallNode extends CallCfgNode {
AttrRead method_lookup;
MethodCallNode() { method_lookup = this.getFunction().getALocalSource() }
/**
* Gets the name of the method being invoked (the `bar` in `foo.bar(...)`) if it can be determined.
*
* Note that this method may have multiple results if a single call node represents calls to
* multiple different objects and methods. If you want to link up objects and method names
* accurately, use the `calls` method instead.
*/
string getMethodName() { result = method_lookup.getAttributeName() }
/**
* Gets the data-flow node corresponding to the object receiving this call. That is, the `foo` in
* `foo.bar(...)`.
*
* Note that this method may have multiple results if a single call node represents calls to
* multiple different objects and methods. If you want to link up objects and method names
* accurately, use the `calls` method instead.
*/
Node getObject() { result = method_lookup.getObject() }
/** Holds if this data-flow node calls method `methodName` on the object node `object`. */
predicate calls(Node object, string methodName) {
// As `getObject` and `getMethodName` may both have multiple results, we must look up the object
// and method name directly on `method_lookup`.
object = method_lookup.getObject() and
methodName = method_lookup.getAttributeName()
}
}
/**
* An expression, viewed as a node in a data flow graph.
*
@@ -293,7 +332,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
override Scope getScope() { result = mod }
override string toString() {
result = "ModuleVariableNode for " + var.toString() + " in " + mod.toString()
result = "ModuleVariableNode for " + mod.getName() + "." + var.getId()
}
/** Gets the module in which this variable appears. */

View File

@@ -18,6 +18,10 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
/**
* DEPRECATED. Use the API graphs library (`semmle.python.ApiGraphs`) instead.
*
* For a drop-in replacement, use `API::moduleImport(name).getAUse()`.
*
* Gets a `Node` that refers to the module referenced by `name`.
* Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
* reference to the module `pkg`.
@@ -37,7 +41,7 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
* `mypkg/foo.py` but the variable `foo` containing `42` -- however, `import mypkg.foo` will always cause `mypkg.foo`
* to refer to the module.
*/
Node importNode(string name) {
deprecated Node importNode(string name) {
exists(Variable var, Import imp, Alias alias |
alias = imp.getAName() and
alias.getAsname() = var.getAStore() and

View File

@@ -33,15 +33,20 @@ private import DataFlowPrivate
class LocalSourceNode extends Node {
cached
LocalSourceNode() {
not simpleLocalFlowStep(_, this) and
// Currently, we create synthetic post-update nodes for
// - arguments to calls that may modify said argument
// - direct reads a writes of object attributes
// Both of these preserve the identity of the underlying pointer, and hence we exclude these as
// local source nodes.
// We do, however, allow the post-update nodes that arise from object creation (which are non-synthetic).
not this instanceof SyntheticPostUpdateNode
this instanceof ExprNode and
not simpleLocalFlowStep(_, this)
or
// We include all module variable nodes, as these act as stepping stones between writes and
// reads of global variables. Without them, type tracking based on `LocalSourceNode`s would be
// unable to track across global variables.
//
// Once the `track` and `backtrack` methods have been fully deprecated, this disjunct can be
// removed, and the entire class can extend `ExprNode`. At that point, `TypeTrackingNode` should
// be used for type tracking instead of `LocalSourceNode`.
this instanceof ModuleVariableNode
or
// We explicitly include any read of a global variable, as some of these may have local flow going
// into them.
this = any(ModuleVariableNode mvn).getARead()
}
@@ -57,7 +62,12 @@ class LocalSourceNode extends Node {
/**
* Gets a read of attribute `attrName` on this node.
*/
AttrRead getAnAttributeRead(string attrName) { result = getAnAttributeReference(attrName) }
AttrRead getAnAttributeRead(string attrName) { result = this.getAnAttributeReference(attrName) }
/**
* Gets a write of attribute `attrName` on this node.
*/
AttrWrite getAnAttributeWrite(string attrName) { result = this.getAnAttributeReference(attrName) }
/**
* Gets a reference (read or write) of any attribute on this node.
@@ -71,13 +81,28 @@ class LocalSourceNode extends Node {
/**
* Gets a read of any attribute on this node.
*/
AttrRead getAnAttributeRead() { result = getAnAttributeReference() }
AttrRead getAnAttributeRead() { result = this.getAnAttributeReference() }
/**
* Gets a write of any attribute on this node.
*/
AttrWrite getAnAttributeWrite() { result = this.getAnAttributeReference() }
/**
* Gets a call to this node.
*/
CallCfgNode getACall() { Cached::call(this, result) }
/**
* Gets a call to the method `methodName` on this node.
*
* Includes both calls that have the syntactic shape of a method call (as in `obj.m(...)`), and
* calls where the callee undergoes some additional local data flow (as in `tmp = obj.m; m(...)`).
*/
MethodCallNode getAMethodCall(string methodName) {
result = this.getAnAttributeRead(methodName).getACall()
}
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
@@ -95,6 +120,53 @@ class LocalSourceNode extends Node {
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
}
/**
* A node that can be used for type tracking or type back-tracking.
*
* All steps made during type tracking should be between instances of this class.
*/
class TypeTrackingNode = LocalSourceNode;
/** Temporary holding ground for the `TypeTrackingNode` class. */
private module FutureWork {
class FutureTypeTrackingNode extends Node {
FutureTypeTrackingNode() {
this instanceof LocalSourceNode
or
this instanceof ModuleVariableNode
}
/**
* Holds if this node can flow to `nodeTo` in one or more local flow steps.
*
* For `ModuleVariableNode`s, the only "local" step is to the node itself.
* For `LocalSourceNode`s, this is the usual notion of local flow.
*/
pragma[inline]
predicate flowsTo(Node node) {
this instanceof ModuleVariableNode and this = node
or
this.(LocalSourceNode).flowsTo(node)
}
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
* See `TypeTracker` for more details about how to use this.
*/
pragma[inline]
TypeTrackingNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
/**
* Gets a node that may flow into this one using one heap and/or interprocedural step.
*
* See `TypeBackTracker` for more details about how to use this.
*/
pragma[inline]
TypeTrackingNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
}
}
cached
private module Cached {
/**
@@ -107,11 +179,21 @@ private module Cached {
source = sink
or
exists(Node second |
simpleLocalFlowStep(source, second) and
simpleLocalFlowStep*(second, sink)
localSourceFlowStep(source, second) and
localSourceFlowStep*(second, sink)
)
}
/**
* Helper predicate for `hasLocalSource`. Removes any steps go to module variable reads, as these
* are already local source nodes in their own right.
*/
cached
private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo) and
not nodeTo = any(ModuleVariableNode v).getARead()
}
/**
* Holds if `base` flows to the base of `ref` and `ref` has attribute name `attr`.
*/

View File

@@ -1,6 +1,20 @@
import python
import semmle.python.dataflow.new.DataFlow
/**
* INTERNAL: Do not use.
*
* Provides helper predicates for pretty-printing `DataFlow::Node`s.
*
* Since these have not been performance optimized, please only use them for
* debug-queries or in tests.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
/**
* INTERNAL: Do not use.
*
* Gets the pretty-printed version of the Expr `e`.
*/
string prettyExpr(Expr e) {
not e instanceof Num and
not e instanceof StrConst and
@@ -27,7 +41,9 @@ string prettyExpr(Expr e) {
}
/**
* Gets pretty-printed version of the DataFlow::Node `node`
* INTERNAL: Do not use.
*
* Gets the pretty-printed version of the DataFlow::Node `node`
*/
bindingset[node]
string prettyNode(DataFlow::Node node) {
@@ -35,12 +51,13 @@ string prettyNode(DataFlow::Node node) {
}
/**
* Gets pretty-printed version of the DataFlow::Node `node`, that is suitable for use
* INTERNAL: Do not use.
*
* Gets the pretty-printed version of the DataFlow::Node `node`, that is suitable for use
* with `TestUtilities.InlineExpectationsTest` (that is, no spaces unless required).
*/
bindingset[node]
string prettyNodeForInlineTest(DataFlow::Node node) {
exists(node.asExpr()) and
result = prettyExpr(node.asExpr())
or
exists(Expr e | e = node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() |

View File

@@ -1,7 +1,8 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.dataflow.new.internal.TaintTrackingPublic
private import semmle.python.ApiGraphs
/**
* Holds if `node` should be a sanitizer in all global taint flow configurations
@@ -9,6 +10,13 @@ private import semmle.python.dataflow.new.internal.TaintTrackingPublic
*/
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
/**
* Holds if default `TaintTracking::Configuration`s should allow implicit reads
* of `c` at sinks and inputs to additional taint steps.
*/
bindingset[node]
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
private module Cached {
/**
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
@@ -38,9 +46,13 @@ private module Cached {
or
copyStep(nodeFrom, nodeTo)
or
forStep(nodeFrom, nodeTo)
DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
or
unpackingAssignmentStep(nodeFrom, nodeTo)
DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
or
awaitStep(nodeFrom, nodeTo)
}
}
@@ -75,13 +87,13 @@ predicate subscriptStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
*/
predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
// transforming something tainted into a string will make the string tainted
exists(CallNode call | call = nodeTo.getNode() |
call.getFunction().(NameNode).getId() in ["str", "bytes", "unicode"] and
exists(DataFlow::CallCfgNode call | call = nodeTo |
(
nodeFrom.getNode() = call.getArg(0)
call = API::builtin(["str", "bytes", "unicode"]).getACall()
or
nodeFrom.getNode() = call.getArgByName("object")
)
call.getFunction().asCfgNode().(NameNode).getId() in ["str", "bytes", "unicode"]
) and
nodeFrom in [call.getArg(0), call.getArgByName("object")]
)
or
// String methods. Note that this doesn't recognize `meth = "foo".upper; meth()`
@@ -148,39 +160,37 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
// construction by literal
// TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
storeStep(nodeFrom, _, nodeTo)
DataFlowPrivate::storeStep(nodeFrom, _, nodeTo)
or
// constructor call
exists(CallNode call | call = nodeTo.asCfgNode() |
call.getFunction().(NameNode).getId() in [
"list", "set", "frozenset", "dict", "defaultdict", "tuple"
] and
call.getArg(0) = nodeFrom.getNode()
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and
call.getArg(0) = nodeFrom
// TODO: Properly handle defaultdict/namedtuple
)
or
// functions operating on collections
exists(CallNode call | call = nodeTo.asCfgNode() |
call.getFunction().(NameNode).getId() in ["sorted", "reversed", "iter", "next"] and
call.getArg(0) = nodeFrom.getNode()
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and
call.getArg(0) = nodeFrom
)
or
// methods
exists(CallNode call, string name | call = nodeTo.asCfgNode() |
name in [
exists(DataFlow::MethodCallNode call, string methodName | call = nodeTo |
methodName in [
// general
"copy", "pop",
// dict
"values", "items", "get", "popitem"
] and
call.getFunction().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
call.calls(nodeFrom, methodName)
)
or
// list.append, set.add
exists(CallNode call, string name |
name in ["append", "add"] and
call.getFunction().(AttrNode).getObject(name) =
nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode().asCfgNode() and
call.getArg(0) = nodeFrom.getNode()
exists(DataFlow::MethodCallNode call, DataFlow::Node obj |
call.calls(obj, ["append", "add"]) and
obj = nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() and
call.getArg(0) = nodeFrom
)
}
@@ -188,38 +198,16 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying.
*/
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(CallNode call | call = nodeTo.getNode() |
// Fully qualified: copy.copy, copy.deepcopy
(
call.getFunction().(NameNode).getId() in ["copy", "deepcopy"]
or
call.getFunction().(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy"
) and
call.getArg(0) = nodeFrom.getNode()
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::moduleImport("copy").getMember(["copy", "deepcopy"]).getACall() and
call.getArg(0) = nodeFrom
)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
* for example `for x in xs`, or `for x,y in points`.
* Holds if taint can flow from `nodeFrom` to `nodeTo` with an `await`-step,
* such that the whole expression `await x` is tainted if `x` is tainted.
*/
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
exists(EssaNodeDefinition defn, For for |
for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = for.getIter()
)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
* Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
*/
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
// `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
exists(MultiAssignmentDefinition defn, Assign assign |
assign.getATarget().contains(defn.getDefiningNode().getNode()) and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = assign.getValue()
)
predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeTo.asExpr().(Await).getValue() = nodeFrom.asExpr()
}

Some files were not shown because too many files have changed in this diff Show More