mirror of
https://github.com/github/codeql.git
synced 2025-12-18 01:33:15 +01:00
Packaging: Refactor Python core libraries
Extract the external facing `qll` files into the codeql/python-all query pack.
This commit is contained in:
@@ -1,20 +0,0 @@
|
||||
/**
|
||||
* Contains customizations to the standard library.
|
||||
*
|
||||
* This module is imported by `python.qll`, so any customizations defined here automatically
|
||||
* apply to all queries.
|
||||
*
|
||||
* Typical examples of customizations include adding new subclasses of abstract classes such as
|
||||
* the `RemoteFlowSource::Range` and `AdditionalTaintStep` classes associated with the security
|
||||
* queries to model frameworks that are not covered by the standard library.
|
||||
*/
|
||||
|
||||
import python
|
||||
/* General import that is useful */
|
||||
// import semmle.python.dataflow.new.DataFlow
|
||||
//
|
||||
/* for extending `TaintTracking::AdditionalTaintStep` */
|
||||
// import semmle.python.dataflow.new.TaintTracking
|
||||
//
|
||||
/* for extending `RemoteFlowSource::Range` */
|
||||
// import semmle.python.dataflow.new.RemoteFlowSources
|
||||
@@ -1,4 +1,4 @@
|
||||
- description: Standard Code Scanning queries for Python
|
||||
- qlpack: codeql-python
|
||||
- queries: .
|
||||
- apply: code-scanning-selectors.yml
|
||||
from: codeql/suite-helpers
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
- description: Standard LGTM queries for Python, including ones not displayed by default
|
||||
- qlpack: codeql-python
|
||||
- queries: .
|
||||
- apply: lgtm-selectors.yml
|
||||
from: codeql/suite-helpers
|
||||
# These are only for IDE use.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
- description: Security-and-quality queries for Python
|
||||
- qlpack: codeql-python
|
||||
- queries: .
|
||||
- apply: security-and-quality-selectors.yml
|
||||
from: codeql/suite-helpers
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
- description: Security-extended queries for Python
|
||||
- qlpack: codeql-python
|
||||
- queries: .
|
||||
- apply: security-extended-selectors.yml
|
||||
from: codeql/suite-helpers
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
/**
|
||||
* WARNING: Use of this module is DEPRECATED.
|
||||
* All new queries should use `import python`.
|
||||
*/
|
||||
|
||||
import python
|
||||
@@ -1,43 +0,0 @@
|
||||
import Customizations
|
||||
import semmle.python.Files
|
||||
import semmle.python.Operations
|
||||
import semmle.python.Variables
|
||||
import semmle.python.AstGenerated
|
||||
import semmle.python.AstExtended
|
||||
import semmle.python.Function
|
||||
import semmle.python.Module
|
||||
import semmle.python.Class
|
||||
import semmle.python.Import
|
||||
import semmle.python.Stmts
|
||||
import semmle.python.Exprs
|
||||
import semmle.python.Keywords
|
||||
import semmle.python.Comprehensions
|
||||
import semmle.python.Flow
|
||||
import semmle.python.Metrics
|
||||
import semmle.python.Constants
|
||||
import semmle.python.Scope
|
||||
import semmle.python.Comment
|
||||
import semmle.python.GuardedControlFlow
|
||||
import semmle.python.types.ImportTime
|
||||
import semmle.python.types.Object
|
||||
import semmle.python.types.ClassObject
|
||||
import semmle.python.types.FunctionObject
|
||||
import semmle.python.types.ModuleObject
|
||||
import semmle.python.types.Version
|
||||
import semmle.python.types.Descriptors
|
||||
import semmle.python.protocols
|
||||
import semmle.python.SSA
|
||||
import semmle.python.SelfAttribute
|
||||
import semmle.python.types.Properties
|
||||
import semmle.python.xml.XML
|
||||
import semmle.python.essa.Essa
|
||||
import semmle.python.pointsto.Base
|
||||
import semmle.python.pointsto.Context
|
||||
import semmle.python.pointsto.CallGraph
|
||||
import semmle.python.objects.ObjectAPI
|
||||
import semmle.python.Unit
|
||||
import site
|
||||
// Removing this import perturbs the compilation process enough that the points-to analysis gets
|
||||
// compiled -- and cached -- differently depending on whether the data flow library is imported. By
|
||||
// importing it privately here, we ensure that the points-to analysis is compiled the same way.
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
4
python/ql/src/qlpack.lock.yml
Normal file
4
python/ql/src/qlpack.lock.yml
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
dependencies: {}
|
||||
compiled: false
|
||||
lockVersion: 1.0.0
|
||||
@@ -1,5 +1,8 @@
|
||||
name: codeql-python
|
||||
version: 0.0.0
|
||||
dbscheme: semmlecode.python.dbscheme
|
||||
name: codeql/python-queries
|
||||
version: 0.0.2
|
||||
dependencies:
|
||||
codeql/python-all: ^0.0.2
|
||||
codeql/suite-helpers: ^0.0.2
|
||||
suites: codeql-suites
|
||||
extractor: python
|
||||
defaultSuiteFile: codeql-suites/python-code-scanning.qls
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
/** DEPRECATED: Use `semmle.python.concepts.CryptoAlgorithms` instead. */
|
||||
|
||||
import semmle.python.concepts.CryptoAlgorithms
|
||||
@@ -1,3 +0,0 @@
|
||||
/** For backward compatibility */
|
||||
|
||||
import semmle.python.essa.Essa
|
||||
@@ -1,3 +0,0 @@
|
||||
/** Provides classes for working with files and folders. */
|
||||
|
||||
import semmle.python.Files
|
||||
@@ -1,605 +0,0 @@
|
||||
/**
|
||||
* Provides an implementation of _API graphs_, which are an abstract representation of the API
|
||||
* surface used and/or defined by a code base.
|
||||
*
|
||||
* The nodes of the API graph represent definitions and uses of API components. The edges are
|
||||
* directed and labeled; they specify how the components represented by nodes relate to each other.
|
||||
*/
|
||||
|
||||
private import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
|
||||
/**
|
||||
* Provides classes and predicates for working with APIs used in a database.
|
||||
*/
|
||||
module API {
|
||||
/**
|
||||
* An abstract representation of a definition or use of an API component such as a function
|
||||
* exported by a Python package, or its result.
|
||||
*/
|
||||
class Node extends Impl::TApiNode {
|
||||
/**
|
||||
* Gets a data-flow node corresponding to a use of the API component represented by this node.
|
||||
*
|
||||
* For example, `import re; re.escape` is a use of the `escape` function from the
|
||||
* `re` module, and `import re; re.escape("hello")` is a use of the return of that function.
|
||||
*
|
||||
* This includes indirect uses found via data flow, meaning that in
|
||||
* ```python
|
||||
* def f(x):
|
||||
* pass
|
||||
*
|
||||
* f(obj.foo)
|
||||
* ```
|
||||
* both `obj.foo` and `x` are uses of the `foo` member from `obj`.
|
||||
*/
|
||||
DataFlow::Node getAUse() {
|
||||
exists(DataFlow::LocalSourceNode src | Impl::use(this, src) |
|
||||
Impl::trackUseNode(src).flowsTo(result)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an immediate use of the API component represented by this node.
|
||||
*
|
||||
* For example, `import re; re.escape` is an immediate use of the `escape` member
|
||||
* from the `re` module.
|
||||
*
|
||||
* Unlike `getAUse()`, this predicate only gets the immediate references, not the indirect uses
|
||||
* found via data flow. This means that in `x = re.escape` only `re.escape` is a reference
|
||||
* to the `escape` member of `re`, neither `x` nor any node that `x` flows to is a reference to
|
||||
* this API component.
|
||||
*/
|
||||
DataFlow::LocalSourceNode getAnImmediateUse() { Impl::use(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a call to the function represented by this API component.
|
||||
*/
|
||||
DataFlow::CallCfgNode getACall() { result = getReturn().getAnImmediateUse() }
|
||||
|
||||
/**
|
||||
* Gets a node representing member `m` of this API component.
|
||||
*
|
||||
* For example, a member can be:
|
||||
*
|
||||
* - A submodule of a module
|
||||
* - An attribute of an object
|
||||
*/
|
||||
bindingset[m]
|
||||
bindingset[result]
|
||||
Node getMember(string m) { result = getASuccessor(Label::member(m)) }
|
||||
|
||||
/**
|
||||
* Gets a node representing a member of this API component where the name of the member is
|
||||
* not known statically.
|
||||
*/
|
||||
Node getUnknownMember() { result = getASuccessor(Label::unknownMember()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing a member of this API component where the name of the member may
|
||||
* or may not be known statically.
|
||||
*/
|
||||
Node getAMember() {
|
||||
result = getASuccessor(Label::member(_)) or
|
||||
result = getUnknownMember()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node representing the result of the function represented by this node.
|
||||
*
|
||||
* This predicate may have multiple results when there are multiple invocations of this API component.
|
||||
* Consider using `getACall()` if there is a need to distinguish between individual calls.
|
||||
*/
|
||||
Node getReturn() { result = getASuccessor(Label::return()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing a subclass of the class represented by this node.
|
||||
*/
|
||||
Node getASubclass() { result = getASuccessor(Label::subclass()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing the result from awaiting this node.
|
||||
*/
|
||||
Node getAwaited() { result = getASuccessor(Label::await()) }
|
||||
|
||||
/**
|
||||
* Gets a string representation of the lexicographically least among all shortest access paths
|
||||
* from the root to this node.
|
||||
*/
|
||||
string getPath() { result = min(string p | p = getAPath(Impl::distanceFromRoot(this)) | p) }
|
||||
|
||||
/**
|
||||
* Gets a node such that there is an edge in the API graph between this node and the other
|
||||
* one, and that edge is labeled with `lbl`.
|
||||
*/
|
||||
Node getASuccessor(string lbl) { Impl::edge(this, lbl, result) }
|
||||
|
||||
/**
|
||||
* Gets a node such that there is an edge in the API graph between that other node and
|
||||
* this one, and that edge is labeled with `lbl`
|
||||
*/
|
||||
Node getAPredecessor(string lbl) { this = result.getASuccessor(lbl) }
|
||||
|
||||
/**
|
||||
* Gets a node such that there is an edge in the API graph between that other node and
|
||||
* this one.
|
||||
*/
|
||||
Node getAPredecessor() { result = getAPredecessor(_) }
|
||||
|
||||
/**
|
||||
* Gets a node such that there is an edge in the API graph between this node and the other
|
||||
* one.
|
||||
*/
|
||||
Node getASuccessor() { result = getASuccessor(_) }
|
||||
|
||||
/**
|
||||
* Gets the data-flow node that gives rise to this node, if any.
|
||||
*/
|
||||
DataFlow::Node getInducingNode() { this = Impl::MkUse(result) }
|
||||
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://help.semmle.com/QL/learn-ql/locations.html).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
getInducingNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
or
|
||||
// For nodes that do not have a meaningful location, `filepath` is the empty string and all other
|
||||
// parameters are zero.
|
||||
not exists(getInducingNode()) and
|
||||
filepath = "" and
|
||||
startline = 0 and
|
||||
startcolumn = 0 and
|
||||
endline = 0 and
|
||||
endcolumn = 0
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a textual representation of this element.
|
||||
*/
|
||||
abstract string toString();
|
||||
|
||||
/**
|
||||
* Gets a path of the given `length` from the root to this node.
|
||||
*/
|
||||
private string getAPath(int length) {
|
||||
this instanceof Impl::MkRoot and
|
||||
length = 0 and
|
||||
result = ""
|
||||
or
|
||||
exists(Node pred, string lbl, string predpath |
|
||||
Impl::edge(pred, lbl, this) and
|
||||
lbl != "" and
|
||||
predpath = pred.getAPath(length - 1) and
|
||||
exists(string dot | if length = 1 then dot = "" else dot = "." |
|
||||
result = predpath + dot + lbl and
|
||||
// avoid producing strings longer than 1MB
|
||||
result.length() < 1000 * 1000
|
||||
)
|
||||
) and
|
||||
length in [1 .. Impl::distanceFromRoot(this)]
|
||||
}
|
||||
|
||||
/** Gets the shortest distance from the root to this node in the API graph. */
|
||||
int getDepth() { result = Impl::distanceFromRoot(this) }
|
||||
}
|
||||
|
||||
/** The root node of an API graph. */
|
||||
class Root extends Node, Impl::MkRoot {
|
||||
override string toString() { result = "root" }
|
||||
}
|
||||
|
||||
/** A node corresponding to the use of an API component. */
|
||||
class Use extends Node, Impl::TUse {
|
||||
override string toString() {
|
||||
exists(string type |
|
||||
this = Impl::MkUse(_) and type = "Use "
|
||||
or
|
||||
this = Impl::MkModuleImport(_) and type = "ModuleImport "
|
||||
|
|
||||
result = type + getPath()
|
||||
or
|
||||
not exists(this.getPath()) and result = type + "with no path"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets the root node. */
|
||||
Root root() { any() }
|
||||
|
||||
/**
|
||||
* Gets a node corresponding to an import of module `m`.
|
||||
*
|
||||
* Note: You should only use this predicate for top level modules. If you want nodes corresponding to a submodule,
|
||||
* you should use `.getMember` on the parent module. For example, for nodes corresponding to the module `foo.bar`,
|
||||
* use `moduleImport("foo").getMember("bar")`.
|
||||
*/
|
||||
Node moduleImport(string m) { result = Impl::MkModuleImport(m) }
|
||||
|
||||
/** Gets a node corresponding to the built-in with the given name, if any. */
|
||||
Node builtin(string n) { result = moduleImport("builtins").getMember(n) }
|
||||
|
||||
/**
|
||||
* Provides the actual implementation of API graphs, cached for performance.
|
||||
*
|
||||
* Ideally, we'd like nodes to correspond to (global) access paths, with edge labels
|
||||
* corresponding to extending the access path by one element. We also want to be able to map
|
||||
* nodes to their definitions and uses in the data-flow graph, and this should happen modulo
|
||||
* (inter-procedural) data flow.
|
||||
*
|
||||
* This, however, is not easy to implement, since access paths can have unbounded length
|
||||
* and we need some way of recognizing cycles to avoid non-termination. Unfortunately, expressing
|
||||
* a condition like "this node hasn't been involved in constructing any predecessor of
|
||||
* this node in the API graph" without negative recursion is tricky.
|
||||
*
|
||||
* So instead most nodes are directly associated with a data-flow node, representing
|
||||
* either a use or a definition of an API component. This ensures that we only have a finite
|
||||
* number of nodes. However, we can now have multiple nodes with the same access
|
||||
* path, which are essentially indistinguishable for a client of the API.
|
||||
*
|
||||
* On the other hand, a single node can have multiple access paths (which is, of
|
||||
* course, unavoidable). We pick as canonical the alphabetically least access path with
|
||||
* shortest length.
|
||||
*/
|
||||
cached
|
||||
private module Impl {
|
||||
/*
|
||||
* Modeling imports is slightly tricky because of the way we handle dotted name imports in our
|
||||
* libraries. In dotted imports such as
|
||||
*
|
||||
* ```python
|
||||
* import foo.bar.baz as fbb
|
||||
* from foo.bar.baz import quux as fbbq
|
||||
* ```
|
||||
*
|
||||
* the dotted name is simply represented as a string. We would like `fbb.quux` and `fbbq` to
|
||||
* be represented as API graph nodes with the following path:
|
||||
*
|
||||
* ```ql
|
||||
* moduleImport("foo").getMember("bar").getMember("baz").getMember("quux")
|
||||
* ```
|
||||
*
|
||||
* To do this, we produce an API graph node for each dotted name prefix we find in the set of
|
||||
* imports. Thus, for the above two imports, we would get nodes for
|
||||
*
|
||||
* ```python
|
||||
* foo
|
||||
* foo.bar
|
||||
* foo.bar.baz
|
||||
* ```
|
||||
*
|
||||
* Only the first of these can act as the beginning of a path (and become a
|
||||
* `moduleImport`-labeled edge from the global root node).
|
||||
*
|
||||
* (Using prefixes rather than simply `foo`, `bar`, and `baz` is important. We don't want
|
||||
* potential crosstalk between `foo.bar.baz` and `ham.bar.eggs`.)
|
||||
*
|
||||
* We then add `getMember` edges between these prefixes: `foo` steps to `foo.bar` via an edge
|
||||
* labeled `getMember("bar")` and so on.
|
||||
*
|
||||
* When we then see `import foo.bar.baz as fbb`, the data-flow node `fbb` gets marked as a use
|
||||
* of the API graph node corresponding to the prefix `foo.bar.baz`. Because of the edges leading to
|
||||
* this node, it is reachable via `moduleImport("foo").getMember("bar").getMember("baz")` and
|
||||
* thus `fbb.quux` is reachable via the path mentioned above.
|
||||
*
|
||||
* When we see `from foo.bar.baz import quux as fbbq` a similar thing happens. First, `foo.bar.baz`
|
||||
* is seen as a use of the API graph node as before. Then `import quux as fbbq` is seen as
|
||||
* a member lookup of `quux` on the API graph node for `foo.bar.baz`, and then finally the
|
||||
* data-flow node `fbbq` is marked as a use of the same path mentioned above.
|
||||
*
|
||||
* Finally, in a non-aliased import such as
|
||||
*
|
||||
* ```python
|
||||
* import foo.bar.baz
|
||||
* ```
|
||||
*
|
||||
* we only consider this as a definition of the name `foo` (thus making it a use of the corresponding
|
||||
* API graph node for the prefix `foo`), in accordance with the usual semantics of Python.
|
||||
*/
|
||||
|
||||
cached
|
||||
newtype TApiNode =
|
||||
/** The root of the API graph. */
|
||||
MkRoot() or
|
||||
/** An abstract representative for imports of the module called `name`. */
|
||||
MkModuleImport(string name) {
|
||||
// Ignore the following module name for Python 2, as we alias `__builtin__` to `builtins` elsewhere
|
||||
(name != "__builtin__" or major_version() = 3) and
|
||||
(
|
||||
imports(_, name)
|
||||
or
|
||||
// When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes
|
||||
// `foo` and `foo.bar`:
|
||||
name = any(ImportExpr e | not e.isRelative()).getAnImportedModuleName()
|
||||
)
|
||||
or
|
||||
// The `builtins` module should always be implicitly available
|
||||
name = "builtins"
|
||||
} or
|
||||
/** A use of an API member at the node `nd`. */
|
||||
MkUse(DataFlow::Node nd) { use(_, _, nd) }
|
||||
|
||||
class TUse = MkModuleImport or MkUse;
|
||||
|
||||
/**
|
||||
* Holds if the dotted module name `sub` refers to the `member` member of `base`.
|
||||
*
|
||||
* For instance, `prefix_member("foo.bar", "baz", "foo.bar.baz")` would hold.
|
||||
*/
|
||||
private predicate prefix_member(TApiNode base, string member, TApiNode sub) {
|
||||
exists(string sub_str, string regexp |
|
||||
regexp = "(.+)[.]([^.]+)" and
|
||||
base = MkModuleImport(sub_str.regexpCapture(regexp, 1)) and
|
||||
member = sub_str.regexpCapture(regexp, 2) and
|
||||
sub = MkModuleImport(sub_str)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `imp` is a data-flow node inside an import statement that refers to a module by the
|
||||
* name `name`.
|
||||
*
|
||||
* Ignores relative imports, such as `from ..foo.bar import baz`.
|
||||
*/
|
||||
private predicate imports(DataFlow::Node imp, string name) {
|
||||
exists(ImportExprNode iexpr |
|
||||
imp.asCfgNode() = iexpr and
|
||||
not iexpr.getNode().isRelative() and
|
||||
name = iexpr.getNode().getImportedModuleName()
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the name of a known built-in. */
|
||||
private string getBuiltInName() {
|
||||
// These lists were created by inspecting the `builtins` and `__builtin__` modules in
|
||||
// Python 3 and 2 respectively, using the `dir` built-in.
|
||||
// Built-in functions and exceptions shared between Python 2 and 3
|
||||
result in [
|
||||
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
|
||||
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
|
||||
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
|
||||
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
|
||||
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
|
||||
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
|
||||
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
|
||||
// Exceptions
|
||||
"ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
|
||||
"BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
|
||||
"FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
|
||||
"ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
|
||||
"LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
|
||||
"OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
|
||||
"RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
|
||||
"SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
|
||||
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
|
||||
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
|
||||
// Added for compatibility
|
||||
"exec"
|
||||
]
|
||||
or
|
||||
// Built-in constants shared between Python 2 and 3
|
||||
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
|
||||
or
|
||||
// Python 3 only
|
||||
result in [
|
||||
"ascii", "breakpoint", "bytes", "exec",
|
||||
// Exceptions
|
||||
"BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
|
||||
"ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
|
||||
"FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
|
||||
"NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
|
||||
"ResourceWarning", "StopAsyncIteration", "TimeoutError"
|
||||
]
|
||||
or
|
||||
// Python 2 only
|
||||
result in [
|
||||
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
|
||||
"unichr", "unicode", "xrange"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
|
||||
*
|
||||
* Currently this is an over-approximation, and may not account for things like overwriting a
|
||||
* built-in with a different value.
|
||||
*/
|
||||
private DataFlow::Node likely_builtin(string name) {
|
||||
exists(Module m |
|
||||
result.asCfgNode() =
|
||||
any(NameNode n |
|
||||
possible_builtin_accessed_in_module(n, name, m) and
|
||||
not possible_builtin_defined_in_module(name, m)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
|
||||
* a value in the module `m`.
|
||||
*/
|
||||
private predicate possible_builtin_defined_in_module(string name, Module m) {
|
||||
exists(NameNode n |
|
||||
not exists(LocalVariable v | n.defines(v)) and
|
||||
n.isStore() and
|
||||
name = n.getId() and
|
||||
name = getBuiltInName() and
|
||||
m = n.getEnclosingModule()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
|
||||
* built-in) inside the module `m`.
|
||||
*/
|
||||
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
|
||||
n.isGlobal() and
|
||||
n.isLoad() and
|
||||
name = n.getId() and
|
||||
name = getBuiltInName() and
|
||||
m = n.getEnclosingModule()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
|
||||
* `lbl` in the API graph.
|
||||
*/
|
||||
cached
|
||||
predicate use(TApiNode base, string lbl, DataFlow::Node ref) {
|
||||
exists(DataFlow::LocalSourceNode src, DataFlow::LocalSourceNode pred |
|
||||
// First, we find a predecessor of the node `ref` that we want to determine. The predecessor
|
||||
// is any node that is a type-tracked use of a data flow node (`src`), which is itself a
|
||||
// reference to the API node `base`. Thus, `pred` and `src` both represent uses of `base`.
|
||||
//
|
||||
// Once we have identified the predecessor, we define its relation to the successor `ref` as
|
||||
// well as the label on the edge from `pred` to `ref`. This label describes the nature of
|
||||
// the relationship between `pred` and `ref`.
|
||||
use(base, src) and pred = trackUseNode(src)
|
||||
|
|
||||
// Referring to an attribute on a node that is a use of `base`:
|
||||
lbl = Label::memberFromRef(ref) and
|
||||
ref = pred.getAnAttributeReference()
|
||||
or
|
||||
// Calling a node that is a use of `base`
|
||||
lbl = Label::return() and
|
||||
ref = pred.getACall()
|
||||
or
|
||||
// Subclassing a node
|
||||
lbl = Label::subclass() and
|
||||
exists(DataFlow::Node superclass | pred.flowsTo(superclass) |
|
||||
ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
|
||||
)
|
||||
or
|
||||
// awaiting
|
||||
exists(Await await, DataFlow::Node awaitedValue |
|
||||
lbl = Label::await() and
|
||||
ref.asExpr() = await and
|
||||
await.getValue() = awaitedValue.asExpr() and
|
||||
pred.flowsTo(awaitedValue)
|
||||
)
|
||||
)
|
||||
or
|
||||
// Built-ins, treated as members of the module `builtins`
|
||||
base = MkModuleImport("builtins") and
|
||||
lbl = Label::member(any(string name | ref = likely_builtin(name)))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `ref` is a use of node `nd`.
|
||||
*/
|
||||
cached
|
||||
predicate use(TApiNode nd, DataFlow::Node ref) {
|
||||
exists(string name |
|
||||
nd = MkModuleImport(name) and
|
||||
imports(ref, name)
|
||||
)
|
||||
or
|
||||
// Ensure the Python 2 `__builtin__` module gets the name of the Python 3 `builtins` module.
|
||||
major_version() = 2 and
|
||||
nd = MkModuleImport("builtins") and
|
||||
imports(ref, "__builtin__")
|
||||
or
|
||||
nd = MkUse(ref)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
|
||||
*
|
||||
* The flow from `src` to that node may be inter-procedural.
|
||||
*/
|
||||
private DataFlow::TypeTrackingNode trackUseNode(
|
||||
DataFlow::LocalSourceNode src, DataFlow::TypeTracker t
|
||||
) {
|
||||
t.start() and
|
||||
use(_, src) and
|
||||
result = src
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = trackUseNode(src, t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
|
||||
*
|
||||
* The flow from `src` to that node may be inter-procedural.
|
||||
*/
|
||||
cached
|
||||
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
|
||||
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
|
||||
not result instanceof DataFlow::ModuleVariableNode
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`.
|
||||
*/
|
||||
cached
|
||||
predicate edge(TApiNode pred, string lbl, TApiNode succ) {
|
||||
/* There's an edge from the root node for each imported module. */
|
||||
exists(string m |
|
||||
pred = MkRoot() and
|
||||
lbl = Label::mod(m)
|
||||
|
|
||||
succ = MkModuleImport(m) and
|
||||
// Only allow undotted names to count as base modules.
|
||||
not m.matches("%.%")
|
||||
)
|
||||
or
|
||||
/* Step from the dotted module name `foo.bar` to `foo.bar.baz` along an edge labeled `baz` */
|
||||
exists(string member |
|
||||
prefix_member(pred, member, succ) and
|
||||
lbl = Label::member(member)
|
||||
)
|
||||
or
|
||||
/* Every node that is a use of an API component is itself added to the API graph. */
|
||||
exists(DataFlow::LocalSourceNode ref |
|
||||
use(pred, lbl, ref) and
|
||||
succ = MkUse(ref)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is an edge from `pred` to `succ` in the API graph.
|
||||
*/
|
||||
private predicate edge(TApiNode pred, TApiNode succ) { edge(pred, _, succ) }
|
||||
|
||||
/** Gets the shortest distance from the root to `nd` in the API graph. */
|
||||
cached
|
||||
int distanceFromRoot(TApiNode nd) = shortestDistances(MkRoot/0, edge/2)(_, nd, result)
|
||||
}
|
||||
}
|
||||
|
||||
private module Label {
|
||||
/** Gets the edge label for the module `m`. */
|
||||
bindingset[m]
|
||||
bindingset[result]
|
||||
string mod(string m) { result = "moduleImport(\"" + m + "\")" }
|
||||
|
||||
/** Gets the `member` edge label for member `m`. */
|
||||
bindingset[m]
|
||||
bindingset[result]
|
||||
string member(string m) { result = "getMember(\"" + m + "\")" }
|
||||
|
||||
/** Gets the `member` edge label for the unknown member. */
|
||||
string unknownMember() { result = "getUnknownMember()" }
|
||||
|
||||
/** Gets the `member` edge label for the given attribute reference. */
|
||||
string memberFromRef(DataFlow::AttrRef pr) {
|
||||
result = member(pr.getAttributeName())
|
||||
or
|
||||
not exists(pr.getAttributeName()) and
|
||||
result = unknownMember()
|
||||
}
|
||||
|
||||
/** Gets the `return` edge label. */
|
||||
string return() { result = "getReturn()" }
|
||||
|
||||
/** Gets the `subclass` edge label. */
|
||||
string subclass() { result = "getASubclass()" }
|
||||
|
||||
/** Gets the `await` edge label. */
|
||||
string await() { result = "getAwaited()" }
|
||||
}
|
||||
@@ -1,173 +0,0 @@
|
||||
import python
|
||||
|
||||
/** Syntactic node (Class, Function, Module, Expr, Stmt or Comprehension) corresponding to a flow node */
|
||||
abstract class AstNode extends AstNode_ {
  /*
   * Special comment for documentation generation.
   * All subclasses of `AstNode` that represent concrete syntax should have
   * a comment of the form:
   */

  /* syntax: ... */
  /** Gets the scope in which this node occurs. */
  abstract Scope getScope();

  /**
   * Gets a control-flow node that corresponds directly to this AST node.
   * NOTE: Some statements and other purely syntactic elements may have
   * no `ControlFlowNode` at all.
   */
  ControlFlowNode getAFlowNode() { py_flow_bb_node(result, this, _, _) }

  /**
   * Gets the location of this AST node.
   * The default implementation has no result; subclasses override it.
   */
  Location getLocation() { none() }

  /**
   * Holds if this syntactic element is artificial, that is, it was generated
   * by the compiler and does not appear in the source.
   * The default implementation never holds.
   */
  predicate isArtificial() { none() }

  /**
   * Gets a child of this node in the AST. This predicate is intended for exploring the AST
   * and for experiments; the child-parent relation may not be meaningful.
   * For a relation that is meaningful in terms of dependency, use
   * Expr.getASubExpression(), Stmt.getASubStatement(), Stmt.getASubExpression() or
   * Scope.getAStmt().
   */
  abstract AstNode getAChildNode();

  /**
   * Gets the parent of this node in the AST, i.e. a node that has this node as a child.
   * This predicate is intended for exploring the AST and for experiments; the
   * child-parent relation may not be meaningful.
   * For a relation that is meaningful in terms of dependency, use
   * Expr.getASubExpression(), Stmt.getASubStatement(), Stmt.getASubExpression() or
   * Scope.getAStmt() applied to the parent.
   */
  AstNode getParentNode() { this = result.getAChildNode() }

  /** Holds if `inner` is a (transitive) syntactic descendant of this node. */
  predicate contains(AstNode inner) { inner = this.getAChildNode+() }

  /** Holds if this node contains the non-scope node `inner` and `scope` is the scope of this node. */
  pragma[noinline]
  private predicate containsInScope(AstNode inner, Scope scope) {
    scope = this.getScope() and
    this.contains(inner) and
    not inner instanceof Scope
  }

  /** Holds if this node syntactically contains `inner` and `inner` has the same scope as this node. */
  predicate containsInScope(AstNode inner) {
    exists(Scope innerScope | innerScope = inner.getScope() |
      this.containsInScope(inner, innerScope)
    )
  }
}
|
||||
|
||||
/* Parents */
|
||||
/** Internal implementation class. Re-exports `FunctionParent_` without adding any members. */
library class FunctionParent extends FunctionParent_ { }
|
||||
|
||||
/** Internal implementation class. Re-exports `ArgumentsParent_` without adding any members. */
library class ArgumentsParent extends ArgumentsParent_ { }
|
||||
|
||||
/** Internal implementation class. Re-exports `ExprListParent_` without adding any members. */
library class ExprListParent extends ExprListParent_ { }
|
||||
|
||||
/** Internal implementation class. Re-exports `ExprContextParent_` without adding any members. */
library class ExprContextParent extends ExprContextParent_ { }
|
||||
|
||||
/** Internal implementation class. Re-exports `StmtListParent_` without adding any members. */
library class StmtListParent extends StmtListParent_ { }
|
||||
|
||||
/** Internal implementation class. Re-exports `StrListParent_` without adding any members. */
library class StrListParent extends StrListParent_ { }
|
||||
|
||||
/** Internal implementation class. Re-exports `ExprParent_` without adding any members. */
library class ExprParent extends ExprParent_ { }
|
||||
|
||||
/**
 * Internal class that makes `DictItem_` an `AstNode`.
 * At this level an item has no child nodes and no scope; both predicates have no result.
 */
library class DictItem extends DictItem_, AstNode {
  /** Has no result: no scope is reported for a dictionary item at this level. */
  override Scope getScope() { none() }

  /** Has no result: no children are reported for a dictionary item at this level. */
  override AstNode getAChildNode() { none() }

  /** Gets a textual representation of this element, as provided by `DictItem_`. */
  override string toString() { DictItem_.super.toString() = result }
}
|
||||
|
||||
/** A comprehension part, the 'for a in seq' part of [ a * a for a in seq ] */
class Comprehension extends Comprehension_, AstNode {
  /** Gets the scope of this comprehension */
  override Scope getScope() {
    /* Comprehensions exist only in Python 2 list comprehensions, so their scope is that of the list comp. */
    exists(ListComp listComp |
      listComp.getAGenerator() = this and
      listComp.getScope() = result
    )
  }

  /** Gets a textual representation of this element. */
  override string toString() { result = "Comprehension" }

  /** Gets the location of this comprehension, as provided by `Comprehension_`. */
  override Location getLocation() { Comprehension_.super.getLocation() = result }

  /** Gets a child node of this comprehension; the children are exactly its sub-expressions. */
  override AstNode getAChildNode() { this.getASubExpression() = result }

  /** Gets a sub-expression of this comprehension: its target, its iterable or one of its conditions. */
  Expr getASubExpression() {
    result = this.getTarget()
    or
    result = this.getIter()
    or
    result = this.getAnIf()
  }
}
|
||||
|
||||
/** Re-exports `BytesOrStr_` under its public name without adding any members. */
class BytesOrStr extends BytesOrStr_ { }
|
||||
|
||||
/**
 * One piece of a string literal that is written as an implicit concatenation.
 * For example, the literal "abc" written in the source as `"a" "b" "c"`
 * is made up of three `StringPart`s.
 */
class StringPart extends StringPart_, AstNode {
  /** Gets the scope of the `Bytes` or `Unicode` literal that this part belongs to. */
  override Scope getScope() {
    exists(Unicode u | u.getAnImplicitlyConcatenatedPart() = this and result = u.getScope())
    or
    exists(Bytes b | b.getAnImplicitlyConcatenatedPart() = this and result = b.getScope())
  }

  /** Has no result: a string part has no child nodes. */
  override AstNode getAChildNode() { none() }

  /** Gets a textual representation of this element, as provided by `StringPart_`. */
  override string toString() { StringPart_.super.toString() = result }

  /** Gets the location of this string part, as provided by `StringPart_`. */
  override Location getLocation() { StringPart_.super.getLocation() = result }
}
|
||||
|
||||
/** Re-exports `StringPartList_` under its public name without adding any members. */
class StringPartList extends StringPartList_ { }
|
||||
|
||||
/* **** Lists ***/
|
||||
/** A parameter list */
class ParameterList extends @py_parameter_list {
  /** Gets the function that this parameter list belongs to. */
  Function getParent() { py_parameter_lists(this, result) }

  /** Gets the nth parameter */
  Parameter getItem(int index) {
    /* Item can be a Name or a Tuple, both of which are expressions */
    py_exprs(result, _, this, index)
  }

  /** Gets a parameter */
  Parameter getAnItem() {
    /* Any item of the list, regardless of its position */
    result = this.getItem(_)
  }

  /** Gets a textual representation of this element. */
  string toString() { result = "ParameterList" }
}
|
||||
|
||||
/**
 * A list of `Comprehension`s (for generating parts of a set, list or dictionary comprehension).
 * Adds no members to `ComprehensionList_`.
 */
class ComprehensionList extends ComprehensionList_ { }
|
||||
|
||||
/** A list of expressions. Adds no members to `ExprList_`. */
class ExprList extends ExprList_ {
  /* syntax: Expr, ... */
}
|
||||
|
||||
/** Internal implementation class. Re-exports `DictItemList_` without adding any members. */
library class DictItemList extends DictItemList_ { }
|
||||
|
||||
/** Internal implementation class. Re-exports `DictItemListParent_` without adding any members. */
library class DictItemListParent extends DictItemListParent_ { }
|
||||
|
||||
/**
 * A list of strings (the primitive type string, not Bytes or Unicode).
 * Adds no members to `StringList_`.
 */
class StringList extends StringList_ { }
|
||||
|
||||
/** A list of aliases in an import statement. Adds no members to `AliasList_`. */
class AliasList extends AliasList_ { }
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,175 +0,0 @@
|
||||
/**
|
||||
* Provides classes representing Python classes.
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
/**
 * An (artificial) expression corresponding to a class definition.
 * It is recommended to use `ClassDef` instead.
 */
class ClassExpr extends ClassExpr_ {
  /**
   * Gets the metaclass expression.
   * For Python 3 this is the value of the `metaclass` keyword argument; otherwise it is
   * the value assigned to `__metaclass__` by a statement of the class body.
   */
  Expr getMetaClass() {
    if major_version() = 3
    then
      exists(Keyword kw |
        kw = this.getAKeyword() and
        kw.getArg() = "metaclass"
      |
        result = kw.getValue()
      )
    else
      exists(Assign assignment |
        assignment = this.getInnerScope().getAStmt() and
        assignment.getATarget().(Name).getId() = "__metaclass__"
      |
        result = assignment.getValue()
      )
  }

  /** Gets the nth keyword argument of this class definition. */
  override DictUnpackingOrKeyword getKeyword(int index) {
    this.getKeywords().getItem(index) = result
  }

  /** Gets a keyword argument of this class definition. */
  override DictUnpackingOrKeyword getAKeyword() { this.getKeywords().getAnItem() = result }

  /** Gets a sub-expression: a base, the value of a keyword argument, or a `*`/`**` argument. */
  override Expr getASubExpression() {
    result = this.getABase()
    or
    result = this.getStarargs()
    or
    result = this.getAKeyword().getValue()
    or
    result = this.getKwargs()
  }

  /**
   * Gets a call corresponding to a decorator of this class definition, that is, a call whose
   * first argument is this expression or another decorator call of this expression.
   */
  Call getADecoratorCall() {
    exists(Expr decorated |
      decorated = this
      or
      decorated = this.getADecoratorCall()
    |
      result.getArg(0) = decorated
    )
  }

  /** Gets a decorator of this class expression */
  Expr getADecorator() { this.getADecoratorCall().getFunc() = result }

  /** Gets a child node: a sub-expression or the inner (class) scope. */
  override AstNode getAChildNode() {
    result = this.getInnerScope()
    or
    result = this.getASubExpression()
  }

  /** Gets a tuple (*) argument of this class definition. */
  Expr getStarargs() { this.getABase().(Starred).getValue() = result }

  /** Gets a dictionary (**) argument of this class definition. */
  Expr getKwargs() { this.getAKeyword().(DictUnpacking).getValue() = result }
}
|
||||
|
||||
/** A class statement. Note that ClassDef extends Assign as a class definition binds the newly created class */
class ClassDef extends Assign {
  /* syntax: class name(...): ... */
  ClassDef() {
    /* This is an artificial assignment the rhs of which is a (possibly decorated) ClassExpr */
    this.getValue() instanceof ClassExpr
    or
    this.getValue() = any(ClassExpr c).getADecoratorCall()
  }

  /** Gets a textual representation of this element. */
  override string toString() { result = "ClassDef" }

  /** Gets the class for this statement */
  Class getDefinedClass() {
    exists(ClassExpr classExpr, Expr value | value = this.getValue() |
      (
        value = classExpr
        or
        value = classExpr.getADecoratorCall()
      ) and
      result = classExpr.getInnerScope()
    )
  }

  /** Gets the last statement of the class defined by this statement. */
  override Stmt getLastStatement() { this.getDefinedClass().getLastStatement() = result }
}
|
||||
|
||||
/** The scope of a class. This is the scope of all the statements within the class definition */
class Class extends Class_, Scope, AstNode {
  /**
   * Gets the class expression that is the parent of this scope.
   *
   * Use getADecorator() instead of getDefinition().getADecorator()
   * Use getMetaClass() instead of getDefinition().getMetaClass()
   */
  deprecated ClassExpr getDefinition() { this.getParent() = result }

  /** Gets a function defined directly in this class for which `isInitMethod()` holds */
  Function getInitMethod() { result.isInitMethod() and this = result.getScope() }

  /** Gets a method defined in this class */
  Function getAMethod() { this = result.getScope() }

  /** Gets the location of this class scope. */
  override Location getLocation() { py_scope_location(result, this) }

  /** Gets the scope (module, class or function) in which this class is defined */
  override Scope getEnclosingScope() { this.getParent().getScope() = result }

  /** Use getEnclosingScope() instead */
  override Scope getScope() { this.getParent().getScope() = result }

  /** Gets a textual representation of this element. */
  override string toString() { result = "Class " + this.getName() }

  /** Gets the statements forming the body of this class */
  override StmtList getBody() { Class_.super.getBody() = result }

  /** Gets the nth statement in the class */
  override Stmt getStmt(int index) { Class_.super.getStmt(index) = result }

  /** Gets a statement in the class */
  override Stmt getAStmt() { Class_.super.getAStmt() = result }

  /** Gets the name used to define this class */
  override string getName() { Class_.super.getName() = result }

  /**
   * Holds if this may have a side effect (as determined purely from its syntax).
   * This always holds for a class.
   */
  predicate hasSideEffects() { any() }

  /** Holds if this is probably a mixin (has 'mixin' or similar in name or docstring) */
  predicate isProbableMixin() {
    this.getName().toLowerCase().matches("%mixin%")
    or
    exists(string doc | doc = this.getDocString().getText().toLowerCase() |
      doc.matches("%mixin%")
      or
      doc.matches("%mix-in%")
    )
  }

  /** Gets a child node of this class; the children are exactly its statements. */
  override AstNode getAChildNode() { this.getAStmt() = result }

  /** Gets a decorator of this class. */
  Expr getADecorator() { this.getParent().getADecorator() = result }

  /** Gets the metaclass expression */
  Expr getMetaClass() { this.getParent().getMetaClass() = result }

  /** Gets the ClassObject corresponding to this class */
  ClassObject getClassObject() { this.getParent() = result.getOrigin() }

  /** Gets the nth base of this class definition. */
  Expr getBase(int index) { this.getParent().getBase(index) = result }

  /** Gets a base of this class definition. */
  Expr getABase() { this.getParent().getABase() = result }

  /** Gets the metrics for this class */
  ClassMetrics getMetrics() { result = this }

  /**
   * Gets the qualified name for this class.
   * Should return the same name as the `__qualname__` attribute on classes in Python 3.
   */
  string getQualifiedName() {
    exists(Scope enclosing | enclosing = this.getScope() |
      // A class directly inside a module is known by its plain name.
      enclosing instanceof Module and result = this.getName()
      or
      // Otherwise the name is prefixed by the qualified name of the enclosing function or class.
      exists(string prefix |
        prefix = enclosing.(Class).getQualifiedName()
        or
        prefix = enclosing.(Function).getQualifiedName()
      |
        result = prefix + "." + this.getName()
      )
    )
  }

  /** Holds if this class contains `inner` in the same scope, as defined by `Scope`. */
  override predicate containsInScope(AstNode inner) { Scope.super.containsInScope(inner) }

  /** Holds if this class contains `inner`, as defined by `Scope`. */
  override predicate contains(AstNode inner) { Scope.super.contains(inner) }
}
|
||||
@@ -1,104 +0,0 @@
|
||||
/**
|
||||
* Provides classes representing comments in Python.
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
/** A source code comment */
class Comment extends @py_comment {
  /** Gets the full text of the comment including the leading '#' */
  string getText() { py_comments(this, result, _) }

  /** Gets the contents of the comment excluding the leading '#' */
  string getContents() { this.getText().suffix(1) = result }

  /** Gets the location of this comment. */
  Location getLocation() { py_comments(this, _, result) }

  /** Gets a textual representation of this element. */
  string toString() { result = "Comment " + this.getText() }

  /**
   * Gets the comment that immediately follows this one.
   * Up to two blank lines are allowed between this comment and the following comment,
   * but code or other comments are not.
   */
  Comment getFollowing() {
    exists(File file, int line | this.file_line(file, line) |
      // Directly on the next line.
      result.file_line(file, line + 1)
      or
      // Separated by one empty line.
      file.emptyLine(line + 1) and result.file_line(file, line + 2)
      or
      // Separated by two empty lines.
      file.emptyLine(line + 1) and file.emptyLine(line + 2) and result.file_line(file, line + 3)
    )
  }

  /** Holds if this comment starts on line `n` of file `f`. */
  private predicate file_line(File f, int n) {
    f = this.getLocation().getFile() and
    n = this.getLocation().getStartLine()
  }
}
|
||||
|
||||
/**
 * Holds if `part` is the `i`th comment (counting from 1) of the run of consecutive
 * comments that starts at `start`.
 * A run starts at a comment that follows no other comment and is itself followed by one.
 */
private predicate comment_block_part(Comment start, Comment part, int i) {
  // Base case: the first comment of a run.
  i = 1 and
  part = start and
  exists(part.getFollowing()) and
  not part = any(Comment prev).getFollowing()
  or
  // Recursive case: the comment following the previous part of the same run.
  exists(Comment prev | part = prev.getFollowing() | comment_block_part(start, prev, i - 1))
}
|
||||
|
||||
/**
 * A block of consecutive comments.
 * The block is represented by its first comment.
 */
class CommentBlock extends @py_comment {
  CommentBlock() { comment_block_part(this, _, _) }

  /** Gets the last comment of this block, i.e. the part whose index equals `length()`. */
  private Comment last() { comment_block_part(this, result, this.length()) }

  /** Gets a textual representation of this element. */
  string toString() { result = "Comment block" }

  /** The length of this comment block (in comments) */
  int length() { result = max(int i | comment_block_part(this, _, i)) }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * The start is taken from the first comment of the block and the end from the last.
   * For more information, see
   * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    this.(Comment).getLocation().hasLocationInfo(filepath, startline, startcolumn, _, _) and
    exists(Comment end | end = this.last() |
      end.getLocation().hasLocationInfo(_, _, _, endline, endcolumn)
    )
  }

  /** Holds if this comment block contains `c`. */
  predicate contains(Comment c) {
    comment_block_part(this, c, _)
    or
    this = c
  }

  /**
   * Gets a string representation of this comment block: the contents of its comments,
   * concatenated in order.
   */
  string getContents() {
    // The first comment is already part 1 of the block, so it must not be added a second
    // time under a separate index; doing so would repeat its contents in the result.
    result =
      concat(Comment c, int i | comment_block_part(this, c, i) | c.getContents() order by i)
  }
}
|
||||
|
||||
/**
 * A type-hint comment: any comment whose text starts with `#`, followed by
 * one or more spaces and then `type:`.
 */
class TypeHintComment extends Comment {
  TypeHintComment() {
    exists(string text | text = this.getText() | text.regexpMatch("# +type:.*"))
  }
}
|
||||
@@ -1,524 +0,0 @@
|
||||
/**
|
||||
* Provides classes representing comparison operators.
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
/** A class representing the six comparison operators, ==, !=, <, <=, > and >=. */
class CompareOp extends int {
  CompareOp() { this in [1 .. 6] }

  /** Gets the logical inverse operator, i.e. the operator that holds exactly when this one does not. */
  CompareOp invert() {
    // `==` and `!=` negate each other.
    this = eq() and result = ne()
    or
    this = ne() and result = eq()
    or
    // `<` and `>=` negate each other.
    this = lt() and result = ge()
    or
    this = ge() and result = lt()
    or
    // `>` and `<=` negate each other.
    this = gt() and result = le()
    or
    this = le() and result = gt()
  }

  /** Gets the reverse operator (swapping the operands) */
  CompareOp reverse() {
    // The symmetric operators are their own reverse.
    (this = eq() or this = ne()) and result = this
    or
    this = lt() and result = gt()
    or
    this = gt() and result = lt()
    or
    this = le() and result = ge()
    or
    this = ge() and result = le()
  }

  /** Gets the textual representation of `this`. */
  string repr() {
    this = eq() and result = "=="
    or
    this = ne() and result = "!="
    or
    this = lt() and result = "<"
    or
    this = le() and result = "<="
    or
    this = gt() and result = ">"
    or
    this = ge() and result = ">="
  }

  /** Holds if `op` is the `Cmpop` corresponding to `this`. */
  predicate forOp(Cmpop op) {
    this = eq() and op instanceof Eq
    or
    this = ne() and op instanceof NotEq
    or
    this = lt() and op instanceof Lt
    or
    this = le() and op instanceof LtE
    or
    this = gt() and op instanceof Gt
    or
    this = ge() and op instanceof GtE
  }

  /** Gets this operator if `isTrue` is true, and its inverse if `isTrue` is false. */
  CompareOp conditional(boolean isTrue) {
    isTrue = true and result = this
    or
    isTrue = false and result = this.invert()
  }
}
|
||||
|
||||
/** The `CompareOp` for "equals" (encoded as the integer 1). */
CompareOp eq() { result = 1 }
|
||||
|
||||
/** The `CompareOp` for "not equals" (encoded as the integer 2). */
CompareOp ne() { result = 2 }
|
||||
|
||||
/** The `CompareOp` for "less than" (encoded as the integer 3). */
CompareOp lt() { result = 3 }
|
||||
|
||||
/** The `CompareOp` for "less than or equal to" (encoded as the integer 4). */
CompareOp le() { result = 4 }
|
||||
|
||||
/** The `CompareOp` for "greater than" (encoded as the integer 5). */
CompareOp gt() { result = 5 }
|
||||
|
||||
/** The `CompareOp` for "greater than or equal to" (encoded as the integer 6). */
CompareOp ge() { result = 6 }
|
||||
|
||||
/* Workaround precision limits in floating point numbers */
/** Holds if `x` lies strictly between -2**53 and 2**53. */
bindingset[x]
private predicate ok_magnitude(float x) {
  x < 9007199254740992.0 and // 2**53
  x > -9007199254740992.0 // -2**53
}
|
||||
|
||||
/** Gets `x + y`, provided that both operands and the sum satisfy `ok_magnitude`. */
bindingset[x, y]
private float add(float x, float y) {
  result = x + y and
  ok_magnitude(x) and
  ok_magnitude(y) and
  ok_magnitude(result)
}
|
||||
|
||||
/** Gets `x - y`, provided that both operands and the difference satisfy `ok_magnitude`. */
bindingset[x, y]
private float sub(float x, float y) {
  result = x - y and
  ok_magnitude(x) and
  ok_magnitude(y) and
  ok_magnitude(result)
}
|
||||
|
||||
/**
 * Normalise the comparison `cmp` into the form `left op right + k`.
 * Covers plain comparisons, additions and subtractions of numeric literals on either
 * side, `not` applied to a comparison, and the same test with its operands swapped.
 */
private predicate test(
  ControlFlowNode cmp, ControlFlowNode left, CompareOp op, ControlFlowNode right, float k
) {
  k = 0 and simple_test(cmp, left, op, right)
  or
  add_test(cmp, left, op, right, k)
  or
  subtract_test(cmp, left, op, right, k)
  or
  not_test(cmp, left, op, right, k)
  or
  // `right op' left + c` is the same test as `left op right - c`, where `op'` is `op` reversed.
  exists(float c | k = -c and test(cmp, right, op.reverse(), left, c))
}
|
||||
|
||||
/** Holds if `cmp` directly compares `l` and `r` with the operator `cmpop`, i.e. `l cmpop r + 0`. */
private predicate simple_test(CompareNode cmp, ControlFlowNode l, CompareOp cmpop, ControlFlowNode r) {
  exists(Cmpop syntacticOp | cmpop.forOp(syntacticOp) | cmp.operands(l, syntacticOp, r))
}
|
||||
|
||||
/**
 * Holds if `cmp` normalises to `l op r + k` by moving a numeric literal `x` out of an
 * addition on the left-hand side: `l + x op r + c` => `l op r + (c - x)`.
 * The literal may be either operand of the addition.
 */
private predicate add_test_left(
  CompareNode cmp, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k
) {
  exists(BinaryExprNode sum, Num literal, float c |
    sum.getNode().getOp() instanceof Add and
    test(cmp, sum, op, r, c) and
    (
      l = sum.getLeft() and literal = sum.getRight().getNode()
      or
      l = sum.getRight() and literal = sum.getLeft().getNode()
    )
  |
    k = sub(c, literal.getN().toFloat())
  )
}
|
||||
|
||||
/**
 * Holds if `cmp` normalises to `l op r + k` by moving a numeric literal `x` out of an
 * addition on the right-hand side: `l op (r + x) + c` => `l op r + (c + x)`.
 * The literal may be either operand of the addition.
 */
private predicate add_test_right(
  CompareNode cmp, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k
) {
  exists(BinaryExprNode sum, Num literal, float c |
    sum.getNode().getOp() instanceof Add and
    test(cmp, l, op, sum, c) and
    (
      r = sum.getLeft() and literal = sum.getRight().getNode()
      or
      r = sum.getRight() and literal = sum.getLeft().getNode()
    )
  |
    k = add(c, literal.getN().toFloat())
  )
}
|
||||
|
||||
/*
 * left + x op right + c => left op right + (c-x)
 * left op (right + x) + c => left op right + (c+x)
 */

/** Holds if `cmp` normalises to `l op r + k` by removing an added literal from either side. */
private predicate add_test(
  CompareNode cmp, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k
) {
  add_test_right(cmp, l, op, r, k)
  or
  add_test_left(cmp, l, op, r, k)
}
|
||||
|
||||
/**
 * Holds if `cmp` normalises to `l op r + k` by moving a subtracted numeric literal `x`
 * out of the left-hand side: `l - x op r + c` => `l op r + (c + x)`.
 */
private predicate subtract_test_left(
  CompareNode cmp, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k
) {
  exists(BinaryExprNode difference, Num literal, float c |
    difference.getNode().getOp() instanceof Sub and
    test(cmp, difference, op, r, c) and
    // The difference is `l - literal`.
    l = difference.getLeft() and
    literal = difference.getRight().getNode()
  |
    k = add(c, literal.getN().toFloat())
  )
}
|
||||
|
||||
/**
 * Holds if `cmp` normalises to `l op r + k` by moving a subtracted numeric literal `x`
 * out of the right-hand side: `l op (r - x) + c` => `l op r + (c - x)`.
 */
private predicate subtract_test_right(
  CompareNode cmp, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k
) {
  exists(BinaryExprNode rhs, float c, float x, Num n |
    rhs.getNode().getOp() instanceof Sub and
    test(cmp, l, op, rhs, c) and
    // `rhs` must be `r - n`: the non-constant operand is on the left of the subtraction
    // and the literal on the right, as in `subtract_test_left`. Matching `n - r` instead
    // would negate `r`, for which the rule `k = c - x` does not hold.
    r = rhs.getLeft() and
    n = rhs.getRight().getNode() and
    x = n.getN().toFloat()
  |
    k = sub(c, x)
  )
}
|
||||
|
||||
/*
 * left - x op right + c => left op right + (c+x)
 * left op (right - x) + c => left op right + (c-x)
 */

/** Holds if `cmp` normalises to `l op r + k` by removing a subtracted literal from either side. */
private predicate subtract_test(
  CompareNode cmp, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k
) {
  subtract_test_right(cmp, l, op, r, k)
  or
  subtract_test_left(cmp, l, op, r, k)
}
|
||||
|
||||
/**
 * Holds if `u` is a `not` expression whose operand normalises to `l op' r + k`,
 * where `op'` is the inverse of `op`; `u` itself then tests `l op r + k`.
 */
private predicate not_test(
  UnaryExprNode u, ControlFlowNode l, CompareOp op, ControlFlowNode r, float k
) {
  test(u.getOperand(), l, op.invert(), r, k) and
  u.getNode().getOp() instanceof Not
}
|
||||
|
||||
/**
 * A comparison which can be simplified to the canonical form `x OP y + k` where `x` and `y` are `ControlFlowNode`s,
 * `k` is a floating point constant and `OP` is one of `==`, `!=`, `<`, `<=`, `>` or `>=`.
 */
class Comparison extends ControlFlowNode {
  Comparison() { test(this, _, _, _, _) }

  /** Whether this condition tests `l op r + k` */
  predicate tests(ControlFlowNode l, CompareOp op, ControlFlowNode r, float k) {
    test(this, l, op, r, k)
  }

  /** Whether this condition tests `l op k`, i.e. the right-hand operand is a numeric literal. */
  predicate tests(ControlFlowNode l, CompareOp op, float k) {
    exists(ControlFlowNode r, float x, float c | test(this, l, op, r, c) |
      x = r.getNode().(Num).getN().toFloat() and
      k = add(c, x)
    )
  }

  /*
   * The following predicates determine whether this test, when its result is `thisIsTrue`,
   * is equivalent to the predicate `v OP k` or `v1 OP v2 + k`.
   * For example, the test `x <= y` being false, is equivalent to the predicate `x > y`.
   */

  private predicate equivalentToEq(boolean thisIsTrue, SsaVariable v, float k) {
    this.tests(v.getAUse(), eq().conditional(thisIsTrue), k)
  }

  private predicate equivalentToNotEq(boolean thisIsTrue, SsaVariable v, float k) {
    this.tests(v.getAUse(), ne().conditional(thisIsTrue), k)
  }

  private predicate equivalentToLt(boolean thisIsTrue, SsaVariable v, float k) {
    this.tests(v.getAUse(), lt().conditional(thisIsTrue), k)
  }

  private predicate equivalentToLtEq(boolean thisIsTrue, SsaVariable v, float k) {
    this.tests(v.getAUse(), le().conditional(thisIsTrue), k)
  }

  private predicate equivalentToGt(boolean thisIsTrue, SsaVariable v, float k) {
    this.tests(v.getAUse(), gt().conditional(thisIsTrue), k)
  }

  private predicate equivalentToGtEq(boolean thisIsTrue, SsaVariable v, float k) {
    this.tests(v.getAUse(), ge().conditional(thisIsTrue), k)
  }

  private predicate equivalentToEq(boolean thisIsTrue, SsaVariable v1, SsaVariable v2, float k) {
    this.tests(v1.getAUse(), eq().conditional(thisIsTrue), v2.getAUse(), k)
  }

  private predicate equivalentToNotEq(boolean thisIsTrue, SsaVariable v1, SsaVariable v2, float k) {
    this.tests(v1.getAUse(), ne().conditional(thisIsTrue), v2.getAUse(), k)
  }

  private predicate equivalentToLt(boolean thisIsTrue, SsaVariable v1, SsaVariable v2, float k) {
    this.tests(v1.getAUse(), lt().conditional(thisIsTrue), v2.getAUse(), k)
  }

  private predicate equivalentToLtEq(boolean thisIsTrue, SsaVariable v1, SsaVariable v2, float k) {
    this.tests(v1.getAUse(), le().conditional(thisIsTrue), v2.getAUse(), k)
  }

  private predicate equivalentToGt(boolean thisIsTrue, SsaVariable v1, SsaVariable v2, float k) {
    this.tests(v1.getAUse(), gt().conditional(thisIsTrue), v2.getAUse(), k)
  }

  private predicate equivalentToGtEq(boolean thisIsTrue, SsaVariable v1, SsaVariable v2, float k) {
    this.tests(v1.getAUse(), ge().conditional(thisIsTrue), v2.getAUse(), k)
  }

  /**
   * Whether the result of this comparison being `thisIsTrue` implies that the result of `that` is `thatIsTrue`.
   * In other words, does the predicate that is equivalent to the result of `this` being `thisIsTrue`
   * imply the predicate that is equivalent to the result of `that` being `thatIsTrue`.
   * For example, assume that there are two tests, which when normalised have the form `x < y` and `x > y + 1`.
   * Then the test `x < y` having a true result, implies that the test `x > y + 1` will have a false result.
   * (`x < y` having a false result implies nothing about `x > y + 1`)
   */
  predicate impliesThat(boolean thisIsTrue, Comparison that, boolean thatIsTrue) {
    exists(SsaVariable v, float k1, float k2 |
      /* `v == k1` => `v == k2` iff k1 == k2 */
      this.equivalentToEq(thisIsTrue, v, k1) and
      that.equivalentToEq(thatIsTrue, v, k2) and
      eq(k1, k2)
      or
      /* `v != k1` => `v != k2` iff k1 == k2 */
      this.equivalentToNotEq(thisIsTrue, v, k1) and
      that.equivalentToNotEq(thatIsTrue, v, k2) and
      eq(k1, k2)
    )
    or
    exists(SsaVariable v, float k1, float k2 |
      /* `v < k1` => `v != k2` iff k1 <= k2 */
      this.equivalentToLt(thisIsTrue, v, k1) and
      that.equivalentToNotEq(thatIsTrue, v, k2) and
      le(k1, k2)
      or
      /* `v <= k1` => `v != k2` iff k1 < k2 */
      this.equivalentToLtEq(thisIsTrue, v, k1) and
      that.equivalentToNotEq(thatIsTrue, v, k2) and
      lt(k1, k2)
      or
      /* `v > k1` => `v != k2` iff k1 >= k2 */
      this.equivalentToGt(thisIsTrue, v, k1) and
      that.equivalentToNotEq(thatIsTrue, v, k2) and
      ge(k1, k2)
      or
      /* `v >= k1` => `v != k2` iff k1 > k2 */
      this.equivalentToGtEq(thisIsTrue, v, k1) and
      that.equivalentToNotEq(thatIsTrue, v, k2) and
      gt(k1, k2)
    )
    or
    exists(SsaVariable v, float k1, float k2 |
      /* `v < k1` => `v < k2` iff k1 <= k2 */
      this.equivalentToLt(thisIsTrue, v, k1) and
      that.equivalentToLt(thatIsTrue, v, k2) and
      le(k1, k2)
      or
      /* `v < k1` => `v <= k2` iff k1 <= k2 */
      this.equivalentToLt(thisIsTrue, v, k1) and
      that.equivalentToLtEq(thatIsTrue, v, k2) and
      le(k1, k2)
      or
      /* `v <= k1` => `v < k2` iff k1 < k2 */
      this.equivalentToLtEq(thisIsTrue, v, k1) and
      that.equivalentToLt(thatIsTrue, v, k2) and
      lt(k1, k2)
      or
      /* `v <= k1` => `v <= k2` iff k1 <= k2 */
      this.equivalentToLtEq(thisIsTrue, v, k1) and
      that.equivalentToLtEq(thatIsTrue, v, k2) and
      le(k1, k2)
    )
    or
    exists(SsaVariable v, float k1, float k2 |
      /* `v > k1` => `v > k2` iff k1 >= k2 */
      this.equivalentToGt(thisIsTrue, v, k1) and
      that.equivalentToGt(thatIsTrue, v, k2) and
      ge(k1, k2)
      or
      /* `v > k1` => `v >= k2` iff k1 >= k2 */
      this.equivalentToGt(thisIsTrue, v, k1) and
      that.equivalentToGtEq(thatIsTrue, v, k2) and
      ge(k1, k2)
      or
      /* `v >= k1` => `v > k2` iff k1 > k2 */
      this.equivalentToGtEq(thisIsTrue, v, k1) and
      that.equivalentToGt(thatIsTrue, v, k2) and
      gt(k1, k2)
      or
      /* `v >= k1` => `v >= k2` iff k1 >= k2 */
      this.equivalentToGtEq(thisIsTrue, v, k1) and
      that.equivalentToGtEq(thatIsTrue, v, k2) and
      ge(k1, k2)
    )
    or
    exists(SsaVariable v1, SsaVariable v2, float k |
      /* `v1 == v2 + k` => `v1 == v2 + k` */
      this.equivalentToEq(thisIsTrue, v1, v2, k) and
      that.equivalentToEq(thatIsTrue, v1, v2, k)
      or
      /* `v1 != v2 + k` => `v1 != v2 + k` */
      this.equivalentToNotEq(thisIsTrue, v1, v2, k) and
      that.equivalentToNotEq(thatIsTrue, v1, v2, k)
    )
    or
    exists(SsaVariable v1, SsaVariable v2, float k1, float k2 |
      /* `v1 < v2 + k1` => `v1 != v2 + k2` iff k1 <= k2 */
      this.equivalentToLt(thisIsTrue, v1, v2, k1) and
      that.equivalentToNotEq(thatIsTrue, v1, v2, k2) and
      le(k1, k2)
      or
      /* `v1 <= v2 + k1` => `v1 != v2 + k2` iff k1 < k2 */
      this.equivalentToLtEq(thisIsTrue, v1, v2, k1) and
      that.equivalentToNotEq(thatIsTrue, v1, v2, k2) and
      lt(k1, k2)
      or
      /* `v1 > v2 + k1` => `v1 != v2 + k2` iff k1 >= k2 */
      this.equivalentToGt(thisIsTrue, v1, v2, k1) and
      that.equivalentToNotEq(thatIsTrue, v1, v2, k2) and
      ge(k1, k2)
      or
      /* `v1 >= v2 + k1` => `v1 != v2 + k2` iff k1 > k2 */
      this.equivalentToGtEq(thisIsTrue, v1, v2, k1) and
      that.equivalentToNotEq(thatIsTrue, v1, v2, k2) and
      gt(k1, k2)
    )
    or
    exists(SsaVariable v1, SsaVariable v2, float k1, float k2 |
      /*
       * `v1 < v2 + k1` => `v1 < v2 + k2` iff k1 <= k2
       * (mirrors the single-variable `v < k1` => `v < k2` case above)
       */

      this.equivalentToLt(thisIsTrue, v1, v2, k1) and
      that.equivalentToLt(thatIsTrue, v1, v2, k2) and
      le(k1, k2)
      or
      /* `v1 < v2 + k1` => `v1 <= v2 + k2` iff k1 <= k2 */
      this.equivalentToLt(thisIsTrue, v1, v2, k1) and
      that.equivalentToLtEq(thatIsTrue, v1, v2, k2) and
      le(k1, k2)
      or
      /* `v1 <= v2 + k1` => `v1 < v2 + k2` iff k1 < k2 */
      this.equivalentToLtEq(thisIsTrue, v1, v2, k1) and
      that.equivalentToLt(thatIsTrue, v1, v2, k2) and
      lt(k1, k2)
      or
      /* `v1 <= v2 + k1` => `v1 <= v2 + k2` iff k1 <= k2 */
      this.equivalentToLtEq(thisIsTrue, v1, v2, k1) and
      that.equivalentToLtEq(thatIsTrue, v1, v2, k2) and
      le(k1, k2)
    )
    or
    exists(SsaVariable v1, SsaVariable v2, float k1, float k2 |
      /* `v1 > v2 + k1` => `v1 > v2 + k2` iff k1 >= k2 */
      this.equivalentToGt(thisIsTrue, v1, v2, k1) and
      that.equivalentToGt(thatIsTrue, v1, v2, k2) and
      ge(k1, k2)
      or
      /* `v1 > v2 + k1` => `v1 >= v2 + k2` iff k1 >= k2 */
      this.equivalentToGt(thisIsTrue, v1, v2, k1) and
      that.equivalentToGtEq(thatIsTrue, v1, v2, k2) and
      ge(k1, k2)
      or
      /* `v1 >= v2 + k1` => `v1 > v2 + k2` iff k1 > k2 */
      this.equivalentToGtEq(thisIsTrue, v1, v2, k1) and
      that.equivalentToGt(thatIsTrue, v1, v2, k2) and
      gt(k1, k2)
      or
      /* `v1 >= v2 + k1` => `v1 >= v2 + k2` iff k1 >= k2 */
      this.equivalentToGtEq(thisIsTrue, v1, v2, k1) and
      that.equivalentToGtEq(thatIsTrue, v1, v2, k2) and
      ge(k1, k2)
    )
  }
}
|
||||
|
||||
/* Work around differences in floating-point comparisons between Python and QL */
/** Holds if `x` is positive or negative zero. */
private predicate is_zero(float x) {
  x = -0.0
  or
  x = 0.0
}
|
||||
|
||||
/** Holds if `x` is less than `y`, treating both zeros as the same value when `x` is zero. */
bindingset[x, y]
private predicate lt(float x, float y) {
  is_zero(x) and y > 0
  or
  not is_zero(x) and x < y
}
|
||||
|
||||
/**
 * Holds if `x` equals `y`, treating every zero (see `is_zero`) as equal to
 * every other zero.
 */
bindingset[x, y]
private predicate eq(float x, float y) {
  is_zero(x) and is_zero(y)
  or
  not is_zero(x) and x = y
}
|
||||
|
||||
/** Holds if `x` is greater than `y`; defined as `lt` with the operands swapped. */
bindingset[x, y]
private predicate gt(float x, float y) {
  lt(y, x)
}
|
||||
|
||||
/** Holds if `x` is less than or equal to `y`, according to `lt` and `eq`. */
bindingset[x, y]
private predicate le(float x, float y) {
  eq(x, y)
  or
  lt(x, y)
}
|
||||
|
||||
/** Holds if `x` is greater than or equal to `y`, according to `lt` and `eq`. */
bindingset[x, y]
private predicate ge(float x, float y) {
  eq(x, y)
  or
  lt(y, x)
}
|
||||
|
||||
/**
 * A basic block which terminates in a condition, splitting the subsequent control flow,
 * in which the condition is an instance of `Comparison`.
 */
class ComparisonControlBlock extends ConditionBlock {
  ComparisonControlBlock() { this.getLastNode() instanceof Comparison }

  /**
   * Holds if this conditional guard determines that, in block `b`, the comparison
   * described by `(l, op, r, k)` holds.
   *
   * That is the case when the test of this block tests `(l, op, r, k)` and `b` is
   * controlled by its true outcome, or when the test tests `(l, op.invert(), r, k)`
   * and `b` is controlled by its false outcome.
   *
   * NOTE(review): `(l, op, r, k)` presumably stands for `l op r + k`; confirm against
   * `Comparison.tests`.
   */
  predicate controls(ControlFlowNode l, CompareOp op, ControlFlowNode r, float k, BasicBlock b) {
    this.controls(b, true) and this.getTest().tests(l, op, r, k)
    or
    this.controls(b, false) and this.getTest().tests(l, op.invert(), r, k)
  }

  /**
   * Holds if this conditional guard determines that, in block `b`, the comparison
   * described by `(l, op, k)` holds.
   *
   * That is the case when the test of this block tests `(l, op, k)` and `b` is
   * controlled by its true outcome, or when the test tests `(l, op.invert(), k)`
   * and `b` is controlled by its false outcome.
   *
   * NOTE(review): `(l, op, k)` presumably stands for `l op k`; confirm against
   * `Comparison.tests`.
   */
  predicate controls(ControlFlowNode l, CompareOp op, float k, BasicBlock b) {
    this.controls(b, true) and this.getTest().tests(l, op, k)
    or
    this.controls(b, false) and this.getTest().tests(l, op.invert(), k)
  }

  /** Gets the comparison that is the last node of this block. */
  Comparison getTest() { result = this.getLastNode() }

  /** Holds if this conditional guard implies that, in block `b`, the result of `that` is `thatIsTrue`. */
  predicate impliesThat(BasicBlock b, Comparison that, boolean thatIsTrue) {
    exists(boolean controlSense |
      this.controls(b, controlSense) and
      this.getTest().impliesThat(controlSense, that, thatIsTrue)
    )
  }
}
|
||||
@@ -1,122 +0,0 @@
|
||||
import python
|
||||
|
||||
/** Base class for list, set and dictionary comprehensions, and generator expressions. */
abstract class Comp extends Expr {
  /**
   * Gets the function associated with this comprehension. Its first statement is
   * loop `0` of `getNthInnerLoop`.
   */
  abstract Function getFunction();

  /** Gets the iterable of this comprehension. */
  abstract Expr getIterable();

  /**
   * Gets the iteration variable of the `n`th loop of this comprehension, numbered
   * as in `getNthInnerLoop`.
   */
  Variable getIterationVariable(int n) {
    result.getAnAccess() = this.getNthInnerLoop(n).getTarget()
  }

  /**
   * Gets the `n`th `For` loop of this comprehension. Loop `0` is the first statement
   * of `getFunction()`, and loop `n` is the first statement of loop `n - 1`, so
   * larger values of `n` denote more deeply nested loops.
   */
  For getNthInnerLoop(int n) {
    n = 0 and result = this.getFunction().getStmt(0)
    or
    result = this.getNthInnerLoop(n - 1).getStmt(0)
  }

  /** Gets the iteration variable for a generator of this comprehension. */
  Variable getAnIterationVariable() { result = this.getIterationVariable(_) }

  /** Gets the scope in which the body of this comprehension evaluates. */
  Scope getEvaluatingScope() { result = this.getFunction() }

  /**
   * Gets the expression for elements of this comprehension: the value of a `yield`
   * that is either a statement of one of the loops, or the first statement of an
   * `if` that is a statement of one of the loops.
   */
  Expr getElt() {
    exists(Yield yield, Stmt body |
      result = yield.getValue() and
      body = this.getNthInnerLoop(_).getAStmt()
    |
      yield = body.(ExprStmt).getValue()
      or
      yield = body.(If).getStmt(0).(ExprStmt).getValue()
    )
  }
}
|
||||
|
||||
/** A list comprehension, for example `[ chr(x) for x in range(ord('A'), ord('Z')+1) ]`. */
class ListComp extends ListComp_, Comp {
  override Expr getASubExpression() {
    result = this.getIterable()
    or
    result = this.getElt()
    or
    result = this.getAGenerator().getASubExpression()
  }

  override AstNode getAChildNode() {
    result = this.getFunction()
    or
    result = this.getIterable()
    or
    result = this.getAGenerator()
  }

  override predicate hasSideEffects() { any() }

  /**
   * Gets the scope in which the body of this list comprehension evaluates: the
   * enclosing scope for major version 2, the comprehension's function for major
   * version 3.
   */
  override Scope getEvaluatingScope() {
    major_version() = 3 and result = this.getFunction()
    or
    major_version() = 2 and result = this.getScope()
  }

  /** Gets the iteration variable of the `n`th loop of this list comprehension, as defined by `Comp`. */
  override Variable getIterationVariable(int n) { result = Comp.super.getIterationVariable(n) }

  // The remaining overrides pick which supertype's implementation to use.
  override Function getFunction() { result = ListComp_.super.getFunction() }

  override Expr getIterable() { result = ListComp_.super.getIterable() }

  override string toString() { result = ListComp_.super.toString() }

  override Expr getElt() { result = Comp.super.getElt() }
}
|
||||
|
||||
/** A set comprehension, for example `{ v for v in "0123456789" }`. */
class SetComp extends SetComp_, Comp {
  // The iterable is the only sub-expression reported for a set comprehension.
  override Expr getASubExpression() { result = this.getIterable() }

  override AstNode getAChildNode() {
    result = this.getFunction()
    or
    result = this.getASubExpression()
  }

  override predicate hasSideEffects() { any() }

  // Select the `SetComp_` implementations of the members declared abstract in `Comp`.
  override Function getFunction() { result = SetComp_.super.getFunction() }

  override Expr getIterable() { result = SetComp_.super.getIterable() }
}
|
||||
|
||||
/** A dictionary comprehension, for example `{ k:v for k, v in enumerate("0123456789") }`. */
class DictComp extends DictComp_, Comp {
  // The iterable is the only sub-expression reported for a dictionary comprehension.
  override Expr getASubExpression() { result = this.getIterable() }

  override AstNode getAChildNode() {
    result = this.getFunction()
    or
    result = this.getASubExpression()
  }

  override predicate hasSideEffects() { any() }

  // Select the `DictComp_` implementations of the members declared abstract in `Comp`.
  override Function getFunction() { result = DictComp_.super.getFunction() }

  override Expr getIterable() { result = DictComp_.super.getIterable() }
}
|
||||
|
||||
/** A generator expression, for example `(var for var in iterable)`. */
class GeneratorExp extends GeneratorExp_, Comp {
  // The iterable is the only sub-expression reported for a generator expression.
  override Expr getASubExpression() { result = this.getIterable() }

  override AstNode getAChildNode() {
    result = this.getFunction()
    or
    result = this.getASubExpression()
  }

  override predicate hasSideEffects() { any() }

  // Select the `GeneratorExp_` implementations of the members declared abstract in `Comp`.
  override Function getFunction() { result = GeneratorExp_.super.getFunction() }

  override Expr getIterable() { result = GeneratorExp_.super.getIterable() }
}
|
||||
@@ -1,863 +0,0 @@
|
||||
/**
|
||||
* Provides abstract classes representing generic concepts such as file system
|
||||
* access or system command execution, for which individual framework libraries
|
||||
* provide concrete subclasses.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Frameworks
|
||||
|
||||
/**
 * A data-flow node that runs an operating system command, for example by
 * spawning a new process.
 *
 * Extend this class to refine existing API models. To model new APIs, extend
 * `SystemCommandExecution::Range` instead.
 */
class SystemCommandExecution extends DataFlow::Node {
  SystemCommandExecution::Range range;

  SystemCommandExecution() { range = this }

  /** Gets the argument that specifies the command to be executed. */
  DataFlow::Node getCommand() { result = range.getCommand() }
}

/** Provides a class for modeling new system-command execution APIs. */
module SystemCommandExecution {
  /**
   * A data-flow node that runs an operating system command, for example by
   * spawning a new process.
   *
   * Extend this class to model new APIs. To refine existing API models, extend
   * `SystemCommandExecution` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the argument that specifies the command to be executed. */
    abstract DataFlow::Node getCommand();
  }
}
|
||||
|
||||
/**
 * A data-flow node that accesses the file system: reading and writing data,
 * creating and deleting files and folders, checking and updating permissions,
 * and so on.
 *
 * Extend this class to refine existing API models. To model new APIs, extend
 * `FileSystemAccess::Range` instead.
 */
class FileSystemAccess extends DataFlow::Node {
  FileSystemAccess::Range range;

  FileSystemAccess() { range = this }

  /** Gets an argument to this file system access that is interpreted as a path. */
  DataFlow::Node getAPathArgument() { result = range.getAPathArgument() }
}

/** Provides a class for modeling new file system access APIs. */
module FileSystemAccess {
  /**
   * A data-flow node that accesses the file system: reading and writing data,
   * creating and deleting files and folders, checking and updating permissions,
   * and so on.
   *
   * Extend this class to model new APIs. To refine existing API models, extend
   * `FileSystemAccess` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets an argument to this file system access that is interpreted as a path. */
    abstract DataFlow::Node getAPathArgument();
  }
}
|
||||
|
||||
/**
 * A data-flow node that writes data to the file system.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `FileSystemWriteAccess::Range` instead.
 */
class FileSystemWriteAccess extends FileSystemAccess {
  override FileSystemWriteAccess::Range range;

  /**
   * Gets a node that represents data to be written to the file system (possibly with
   * some transformation happening before it is written, like JSON encoding).
   */
  DataFlow::Node getADataNode() { result = range.getADataNode() }
}

/** Provides a class for modeling new file system writes. */
module FileSystemWriteAccess {
  /**
   * A data-flow node that writes data to the file system.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `FileSystemWriteAccess` instead.
   */
  abstract class Range extends FileSystemAccess::Range {
    /**
     * Gets a node that represents data to be written to the file system (possibly with
     * some transformation happening before it is written, like JSON encoding).
     */
    abstract DataFlow::Node getADataNode();
  }
}
|
||||
|
||||
/** Provides classes for modeling path-related APIs. */
module Path {
  /**
   * A data-flow node that normalizes a path. Normalization is often a
   * prerequisite for accessing paths safely.
   */
  class PathNormalization extends DataFlow::Node {
    PathNormalization::Range range;

    PathNormalization() { range = this }
  }

  /** Provides a class for modeling new path normalization APIs. */
  module PathNormalization {
    /**
     * A data-flow node that normalizes a path. Normalization is often a
     * prerequisite for accessing paths safely.
     */
    abstract class Range extends DataFlow::Node { }
  }

  /** A data-flow node that checks whether a path is safe to access. */
  class SafeAccessCheck extends DataFlow::BarrierGuard {
    SafeAccessCheck::Range range;

    SafeAccessCheck() { range = this }

    // Delegates to the underlying `Range`.
    override predicate checks(ControlFlowNode node, boolean branch) {
      range.checks(node, branch)
    }
  }

  /** Provides a class for modeling new path safety checks. */
  module SafeAccessCheck {
    /** A data-flow node that checks whether a path is safe to access. */
    abstract class Range extends DataFlow::BarrierGuard { }
  }
}
|
||||
|
||||
/**
 * A data-flow node that decodes data from a binary or textual format. This is
 * meant to cover deserialization, unmarshalling, decoding, unpickling,
 * decompressing, decrypting, parsing etc.
 *
 * A decoding (automatically) preserves taint from input to output. It can also
 * be a problem in its own right, for example if it allows code execution or
 * could result in denial-of-service.
 *
 * Extend this class to refine existing API models. To model new APIs, extend
 * `Decoding::Range` instead.
 */
class Decoding extends DataFlow::Node {
  Decoding::Range range;

  Decoding() { range = this }

  /** Holds if this call may execute code embedded in its input. */
  predicate mayExecuteInput() { range.mayExecuteInput() }

  /** Gets an input that is decoded by this function. */
  DataFlow::Node getAnInput() { result = range.getAnInput() }

  /** Gets the output that contains the decoded data produced by this function. */
  DataFlow::Node getOutput() { result = range.getOutput() }

  /** Gets an identifier for the format this function decodes from, such as "JSON". */
  string getFormat() { result = range.getFormat() }
}

/** Provides a class for modeling new decoding mechanisms. */
module Decoding {
  /**
   * A data-flow node that decodes data from a binary or textual format. This is
   * meant to cover deserialization, unmarshalling, decoding, unpickling,
   * decompressing, decrypting, parsing etc.
   *
   * A decoding (automatically) preserves taint from input to output. It can also
   * be a problem in its own right, for example if it allows code execution or
   * could result in denial-of-service.
   *
   * Extend this class to model new APIs. To refine existing API models, extend
   * `Decoding` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Holds if this call may execute code embedded in its input. */
    abstract predicate mayExecuteInput();

    /** Gets an input that is decoded by this function. */
    abstract DataFlow::Node getAnInput();

    /** Gets the output that contains the decoded data produced by this function. */
    abstract DataFlow::Node getOutput();

    /** Gets an identifier for the format this function decodes from, such as "JSON". */
    abstract string getFormat();
  }
}
|
||||
|
||||
/** A taint step from an input of a `Decoding` to its output. */
private class DecodingAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(Decoding d |
      nodeTo = d.getOutput() and
      nodeFrom = d.getAnInput()
    )
  }
}
|
||||
|
||||
/**
 * A data-flow node that encodes data to a binary or textual format. This
 * is intended to include serialization, marshalling, encoding, pickling,
 * compressing, encrypting, etc.
 *
 * An encoding (automatically) preserves taint from input to output.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `Encoding::Range` instead.
 */
class Encoding extends DataFlow::Node {
  Encoding::Range range;

  Encoding() { this = range }

  /** Gets an input that is encoded by this function. */
  DataFlow::Node getAnInput() { result = range.getAnInput() }

  /** Gets the output that contains the encoded data produced by this function. */
  DataFlow::Node getOutput() { result = range.getOutput() }

  /** Gets an identifier for the format this function encodes to, such as "JSON". */
  string getFormat() { result = range.getFormat() }
}

/** Provides a class for modeling new encoding mechanisms. */
module Encoding {
  /**
   * A data-flow node that encodes data to a binary or textual format. This
   * is intended to include serialization, marshalling, encoding, pickling,
   * compressing, encrypting, etc.
   *
   * An encoding (automatically) preserves taint from input to output.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `Encoding` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets an input that is encoded by this function. */
    abstract DataFlow::Node getAnInput();

    /** Gets the output that contains the encoded data produced by this function. */
    abstract DataFlow::Node getOutput();

    /** Gets an identifier for the format this function encodes to, such as "JSON". */
    abstract string getFormat();
  }
}
|
||||
|
||||
/** A taint step from an input of an `Encoding` to its output. */
private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(Encoding e |
      nodeTo = e.getOutput() and
      nodeFrom = e.getAnInput()
    )
  }
}
|
||||
|
||||
/**
 * A data-flow node that logs data.
 *
 * Extend this class to refine existing API models. To model new APIs, extend
 * `Logging::Range` instead.
 */
class Logging extends DataFlow::Node {
  Logging::Range range;

  Logging() { range = this }

  /** Gets an input that is logged. */
  DataFlow::Node getAnInput() { result = range.getAnInput() }
}

/** Provides a class for modeling new logging mechanisms. */
module Logging {
  /**
   * A data-flow node that logs data.
   *
   * Extend this class to model new APIs. To refine existing API models, extend
   * `Logging` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets an input that is logged. */
    abstract DataFlow::Node getAnInput();
  }
}
|
||||
|
||||
/**
 * A data-flow node that dynamically executes Python code.
 *
 * Extend this class to refine existing API models. To model new APIs, extend
 * `CodeExecution::Range` instead.
 */
class CodeExecution extends DataFlow::Node {
  CodeExecution::Range range;

  CodeExecution() { range = this }

  /** Gets the argument that specifies the code to be executed. */
  DataFlow::Node getCode() { result = range.getCode() }
}

/** Provides a class for modeling new dynamic code execution APIs. */
module CodeExecution {
  /**
   * A data-flow node that dynamically executes Python code.
   *
   * Extend this class to model new APIs. To refine existing API models, extend
   * `CodeExecution` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the argument that specifies the code to be executed. */
    abstract DataFlow::Node getCode();
  }
}
|
||||
|
||||
/**
 * A data-flow node that executes SQL statements.
 *
 * Extend this class to refine existing API models. To model new APIs, extend
 * `SqlExecution::Range` instead.
 */
class SqlExecution extends DataFlow::Node {
  SqlExecution::Range range;

  SqlExecution() { range = this }

  /** Gets the argument that specifies the SQL statements to be executed. */
  DataFlow::Node getSql() { result = range.getSql() }
}

/** Provides a class for modeling new SQL execution APIs. */
module SqlExecution {
  /**
   * A data-flow node that executes SQL statements.
   *
   * Extend this class to model new APIs. To refine existing API models, extend
   * `SqlExecution` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the argument that specifies the SQL statements to be executed. */
    abstract DataFlow::Node getSql();
  }
}
|
||||
|
||||
/**
 * A data-flow node that escapes meta-characters, which could be used to
 * prevent injection attacks.
 *
 * Extend this class to refine existing API models. To model new APIs, extend
 * `Escaping::Range` instead.
 */
class Escaping extends DataFlow::Node {
  Escaping::Range range;

  Escaping() {
    // an escape is only valid when _both_ an input and an output are defined
    exists(range.getOutput()) and
    exists(range.getAnInput()) and
    range = this
  }

  /** Gets an input that will be escaped. */
  DataFlow::Node getAnInput() { result = range.getAnInput() }

  /** Gets the output that contains the escaped data. */
  DataFlow::Node getOutput() { result = range.getOutput() }

  /** Gets the context that this function escapes for, such as `html`, or `url`. */
  string getKind() { result = range.getKind() }
}

/** Provides a class for modeling new escaping APIs. */
module Escaping {
  /**
   * A data-flow node that escapes meta-characters, which could be used to
   * prevent injection attacks.
   *
   * Extend this class to model new APIs. To refine existing API models, extend
   * `Escaping` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets an input that will be escaped. */
    abstract DataFlow::Node getAnInput();

    /** Gets the output that contains the escaped data. */
    abstract DataFlow::Node getOutput();

    /**
     * Gets the context that this function escapes for.
     *
     * Kinds are represented as strings, but this should not be relied upon. Use
     * the predicates in the `Escaping` module, such as `getHtmlKind`.
     */
    abstract string getKind();
  }

  /** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
  string getHtmlKind() { result = "html" }
  // TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
  //
  // Technically it claims to escape for both HTML and XML, but for now we don't have
  // anything that relies on XML escaping, so I'm going to defer deciding whether they
  // should be the same kind, or whether they deserve to be treated differently.
}
|
||||
|
||||
/**
 * An escaping of a string that makes it safe to include in the body of an HTML
 * element, for example as the replacement for `{}` in `<p>{}</p>`.
 */
class HtmlEscaping extends Escaping {
  HtmlEscaping() { Escaping::getHtmlKind() = range.getKind() }
}
|
||||
|
||||
/** Provides classes for modeling HTTP-related APIs. */
|
||||
module HTTP {
|
||||
import semmle.python.web.HttpConstants
|
||||
|
||||
/** Provides classes for modeling HTTP servers. */
|
||||
module Server {
|
||||
/**
 * A data-flow node that sets up a route on a server.
 *
 * Extend this class to refine existing API models. To model new APIs, extend
 * `RouteSetup::Range` instead.
 */
class RouteSetup extends DataFlow::Node {
  RouteSetup::Range range;

  RouteSetup() { range = this }

  /** Gets the URL pattern for this route, if it can be statically determined. */
  string getUrlPattern() { result = range.getUrlPattern() }

  /**
   * Gets a function that will handle incoming requests for this route, if any.
   *
   * NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Function`.
   */
  Function getARequestHandler() { result = range.getARequestHandler() }

  /**
   * Gets a parameter that will receive parts of the url when handling incoming
   * requests for this route, if any. These automatically become a `RemoteFlowSource`.
   */
  Parameter getARoutedParameter() { result = range.getARoutedParameter() }

  /** Gets a string that identifies the framework used for this route setup. */
  string getFramework() { result = range.getFramework() }
}

/** Provides a class for modeling new HTTP routing APIs. */
module RouteSetup {
  /**
   * A data-flow node that sets up a route on a server.
   *
   * Extend this class to model new APIs. To refine existing API models, extend
   * `RouteSetup` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the argument used to set the URL pattern. */
    abstract DataFlow::Node getUrlPatternArg();

    /**
     * Gets the URL pattern for this route, if it can be statically determined:
     * the text of a string constant that is a local source of the URL pattern
     * argument.
     */
    string getUrlPattern() {
      exists(StrConst pattern |
        result = pattern.getText() and
        DataFlow::exprNode(pattern) = this.getUrlPatternArg().getALocalSource()
      )
    }

    /**
     * Gets a function that will handle incoming requests for this route, if any.
     *
     * NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Function`.
     */
    abstract Function getARequestHandler();

    /**
     * Gets a parameter that will receive parts of the url when handling incoming
     * requests for this route, if any. These automatically become a `RemoteFlowSource`.
     */
    abstract Parameter getARoutedParameter();

    /** Gets a string that identifies the framework used for this route setup. */
    abstract string getFramework();
  }
}
|
||||
|
||||
/**
 * A function that will handle incoming HTTP requests.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `RequestHandler::Range` instead.
 */
class RequestHandler extends Function {
  RequestHandler::Range range;

  RequestHandler() { this = range }

  /**
   * Gets a parameter that could receive parts of the url when handling incoming
   * requests, if any. These automatically become a `RemoteFlowSource`.
   */
  Parameter getARoutedParameter() { result = range.getARoutedParameter() }

  /** Gets a string that identifies the framework used for this request handler. */
  string getFramework() { result = range.getFramework() }
}

/** Provides a class for modeling new HTTP request handlers. */
module RequestHandler {
  /**
   * A function that will handle incoming HTTP requests.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `RequestHandler` instead.
   *
   * Only extend this class if you can't provide a `RouteSetup`, since we handle that case automatically.
   */
  abstract class Range extends Function {
    /**
     * Gets a parameter that could receive parts of the url when handling incoming
     * requests, if any. These automatically become a `RemoteFlowSource`.
     */
    abstract Parameter getARoutedParameter();

    /** Gets a string that identifies the framework used for this request handler. */
    abstract string getFramework();
  }
}
|
||||
|
||||
/** A request handler derived from a `RouteSetup` that names it as one of its handlers. */
private class RequestHandlerFromRouteSetup extends RequestHandler::Range {
  RouteSetup rs;

  RequestHandlerFromRouteSetup() { rs.getARequestHandler() = this }

  override Parameter getARoutedParameter() {
    // only keep routed parameters of the route setup that belong to this function
    (
      result = this.getArg(_)
      or
      result = this.getArgByName(_)
    ) and
    result = rs.getARoutedParameter()
  }

  override string getFramework() { result = rs.getFramework() }
}
|
||||
|
||||
/** A parameter that receives parts of the url when an incoming request is handled. */
private class RoutedParameter extends RemoteFlowSource::Range, DataFlow::ParameterNode {
  RequestHandler handler;

  RoutedParameter() { handler.getARoutedParameter() = this.getParameter() }

  // The source type is the handler's framework name followed by " RoutedParameter".
  override string getSourceType() { result = handler.getFramework() + " RoutedParameter" }
}
|
||||
|
||||
/**
 * A data-flow node that creates a HTTP response on a server.
 *
 * Note: the response is not required to actually be sent to a client (a kind of
 * "if a tree falls in a forest and nobody hears it" situation).
 *
 * Extend this class to refine existing API models. To model new APIs, extend
 * `HttpResponse::Range` instead.
 */
class HttpResponse extends DataFlow::Node {
  HttpResponse::Range range;

  HttpResponse() { range = this }

  /** Gets the data-flow node that specifies the body of this HTTP response. */
  DataFlow::Node getBody() { result = range.getBody() }

  /** Gets the mimetype of this HTTP response, if it can be statically determined. */
  string getMimetype() { result = range.getMimetype() }
}

/** Provides a class for modeling new HTTP response APIs. */
module HttpResponse {
  /**
   * A data-flow node that creates a HTTP response on a server.
   *
   * Note: the response is not required to actually be sent to a client (a kind of
   * "if a tree falls in a forest and nobody hears it" situation).
   *
   * Extend this class to model new APIs. To refine existing API models, extend
   * `HttpResponse` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the data-flow node that specifies the body of this HTTP response. */
    abstract DataFlow::Node getBody();

    /** Gets the data-flow node that specifies the content-type/mimetype of this HTTP response, if any. */
    abstract DataFlow::Node getMimetypeOrContentTypeArg();

    /** Gets the default mimetype that should be used if `getMimetypeOrContentTypeArg` has no results. */
    abstract string getMimetypeDefault();

    /**
     * Gets the mimetype of this HTTP response, if it can be statically determined.
     *
     * With no content-type/mimetype argument this is the default mimetype;
     * otherwise it is the part before the first `;` of a string constant that is
     * a local source of that argument.
     */
    string getMimetype() {
      not exists(this.getMimetypeOrContentTypeArg()) and
      result = this.getMimetypeDefault()
      or
      exists(StrConst contentType |
        DataFlow::exprNode(contentType) = this.getMimetypeOrContentTypeArg().getALocalSource() and
        result = contentType.getText().splitAt(";", 0)
      )
    }
  }
}
|
||||
|
||||
/**
 * A data-flow node that creates a HTTP redirect response on a server.
 *
 * Note: the redirect is not required to actually be sent to a client (a kind of
 * "if a tree falls in a forest and nobody hears it" situation).
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `HttpRedirectResponse::Range` instead.
 */
class HttpRedirectResponse extends HttpResponse {
  // Narrows the inherited `range` field to the redirect-specific range class.
  override HttpRedirectResponse::Range range;

  HttpRedirectResponse() { range = this }

  /** Gets the data-flow node that holds the location of this HTTP redirect response. */
  DataFlow::Node getRedirectLocation() { range.getRedirectLocation() = result }
}
|
||||
|
||||
/** Provides a class for modeling new HTTP redirect response APIs. */
module HttpRedirectResponse {
  /**
   * A data-flow node that creates a HTTP redirect response on a server.
   *
   * Note: we don't require that this redirect must be sent to a client (a kind of
   * "if a tree falls in a forest and nobody hears it" situation).
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `HttpRedirectResponse` instead.
   */
  abstract class Range extends HTTP::Server::HttpResponse::Range {
    /** Gets the data-flow node that specifies the location of this HTTP redirect response. */
    abstract DataFlow::Node getRedirectLocation();
  }
}
|
||||
|
||||
/**
 * A data-flow node that sets a cookie in an HTTP response.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `HTTP::CookieWrite::Range` instead.
 */
class CookieWrite extends DataFlow::Node {
  // The modeling-side view of this node; every member predicate delegates to it.
  CookieWrite::Range range;

  CookieWrite() { range = this }

  /** Gets the argument, if any, that holds the raw cookie header. */
  DataFlow::Node getHeaderArg() { range.getHeaderArg() = result }

  /** Gets the argument, if any, that holds the cookie name. */
  DataFlow::Node getNameArg() { range.getNameArg() = result }

  /** Gets the argument, if any, that holds the cookie value. */
  DataFlow::Node getValueArg() { range.getValueArg() = result }
}
|
||||
|
||||
/** Provides a class for modeling new cookie writes on HTTP responses. */
module CookieWrite {
  /**
   * A data-flow node that sets a cookie in an HTTP response.
   *
   * Note: we don't require that this cookie must be sent to a client (a kind of
   * "if a tree falls in a forest and nobody hears it" situation).
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `CookieWrite` instead.
   */
  abstract class Range extends DataFlow::Node {
    /**
     * Gets the argument, if any, specifying the raw cookie header.
     */
    abstract DataFlow::Node getHeaderArg();

    /**
     * Gets the argument, if any, specifying the cookie name.
     */
    abstract DataFlow::Node getNameArg();

    /**
     * Gets the argument, if any, specifying the cookie value.
     */
    abstract DataFlow::Node getValueArg();
  }
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides models for cryptographic things.
|
||||
*
|
||||
* Note: The `CryptographicAlgorithm` class currently doesn't take weak keys into
|
||||
* consideration for the `isWeak` member predicate. So RSA is always considered
|
||||
* secure, although using a low number of bits will actually make it insecure. We plan
|
||||
* to improve our libraries in the future to more precisely capture this aspect.
|
||||
*/
|
||||
module Cryptography {
|
||||
/** Provides models for public-key cryptography, also called asymmetric cryptography. */
|
||||
module PublicKey {
|
||||
/**
 * A data-flow node that generates a new key-pair for use with public-key cryptography.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `KeyGeneration::Range` instead.
 */
class KeyGeneration extends DataFlow::Node {
  // The modeling-side view of this node; every member predicate delegates to it.
  KeyGeneration::Range range;

  KeyGeneration() { range = this }

  /** Gets the name of the cryptographic algorithm (for example `"RSA"`). */
  string getName() { range.getName() = result }

  /** Gets the argument that holds the size of the key in bits, if available. */
  DataFlow::Node getKeySizeArg() { range.getKeySizeArg() = result }

  /**
   * Gets the size (in bits) of the generated key; `origin` is the node that
   * explains how this specific key size was obtained.
   */
  int getKeySizeWithOrigin(DataFlow::Node origin) {
    range.getKeySizeWithOrigin(origin) = result
  }

  /** Gets the minimum key size (in bits) for this algorithm to be considered secure. */
  int minimumSecureKeySize() { range.minimumSecureKeySize() = result }
}
|
||||
|
||||
/** Provides classes for modeling new key-pair generation APIs. */
module KeyGeneration {
  /**
   * Gets a node reached with back-tracker state `t` when tracking backwards from the
   * key-size argument `arg` of some key-pair generation.
   */
  private DataFlow::TypeTrackingNode keysizeBacktracker(
    DataFlow::TypeBackTracker t, DataFlow::Node arg
  ) {
    // Base case: a local source of a key-size argument.
    exists(KeyGeneration::Range generation | arg = generation.getKeySizeArg()) and
    arg.getALocalSource() = result and
    t.start()
    or
    // Recursive case: one more back-tracking step from an already reached node.
    exists(DataFlow::TypeBackTracker prev |
      keysizeBacktracker(prev, arg).backtrack(prev, t) = result
    )
  }

  /** Gets a back-reference to the key-size argument `arg` that was used to generate a new key-pair. */
  DataFlow::LocalSourceNode keysizeBacktracker(DataFlow::Node arg) {
    keysizeBacktracker(DataFlow::TypeBackTracker::end(), arg) = result
  }

  /**
   * A data-flow node that generates a new key-pair for use with public-key cryptography.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `KeyGeneration` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the name of the cryptographic algorithm (for example `"RSA"`). */
    abstract string getName();

    /** Gets the argument that holds the size of the key in bits, if available. */
    abstract DataFlow::Node getKeySizeArg();

    /**
     * Gets the size (in bits) of the generated key; `origin` is the node that
     * explains how this specific key size was obtained.
     *
     * Only origins that are integer literals reached by back-tracking from the
     * key-size argument yield a result.
     */
    int getKeySizeWithOrigin(DataFlow::Node origin) {
      exists(IntegerLiteral literal |
        keysizeBacktracker(this.getKeySizeArg()) = origin and
        literal = origin.asExpr()
      |
        literal.getValue() = result
      )
    }

    /** Gets the minimum key size (in bits) for this algorithm to be considered secure. */
    abstract int minimumSecureKeySize();
  }

  /** A data-flow node that generates a new RSA key-pair. */
  abstract class RsaRange extends Range {
    final override string getName() { "RSA" = result }

    final override int minimumSecureKeySize() { 2048 = result }
  }

  /** A data-flow node that generates a new DSA key-pair. */
  abstract class DsaRange extends Range {
    final override string getName() { "DSA" = result }

    final override int minimumSecureKeySize() { 2048 = result }
  }

  /** A data-flow node that generates a new ECC key-pair. */
  abstract class EccRange extends Range {
    final override string getName() { "ECC" = result }

    final override int minimumSecureKeySize() { 224 = result }
  }
}
|
||||
}
|
||||
|
||||
import semmle.python.concepts.CryptoAlgorithms
|
||||
|
||||
/**
 * A data-flow node that applies a cryptographic algorithm, for example
 * encryption, decryption, or signature-validation.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `CryptographicOperation::Range` instead.
 */
class CryptographicOperation extends DataFlow::Node {
  // The modeling-side view of this node; every member predicate delegates to it.
  CryptographicOperation::Range range;

  CryptographicOperation() { range = this }

  /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
  CryptographicAlgorithm getAlgorithm() { range.getAlgorithm() = result }

  /** Gets an input the algorithm is applied to, for example the plain text to be encrypted. */
  DataFlow::Node getAnInput() { range.getAnInput() = result }
}
|
||||
|
||||
/** Provides classes for modeling new applications of cryptographic algorithms. */
module CryptographicOperation {
  /**
   * A data-flow node that applies a cryptographic algorithm, for example
   * encryption, decryption, or signature-validation.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `CryptographicOperation` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
    abstract CryptographicAlgorithm getAlgorithm();

    /** Gets an input the algorithm is applied to, for example the plain text to be encrypted. */
    abstract DataFlow::Node getAnInput();
  }
}
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
/** Standard builtin types and modules */
|
||||
|
||||
import python
|
||||
|
||||
/**
 * Gets the Python major version number.
 *
 * An explicitly recorded language version wins. Otherwise the result is 2 when a
 * major version of 2 is recorded, and 3 if not.
 */
int major_version() {
  if explicit_major_version(_)
  then explicit_major_version(result)
  else (
    /* If there is more than one version, prefer 2 for backwards compatibility */
    py_flags_versioned("version.major", "2", "2") and result = 2
    or
    not py_flags_versioned("version.major", "2", "2") and result = 3
  )
}
|
||||
|
||||
/** Gets the Python minor version number recorded for the current major version. */
int minor_version() {
  exists(string major, string minor |
    major = major_version().toString() and
    py_flags_versioned("version.minor", minor, major)
  |
    minor.toInt() = result
  )
}
|
||||
|
||||
/** Gets the Python micro version number recorded for the current major version. */
int micro_version() {
  exists(string major, string micro |
    major = major_version().toString() and
    py_flags_versioned("version.micro", micro, major)
  |
    micro.toInt() = result
  )
}
|
||||
|
||||
/**
 * Holds if a `language.version` flag is recorded whose first character is
 * `2` or `3`, and `v` is that digit as an integer.
 */
private predicate explicit_major_version(int v) {
  exists(string version, string lead |
    py_flags_versioned("language.version", version, _) and
    lead = version.charAt(0)
  |
    lead = "3" and v = 3
    or
    lead = "2" and v = 2
  )
}
|
||||
@@ -1,741 +0,0 @@
|
||||
import python
|
||||
private import semmle.python.pointsto.PointsTo
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
|
||||
/** An expression */
class Expr extends Expr_, AstNode {
  /** Gets the scope in which this expression occurs. */
  override Scope getScope() { py_scopes(this, result) }

  /** Gets a textual representation of this element. */
  override string toString() { "Expression" = result }

  /** Gets the module that encloses the scope of this expression. */
  Module getEnclosingModule() {
    exists(Scope scope | scope = this.getScope() | scope.getEnclosingModule() = result)
  }

  /**
   * Holds if a (transitively) nested sub-expression of this expression defines variable `v`.
   * If doing dataflow, then consider using SsaVariable.getDefinition() for more precision.
   */
  predicate defines(Variable v) {
    exists(Expr nested | nested = this.getASubExpression+() and nested.defines(v))
  }

  /** Holds if this expression may have a side effect (as determined purely from its syntax). */
  predicate hasSideEffects() {
    this.getASubExpression().hasSideEffects()
    or
    /* If an exception raised by this expression is handled, count that as a side effect */
    exists(ExceptStmt handler | handler = this.getAFlowNode().getASuccessor().getNode())
  }

  /** Holds if this expression is a constant, that is, it is not variable. */
  predicate isConstant() { not this.isVariable() }

  /** Use isParenthesized instead. */
  deprecated override predicate isParenthesised() { this.isParenthesized() }

  /** Holds if the parenthesized property of this expression is true. */
  predicate isParenthesized() { Expr_.super.isParenthesised() }

  /**
   * Holds if this expression has side effects, is a `Name`, or has an immediate
   * sub-expression for which this predicate holds.
   */
  private predicate isVariable() {
    this instanceof Name
    or
    this.hasSideEffects()
    or
    this.getASubExpression().isVariable()
  }

  override Location getLocation() { Expr_.super.getLocation() = result }

  /** Gets an immediate (non-nested) sub-expression of this expression */
  Expr getASubExpression() { none() }

  /** Use StrConst.getText() instead */
  deprecated string strValue() { none() }

  override AstNode getAChildNode() { this.getASubExpression() = result }

  /**
   * NOTE: `refersTo` will be deprecated in 2019. Use `pointsTo` instead.
   * Gets what this expression might "refer-to". Performs a combination of localized (intra-procedural) points-to
   * analysis and global module-level analysis. This points-to analysis favours precision over recall. It is highly
   * precise, but may not provide information for a significant number of flow-nodes.
   * If the class is unimportant then use `refersTo(value)` or `refersTo(value, origin)` instead.
   * NOTE: For complex dataflow, involving multiple stages of points-to analysis, it may be more precise to use
   * `ControlFlowNode.refersTo(...)` instead.
   */
  predicate refersTo(Object obj, ClassObject cls, AstNode origin) {
    // Same as the context-sensitive variant, for any context.
    this.refersTo(_, obj, cls, origin)
  }

  /**
   * NOTE: `refersTo` will be deprecated in 2019. Use `pointsTo` instead.
   * Gets what this expression might "refer-to" in the given `context`.
   */
  predicate refersTo(Context context, Object obj, ClassObject cls, AstNode origin) {
    // Delegates to the flow nodes of this expression and of `origin`.
    this.getAFlowNode().refersTo(context, obj, cls, origin.getAFlowNode())
  }

  /**
   * NOTE: `refersTo` will be deprecated in 2019. Use `pointsTo` instead.
   * Holds if this expression might "refer-to" `obj`, which is from `origin`.
   * Unlike `this.refersTo(obj, _, origin)`, this predicate includes results
   * where the class cannot be inferred.
   */
  pragma[nomagic]
  predicate refersTo(Object obj, AstNode origin) {
    // Delegates to the flow nodes of this expression and of `origin`.
    this.getAFlowNode().refersTo(obj, origin.getAFlowNode())
  }

  /**
   * NOTE: `refersTo` will be deprecated in 2019. Use `pointsTo` instead.
   * Equivalent to `this.refersTo(obj, _)`
   */
  predicate refersTo(Object obj) { this.refersTo(obj, _) }

  /**
   * Holds if this expression might "point-to" `value`, which is from `origin`,
   * in the given `context`.
   */
  predicate pointsTo(Context context, Value value, AstNode origin) {
    // Delegates to the flow nodes of this expression and of `origin`.
    this.getAFlowNode().pointsTo(context, value, origin.getAFlowNode())
  }

  /**
   * Holds if this expression might "point-to" `value`, which is from `origin`.
   */
  predicate pointsTo(Value value, AstNode origin) {
    // Delegates to the flow nodes of this expression and of `origin`.
    this.getAFlowNode().pointsTo(value, origin.getAFlowNode())
  }

  /**
   * Holds if this expression might "point-to" `value`, whatever the origin.
   */
  predicate pointsTo(Value value) { this.pointsTo(value, _) }

  /** Gets a value that this expression might "point-to". */
  Value pointsTo() { this.pointsTo(result) }
}
|
||||
|
||||
/** An assignment expression, such as `x := y` */
class AssignExpr extends AssignExpr_ {
  /** Gets the target or the value of this assignment expression. */
  override Expr getASubExpression() {
    this.getTarget() = result
    or
    this.getValue() = result
  }
}
|
||||
|
||||
/** An attribute expression, such as `value.attr` */
class Attribute extends Attribute_ {
  /* syntax: Expr.name */
  override Expr getASubExpression() { this.getObject() = result }

  override AttrNode getAFlowNode() { super.getAFlowNode() = result }

  /** Gets the name of this attribute, that is, the `name` in `obj.name`. */
  string getName() { Attribute_.super.getAttr() = result }

  /** Gets the object of this attribute, that is, the `obj` in `obj.name`. */
  Expr getObject() { Attribute_.super.getValue() = result }

  /**
   * Gets the expression for the object of this attribute, provided the attribute is named `name`.
   * Equivalent to `this.getObject() and this.getName() = name`.
   */
  Expr getObject(string name) {
    Attribute_.super.getAttr() = name and
    Attribute_.super.getValue() = result
  }
}
|
||||
|
||||
/** A subscript expression, such as `value[slice]` */
class Subscript extends Subscript_ {
  /* syntax: Expr[Expr] */
  /** Gets the subscripted object or the index of this subscript. */
  override Expr getASubExpression() {
    this.getObject() = result
    or
    this.getIndex() = result
  }

  /** Gets the object being subscripted, that is, the `value` in `value[slice]`. */
  Expr getObject() { Subscript_.super.getValue() = result }

  override SubscriptNode getAFlowNode() { super.getAFlowNode() = result }
}
|
||||
|
||||
/** A call expression, such as `func(...)` */
class Call extends Call_ {
  /* syntax: Expr(...) */
  /**
   * Gets a positional argument, the value of a keyword argument (as given by
   * `getAKeyword`), or the called function expression.
   */
  override Expr getASubExpression() {
    result = this.getAPositionalArg() or
    result = this.getAKeyword().getValue() or
    result = this.getFunc()
  }

  /** Holds always: every call is treated as having side effects. */
  override predicate hasSideEffects() { any() }

  /** Gets the textual representation of the called expression followed by `()`. */
  override string toString() { result = this.getFunc().toString() + "()" }

  override CallNode getAFlowNode() { result = super.getAFlowNode() }

  /** Gets a tuple (*) argument of this call. */
  Expr getStarargs() { result = this.getAPositionalArg().(Starred).getValue() }

  /** Gets a dictionary (**) argument of this call. */
  Expr getKwargs() { result = this.getANamedArg().(DictUnpacking).getValue() }

  /* Backwards compatibility */
  /**
   * Gets the nth keyword argument of this call expression, provided it is not preceded by a double-starred argument.
   * This exists primarily for backwards compatibility. You are recommended to use
   * Call.getNamedArg(index) instead.
   */
  Keyword getKeyword(int index) {
    result = this.getNamedArg(index) and
    not exists(DictUnpacking d, int lower | d = this.getNamedArg(lower) and lower < index)
  }

  /**
   * Gets a keyword argument of this call expression, provided it is not preceded by a double-starred argument.
   * This exists primarily for backwards compatibility. You are recommended to use
   * Call.getANamedArg() instead.
   */
  Keyword getAKeyword() { result = this.getKeyword(_) }

  /**
   * Gets the positional argument at `index`, provided it is not itself starred and
   * is not preceded by a starred argument.
   * This exists primarily for backwards compatibility. You are recommended to use
   * Call.getPositionalArg(index) instead.
   */
  Expr getArg(int index) {
    result = this.getPositionalArg(index) and
    not result instanceof Starred and
    not exists(Starred s, int lower | s = this.getPositionalArg(lower) and lower < index)
  }

  /**
   * Gets a positional argument, provided it is not preceded by a starred argument.
   * This exists primarily for backwards compatibility. You are recommended to use
   * Call.getAPositionalArg() instead.
   */
  Expr getAnArg() { result = this.getArg(_) }

  /** Gets a positional argument, a named argument, or the called function expression. */
  override AstNode getAChildNode() {
    result = this.getAPositionalArg() or
    result = this.getANamedArg() or
    result = this.getFunc()
  }

  /** Gets the name of a named argument, including those passed in dict literals. */
  string getANamedArgumentName() {
    result = this.getAKeyword().getArg()
    or
    result = this.getKwargs().(Dict).getAKey().(StrConst).getText()
  }

  /** Gets the positional argument count of this call, provided there is no more than one tuple (*) argument. */
  int getPositionalArgumentCount() {
    count(this.getStarargs()) < 2 and
    // Starred arguments themselves are not counted.
    result = count(Expr arg | arg = this.getAPositionalArg() and not arg instanceof Starred)
  }

  /** Gets the tuple (*) argument of this call, provided there is exactly one. */
  Expr getStarArg() {
    // With no starred argument the second conjunct has no result, so only the
    // "exactly one" case produces a value.
    count(this.getStarargs()) < 2 and
    result = this.getStarargs()
  }
}
|
||||
|
||||
/** A conditional expression, such as `body if test else orelse` */
class IfExp extends IfExp_ {
  /* syntax: Expr if Expr else Expr */
  /** Gets the body, the test, or the else-branch of this conditional expression. */
  override Expr getASubExpression() {
    this.getBody() = result
    or
    this.getTest() = result
    or
    this.getOrelse() = result
  }

  override IfExprNode getAFlowNode() { super.getAFlowNode() = result }
}
|
||||
|
||||
/** A starred expression, such as the `*rest` in the assignment `first, *rest = seq` */
class Starred extends Starred_ {
  /* syntax: *Expr */
  /** Gets the expression that is starred. */
  override Expr getASubExpression() { this.getValue() = result }
}
|
||||
|
||||
/** A yield expression, such as `yield value` */
class Yield extends Yield_ {
  /* syntax: yield Expr */
  /** Gets the yielded expression. */
  override Expr getASubExpression() { this.getValue() = result }

  /** Holds always: a yield is treated as having side effects. */
  override predicate hasSideEffects() { any() }
}
|
||||
|
||||
/** A `yield from` expression, such as `yield from value` */
class YieldFrom extends YieldFrom_ {
  /* syntax: yield from Expr */
  /** Gets the expression that is yielded from. */
  override Expr getASubExpression() { this.getValue() = result }

  /** Holds always: a `yield from` is treated as having side effects. */
  override predicate hasSideEffects() { any() }
}
|
||||
|
||||
/** A repr (backticks) expression, such as `` `value` `` */
class Repr extends Repr_ {
  /* syntax: `Expr` */
  /** Gets the expression inside the backticks. */
  override Expr getASubExpression() { this.getValue() = result }

  /** Holds always: a repr expression is treated as having side effects. */
  override predicate hasSideEffects() { any() }
}
|
||||
|
||||
/* Constants */
|
||||
/**
 * A bytes constant, such as `b'ascii'`. Note that unadorned string constants such as
 * `"hello"` are treated as Bytes for Python2, but Unicode for Python3.
 */
class Bytes extends StrConst {
  /* syntax: b"hello" */
  Bytes() { not this.isUnicode() }

  /** Gets the bytes-typed object whose name is the quoted form of this literal. */
  override Object getLiteralObject() {
    py_cobjectnames(result, this.quotedString()) and
    py_cobjecttypes(result, theBytesType())
  }

  /**
   * The extractor puts quotes into the name of each string (to prevent "0" clashing with 0).
   * This predicate helps us match up a string/bytes literal in the source
   * with the equivalent object.
   */
  private string quotedString() { result = "b'" + this.getS() + "'" }
}
|
||||
|
||||
/** An ellipsis expression, such as `...` */
class Ellipsis extends Ellipsis_ {
  /* syntax: ... */
  /** Has no result: an ellipsis has no sub-expressions. */
  override Expr getASubExpression() { none() }
}
|
||||
|
||||
/**
 * Immutable literal expressions (except tuples).
 * Consists of string (both unicode and byte) literals and numeric literals.
 */
abstract class ImmutableLiteral extends Expr {
  /** Gets the object corresponding to this literal. */
  abstract Object getLiteralObject();

  /** Gets the boolean value of this literal. */
  abstract boolean booleanValue();

  /** Gets the constant value whose literal is this expression. */
  final Value getLiteralValue() {
    exists(ConstantObjectInternal constant |
      constant = result and
      constant.getLiteral() = this
    )
  }
}
|
||||
|
||||
/** A numerical constant expression, such as `7` or `4.2` */
abstract class Num extends Num_, ImmutableLiteral {
  /** Has no result: a numeric constant has no sub-expressions. */
  override Expr getASubExpression() { none() }

  /* We want to declare this abstract, but currently we cannot. */
  /** Gets a fallback textual representation; the subclasses in this file override it. */
  override string toString() { "Num with missing toString" = result }
}
|
||||
|
||||
/** An integer numeric constant, such as `7` or `0x9` */
class IntegerLiteral extends Num {
  /* syntax: 4 */
  IntegerLiteral() { not (this instanceof FloatLiteral or this instanceof ImaginaryLiteral) }

  /**
   * Gets the (integer) value of this constant. Will not return a result if the value does not fit into
   * a 32 bit signed value
   */
  int getValue() { this.getN().toInt() = result }

  override string toString() { "IntegerLiteral" = result }

  /** Gets the int-typed or long-typed object whose name is the text of this literal. */
  override Object getLiteralObject() {
    py_cobjectnames(result, this.getN()) and
    (
      py_cobjecttypes(result, theIntType())
      or
      py_cobjecttypes(result, theLongType())
    )
  }

  /** Gets `false` if the value of this literal is 0 and `true` if it is non-zero. */
  override boolean booleanValue() {
    this.getValue() != 0 and result = true
    or
    this.getValue() = 0 and result = false
  }
}
|
||||
|
||||
/** A floating point numeric constant, such as `0.4` or `4e3` */
class FloatLiteral extends Num {
  /* syntax: 4.2 */
  FloatLiteral() {
    // The text contains `.`, `e` or `E`, and this is not an imaginary literal.
    this.getN().regexpMatch(".*[.eE].*") and
    not this instanceof ImaginaryLiteral
  }

  /** Gets the value of this constant as a floating point value. */
  float getValue() { this.getN().toFloat() = result }

  override string toString() { "FloatLiteral" = result }

  /** Gets the float-typed object whose name is the text of this literal. */
  override Object getLiteralObject() {
    py_cobjectnames(result, this.getN()) and
    py_cobjecttypes(result, theFloatType())
  }

  /** Gets `false` if the value of this literal is a (positive or negative) zero, and `true` otherwise. */
  override boolean booleanValue() {
    // In QL 0.0 != -0.0, so both zeros are checked explicitly.
    (this.getValue() = 0.0 or this.getValue() = -0.0) and
    result = false
    or
    this.getValue() != 0.0 and this.getValue() != -0.0 and result = true
  }
}
|
||||
|
||||
/** An imaginary numeric constant, such as `3j` */
class ImaginaryLiteral extends Num {
  // The float parsed from the text that precedes the `j` in this literal.
  private float value;

  /* syntax: 1.0j */
  ImaginaryLiteral() { this.getN().regexpCapture("(.+)j.*", 1).toFloat() = value }

  /** Gets the value of this constant as a floating point value */
  float getValue() { value = result }

  override string toString() { "ImaginaryLiteral" = result }

  /** Gets the complex-typed object whose name is the text of this literal. */
  override Object getLiteralObject() {
    py_cobjectnames(result, this.getN()) and
    py_cobjecttypes(result, theComplexType())
  }

  /** Gets `false` if the value of this literal is a (positive or negative) zero, and `true` otherwise. */
  override boolean booleanValue() {
    // In QL 0.0 != -0.0, so both zeros are checked explicitly.
    (this.getValue() = 0.0 or this.getValue() = -0.0) and
    result = false
    or
    this.getValue() != 0.0 and this.getValue() != -0.0 and result = true
  }
}
|
||||
|
||||
/** A unary minus applied to an integer literal, such as `-7`. */
class NegativeIntegerLiteral extends ImmutableLiteral, UnaryExpr {
  NegativeIntegerLiteral() {
    this.getOperand() instanceof IntegerLiteral and
    this.getOp() instanceof USub
  }

  /** Gets the boolean value of the negated integer literal. */
  override boolean booleanValue() {
    exists(IntegerLiteral magnitude | magnitude = this.getOperand() |
      magnitude.booleanValue() = result
    )
  }

  /** Gets the int-typed or long-typed object whose name is `-` followed by the operand's text. */
  override Object getLiteralObject() {
    exists(string name | name = "-" + this.getOperand().(IntegerLiteral).getN() |
      py_cobjectnames(result, name) and
      (
        py_cobjecttypes(result, theIntType())
        or
        py_cobjecttypes(result, theLongType())
      )
    )
  }

  /**
   * Gets the (integer) value of this constant. Will not return a result if the value does not fit into
   * a 32 bit signed value
   */
  int getValue() {
    exists(IntegerLiteral magnitude | magnitude = this.getOperand() |
      result = -magnitude.getValue()
    )
  }
}
|
||||
|
||||
/**
 * A unicode string expression, such as `u"\u20ac"`. Note that unadorned string constants such as
 * "hello" are treated as Bytes for Python2, but Unicode for Python3.
 */
class Unicode extends StrConst {
  /* syntax: "hello" */
  Unicode() { this.isUnicode() }

  /** Gets the unicode-typed object whose name is the quoted form of this literal. */
  override Object getLiteralObject() {
    py_cobjectnames(result, this.quotedString()) and
    py_cobjecttypes(result, theUnicodeType())
  }

  /**
   * The extractor puts quotes into the name of each string (to prevent "0" clashing with 0).
   * This predicate helps us match up a string/bytes literal in the source
   * with the equivalent object.
   */
  string quotedString() { result = "u'" + this.getS() + "'" }
}
|
||||
|
||||
/* Compound Values */
|
||||
/** A dictionary expression, such as `{'key':'value'}` */
class Dict extends Dict_ {
  /* syntax: {Expr: Expr, ...} */
  /** Gets the value of an item of this dict display */
  Expr getAValue() {
    exists(DictDisplayItem item | item = this.getAnItem() | item.getValue() = result)
  }

  /**
   * Gets the key of an item of this dict display, for those items that have keys
   * E.g, in {'a':1, **b} this returns only 'a'
   */
  Expr getAKey() {
    exists(KeyValuePair pair | pair = this.getAnItem() | pair.getKey() = result)
  }

  /** Gets a key or a value of this dict display. */
  override Expr getASubExpression() {
    this.getAKey() = result
    or
    this.getAValue() = result
  }

  /** Gets an item of this dict display. */
  override AstNode getAChildNode() { this.getAnItem() = result }
}
|
||||
|
||||
/** A list expression, such as `[ 1, 3, 5, 7, 9 ]` */
class List extends List_ {
  /* syntax: [Expr, ...] */
  /** Gets an element of this list expression. */
  override Expr getASubExpression() { this.getAnElt() = result }
}
|
||||
|
||||
/** A set expression, such as `{ 1, 3, 5, 7, 9 }` */
class Set extends Set_ {
  /* syntax: {Expr, ...} */
  /** Gets an element of this set expression. */
  override Expr getASubExpression() { this.getAnElt() = result }
}
|
||||
|
||||
/** A placeholder expression; it is rendered as `$` followed by the identifier of its variable. */
class PlaceHolder extends PlaceHolder_ {
  /** Gets the identifier of the variable of this placeholder. */
  string getId() { this.getVariable().getId() = result }

  /** Has no result: a placeholder has no sub-expressions. */
  override Expr getASubExpression() { none() }

  override string toString() { result = "$" + this.getId() }

  override NameNode getAFlowNode() { super.getAFlowNode() = result }
}
|
||||
|
||||
/** A tuple expression, such as `( 1, 3, 5, 7, 9 )` */
class Tuple extends Tuple_ {
  /* syntax: (Expr, ...) */
  /** Gets an element of this tuple expression. */
  override Expr getASubExpression() { this.getAnElt() = result }
}
|
||||
|
||||
/**
 * A (plain variable) name expression, such as `var`.
 * `None`, `True` and `False` are excluded.
 */
class Name extends Name_ {
  /* syntax: name */
  /** Gets the identifier of the variable this name refers to. */
  string getId() { this.getVariable().getId() = result }

  /** Holds if this expression is a definition */
  predicate isDefinition() {
    /* The target in an augmented assignment is also a definition (and a use) */
    any(AugAssign aa).getTarget() = this
    or
    /* Treat Param as a definition (which it is) */
    py_expr_contexts(_, 4, this)
    or
    // Context kind 5: presumably the store context; confirm against the dbscheme.
    py_expr_contexts(_, 5, this)
  }

  /**
   * Holds if this expression defines variable `v`
   * If doing dataflow, then consider using SsaVariable.getDefinition() for more precision.
   */
  override predicate defines(Variable v) {
    this.getVariable() = v and
    this.isDefinition()
  }

  /** Holds if this expression is a deletion */
  predicate isDeletion() { py_expr_contexts(_, 2, this) }

  /**
   * Holds if this expression deletes variable `v`.
   * If doing dataflow, then consider using SsaVariable.getDefinition() for more precision.
   */
  predicate deletes(Variable v) {
    this.getVariable() = v and
    this.isDeletion()
  }

  /** Holds if this expression is a use */
  predicate isUse() { py_expr_contexts(_, 3, this) }

  /**
   * Holds if this expression is a use of variable `v`
   * If doing dataflow, then consider using SsaVariable.getAUse() for more precision.
   */
  predicate uses(Variable v) {
    this.getVariable() = v and
    this.isUse()
  }

  /** Never holds: a name is not a constant. */
  override predicate isConstant() { none() }

  /** Has no result: a name has no sub-expressions. */
  override Expr getASubExpression() { none() }

  /** Gets the identifier of this name as its textual representation. */
  override string toString() { this.getId() = result }

  override NameNode getAFlowNode() { super.getAFlowNode() = result }

  override predicate isArtificial() {
    /* Artificial variable names in comprehensions all start with "." */
    exists(string id | id = this.getId() | id.charAt(0) = ".")
  }
}
|
||||
|
||||
class Filter extends Filter_ {
  /** Gets a direct sub-expression of this node: its filter expression or its value expression. */
  override Expr getASubExpression() { result in [this.getFilter(), this.getValue()] }
}
|
||||
|
||||
/** A slice. E.g `0:1` in the expression `x[0:1]` */
|
||||
class Slice extends Slice_ {
  /** Gets a direct sub-expression of this slice: its start, stop or step expression. */
  override Expr getASubExpression() {
    result in [this.getStart(), this.getStop(), this.getStep()]
  }
}
|
||||
|
||||
/**
|
||||
* Returns all string prefixes in the database that are explicitly marked as Unicode strings.
|
||||
*
|
||||
* Helper predicate for `StrConst::isUnicode`.
|
||||
*/
|
||||
pragma[nomagic]
private string unicode_prefix() {
  // Only prefixes that actually occur on some string in the database are candidates.
  exists(Str_ str | result = str.getPrefix()) and
  // Some character of the prefix is `u` or `U`.
  result.charAt(_) = ["u", "U"]
}
|
||||
|
||||
/**
|
||||
* Returns all string prefixes in the database that are _not_ explicitly marked as bytestrings.
|
||||
*
|
||||
* Helper predicate for `StrConst::isUnicode`.
|
||||
*/
|
||||
pragma[nomagic]
private string non_byte_prefix() {
  // Only prefixes that actually occur on some string in the database are candidates.
  exists(Str_ str | result = str.getPrefix()) and
  // No character of the prefix is `b` or `B`.
  not result.charAt(_) = ["b", "B"]
}
|
||||
|
||||
/** A string constant. */
|
||||
class StrConst extends Str_, ImmutableLiteral {
  /* syntax: "hello" */
  /**
   * Holds if this string constant is a unicode string. That is the case when
   * its prefix contains `u`/`U`, or when its prefix contains no `b`/`B` and
   * either the major Python version is 3 or the enclosing module satisfies
   * `hasFromFuture("unicode_literals")`.
   */
  predicate isUnicode() {
    exists(string prefix | prefix = this.getPrefix() |
      prefix = unicode_prefix()
      or
      prefix = non_byte_prefix() and
      (
        this.getEnclosingModule().hasFromFuture("unicode_literals")
        or
        major_version() = 3
      )
    )
  }

  /** Gets the string value of this constant (same as `getS()`). */
  deprecated override string strValue() { this.getS() = result }

  /** A string constant has no sub-expressions. */
  override Expr getASubExpression() { none() }

  /** Gets a child node of this constant: one of its implicitly concatenated parts. */
  override AstNode getAChildNode() { this.getAnImplicitlyConcatenatedPart() = result }

  /** Gets the text of this str constant */
  string getText() { this.getS() = result }

  /** Whether this is a docstring, that is, the docstring of some scope. */
  predicate isDocString() { this = any(Scope s).getDocString() }

  /** Gets the boolean value of this constant: `false` for the empty text, `true` for any other text. */
  override boolean booleanValue() {
    exists(string text | text = this.getText() |
      text = "" and result = false
      or
      text != "" and result = true
    )
  }

  /** This class provides no literal object for string constants. */
  override Object getLiteralObject() { none() }
}
|
||||
|
||||
/** Holds if `n` refers to a variable whose identifier `id` is one of `True`, `False` or `None`. */
private predicate name_consts(Name_ n, string id) {
  id = ["True", "False", "None"] and
  exists(Variable v | py_variables(v, n) | v.getId() = id)
}
|
||||
|
||||
/** A named constant, one of `None`, `True` or `False` */
|
||||
abstract class NameConstant extends Name, ImmutableLiteral {
  NameConstant() { name_consts(this, _) }

  /** A named constant has no sub-expressions. */
  override Expr getASubExpression() { none() }

  /** Gets the identifier of this constant as recorded by `name_consts`. */
  override string toString() { name_consts(this, result) }

  /** Holds always: a named constant is a constant. */
  override predicate isConstant() { any() }

  /** Gets a flow node for this constant, taken from the `Name` superclass. */
  override NameConstantNode getAFlowNode() { Name.super.getAFlowNode() = result }

  /** Never holds: named constants are not treated as artificial names. */
  override predicate isArtificial() { none() }
}
|
||||
|
||||
/** A boolean named constant, either `True` or `False` */
|
||||
abstract class BooleanLiteral extends NameConstant { }

/** The boolean named constant `True` */
class True extends BooleanLiteral {
  /* syntax: True */
  True() { name_consts(this, "True") }

  /** Gets the object for `True`, as given by `theTrueObject()`. */
  override Object getLiteralObject() { result = theTrueObject() and name_consts(this, "True") }

  /** Gets `true`, the boolean value of this constant. */
  override boolean booleanValue() { true = result }
}

/** The boolean named constant `False` */
class False extends BooleanLiteral {
  /* syntax: False */
  False() { name_consts(this, "False") }

  /** Gets the object for `False`, as given by `theFalseObject()`. */
  override Object getLiteralObject() { result = theFalseObject() and name_consts(this, "False") }

  /** Gets `false`, the boolean value of this constant. */
  override boolean booleanValue() { false = result }
}
|
||||
|
||||
/** `None` */
|
||||
class None extends NameConstant {
  /* syntax: None */
  None() { name_consts(this, "None") }

  /** Gets the object for `None`, as given by `theNoneObject()`. */
  override Object getLiteralObject() { result = theNoneObject() and name_consts(this, "None") }

  /** Gets `false`, the boolean value of `None`. */
  override boolean booleanValue() { false = result }
}
|
||||
|
||||
/** An await expression such as `await coro`. */
|
||||
class Await extends Await_ {
  /* syntax: await Expr */
  /** Gets the sub-expression of this await expression, namely the awaited value. */
  override Expr getASubExpression() { this.getValue() = result }
}
|
||||
|
||||
/** A formatted string literal expression, such as `f'hello {world!s}'` */
|
||||
class Fstring extends Fstring_ {
  /* syntax: f"Yes!" */
  /** Gets a sub-expression of this formatted string: any one of its values. */
  override Expr getASubExpression() { this.getAValue() = result }
}
|
||||
|
||||
/**
|
||||
* A formatted value (within a formatted string literal).
|
||||
* For example, in the string `f'hello {world!s}'` the formatted value is `world!s`.
|
||||
*/
|
||||
class FormattedValue extends FormattedValue_ {
  /** Gets a sub-expression of this formatted value: its format specification or its value. */
  override Expr getASubExpression() {
    result = this.getFormatSpec()
    or
    result = this.getValue()
  }
}
|
||||
|
||||
/* Expression Contexts */
|
||||
/** A context in which an expression is used. */
class ExprContext extends ExprContext_ { }

/** The load context: the context of `var` in `len(var)`. */
class Load extends Load_ { }

/** The store context: the context of `var` in `var = 0`. */
class Store extends Store_ { }

/** The delete context: the context of `var` in `del var`. */
class Del extends Del_ { }

/** An artifact of the Python grammar, which includes an `AugLoad` context even though it is never used. */
library class AugLoad extends AugLoad_ { }

/** The augmented store context: the context of `var` in `var += 1`. */
class AugStore extends AugStore_ { }

/** The parameter context: the context of `var` in `def f(var): pass`. */
class Param extends Param_ { }
|
||||
@@ -1,523 +0,0 @@
|
||||
import python
|
||||
|
||||
/** A file */
|
||||
class File extends Container {
  File() { files(this, _, _, _, _) }

  /** DEPRECATED: Use `getAbsolutePath` instead. */
  deprecated override string getName() { result = this.getAbsolutePath() }

  /** DEPRECATED: Use `getAbsolutePath` instead. */
  deprecated string getFullName() { result = this.getAbsolutePath() }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For a file, all four positions are `0` and `filepath` is its absolute path.
   * For more information, see
   * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    this.getAbsolutePath() = filepath and
    startline = 0 and
    startcolumn = 0 and
    endline = 0 and
    endcolumn = 0
  }

  /** Whether this file is a source code file. Currently holds for every file. */
  predicate fromSource() {
    /* If we start to analyse .pyc files, then this will have to change. */
    any()
  }

  /** Gets a short name for this file (just the file name) */
  string getShortName() {
    exists(string simple, string ext | files(this, _, simple, ext, _) | result = simple + ext)
  }

  /** Gets the largest end line of any location in this file. */
  private int lastLine() {
    result = max(int i | exists(Location l | l.getFile() = this and l.getEndLine() = i))
  }

  /** Whether line n is empty (it contains neither code nor comment). */
  predicate emptyLine(int n) {
    n in [0 .. this.lastLine()] and
    not occupied_line(this, n)
  }

  /**
   * Gets the encoding named in a `coding:` / `coding=` declaration found in a
   * comment of this file that starts before line 3.
   */
  string getSpecifiedEncoding() {
    exists(Comment c, Location l | l = c.getLocation() and l.getFile() = this |
      l.getStartLine() < 3 and
      result = c.getText().regexpCapture(".*coding[:=]\\s*([-\\w.]+).*", 1)
    )
  }

  override string getAbsolutePath() { files(this, result, _, _, _) }

  /** Gets the URL of this file. */
  override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" }

  override Container getImportRoot(int n) {
    /* File stem must be a legal Python identifier */
    this.getStem().regexpMatch("[^\\d\\W]\\w*") and
    result = this.getParent().getImportRoot(n)
  }

  /**
   * Gets the contents of this file as a string.
   * This will only work for those non-python files that
   * are specified to be extracted.
   */
  string getContents() { file_contents(this, result) }

  /** Holds if this file is likely to get executed directly, and thus act as an entry point for execution. */
  predicate isPossibleEntryPoint() {
    // Only consider files in the source code, and not things like the standard library
    exists(this.getRelativePath()) and
    (
      // The file doesn't have the extension `.py` but still contains Python statements
      not this.getExtension().matches("py%") and
      exists(Stmt s | s.getLocation().getFile() = this)
      or
      // The file contains the usual `if __name__ == '__main__':` construction
      exists(If i, Name name, StrConst main, Cmpop op |
        i.getScope().(Module).getFile() = this and
        op instanceof Eq and
        i.getTest().(Compare).compares(name, op, main) and
        name.getId() = "__name__" and
        main.getText() = "__main__"
      ) and
      // Exclude files named `__main__.py`. These are often _not_ meant to be run directly, but
      // contain this construct anyway.
      //
      // Their presence in a package (say, `foo`) means one can execute the package directly using
      // `python -m foo` (which will run the `foo/__main__.py` file). Since being an entry point for
      // execution means treating imports as absolute, this causes trouble, since when run with
      // `python -m`, the interpreter uses the usual package semantics.
      not this.getShortName() = "__main__.py"
      or
      // The file contains a `#!` line referencing the python interpreter
      exists(Comment c |
        c.getLocation().getFile() = this and
        c.getLocation().getStartLine() = 1 and
        // Trailing whitespace is `[ \t]*` at the regex level. The previous pattern escaped the
        // backslash once too often, so the class matched a literal backslash or the letter `t`
        // instead of a tab character.
        c.getText().regexpMatch("^#! */.*python(2|3)?[ \\t]*$")
      )
    )
  }
}
|
||||
|
||||
/**
 * Holds if line `n` of file `f` is occupied: either some location in `f` starts on
 * line `n`, or `n` lies within the line span of a string constant's location in `f`.
 */
private predicate occupied_line(File f, int n) {
  exists(Location loc | f = loc.getFile() |
    n = loc.getStartLine()
    or
    // String constants may span several lines; every line they cover counts.
    loc = any(StrConst s).getLocation() and
    n in [loc.getStartLine() .. loc.getEndLine()]
  )
}
|
||||
|
||||
/** A folder (directory) */
|
||||
class Folder extends Container {
  Folder() { folders(this, _, _) }

  /** DEPRECATED: Use `getAbsolutePath` instead. */
  deprecated override string getName() { this.getAbsolutePath() = result }

  /** DEPRECATED: Use `getBaseName` instead. */
  deprecated string getSimple() { folders(this, _, result) }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For a folder, `filepath` is its absolute path and all four positions are `0`.
   * For more information, see
   * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    startline = 0 and
    startcolumn = 0 and
    endline = 0 and
    endcolumn = 0 and
    filepath = this.getAbsolutePath()
  }

  /** Gets the absolute path of this folder, as recorded in the `folders` relation. */
  override string getAbsolutePath() { folders(this, result, _) }

  /** Gets the URL of this folder. */
  override string getURL() { result = "folder://" + this.getAbsolutePath() }

  /**
   * Gets the import root with index `n` for this folder: the folder itself if it
   * is import root `n`, or otherwise the parent's import root `n` provided the
   * folder's base name is a legal Python identifier.
   */
  override Container getImportRoot(int n) {
    result = this and this.isImportRoot(n)
    or
    /* Folder must be a legal Python identifier */
    result = this.getParent().getImportRoot(n) and
    this.getBaseName().regexpMatch("[^\\d\\W]\\w*")
  }
}
|
||||
|
||||
/**
|
||||
* A container is an abstract representation of a file system object that can
|
||||
* hold elements of interest.
|
||||
*/
|
||||
abstract class Container extends @container {
  /** Gets the container that directly contains this one. */
  Container getParent() { containerparent(result, this) }

  /** Gets a child of this container. Deprecated; `getAChildContainer` is defined by the same relation. */
  deprecated Container getChild() { containerparent(this, result) }

  /**
   * Gets a textual representation of the path of this container.
   *
   * This is the absolute path of the container.
   */
  string toString() { this.getAbsolutePath() = result }

  /** Gets the name of this container */
  abstract string getName();

  /**
   * Gets the relative path of this file or folder from the root folder of the
   * analyzed source location. The relative path of the root folder itself is
   * the empty string.
   *
   * This has no result if the container is outside the source root, that is,
   * if the root folder is not a reflexive, transitive parent of this container.
   */
  string getRelativePath() {
    exists(string absPath, string pref |
      sourceLocationPrefix(pref) and absPath = this.getAbsolutePath()
    |
      // The container is the source root itself.
      absPath = pref and result = ""
      or
      // The container lies below the source root; a trailing slash on the prefix is dropped first.
      absPath = pref.regexpReplaceAll("/$", "") + "/" + result and
      not result.matches("/%")
    )
  }

  /** Whether this file or folder is part of the standard library */
  predicate inStdlib() { this.inStdlib(_, _) }

  /**
   * Whether this file or folder is part of the standard library
   * for version `major.minor`
   */
  predicate inStdlib(int major, int minor) {
    exists(Module m | m.getPath() = this | m.inStdLib(major, minor))
  }

  /* Standard cross-language API */
  /** Gets a file or sub-folder in this container. */
  Container getAChildContainer() { containerparent(this, result) }

  /** Gets a file in this container. */
  File getAFile() { this.getAChildContainer() = result }

  /** Gets a sub-folder in this container. */
  Folder getAFolder() { this.getAChildContainer() = result }

  /**
   * Gets the absolute, canonical path of this container, using forward slashes
   * as path separator.
   *
   * The path starts with a _root prefix_ followed by zero or more _path
   * segments_ separated by forward slashes.
   *
   * The root prefix is of one of the following forms:
   *
   *   1. A single forward slash `/` (Unix-style)
   *   2. An upper-case drive letter followed by a colon and a forward slash,
   *      such as `C:/` (Windows-style)
   *   3. Two forward slashes, a computer name, and then another forward slash,
   *      such as `//FileServer/` (UNC-style)
   *
   * Path segments are never empty (that is, absolute paths never contain two
   * contiguous slashes, except as part of a UNC-style root prefix). Also, path
   * segments never contain forward slashes, and no path segment is of the
   * form `.` (one dot) or `..` (two dots).
   *
   * Note that an absolute path never ends with a forward slash, except if it is
   * a bare root prefix, that is, the path has no path segments. A container
   * whose absolute path has no segments is always a `Folder`, not a `File`.
   */
  abstract string getAbsolutePath();

  /**
   * Gets the base name of this container including extension, that is, the last
   * segment of its absolute path, or the empty string if it has no segments.
   *
   * Here are some examples of absolute paths and the corresponding base names
   * (surrounded with quotes to avoid ambiguity):
   *
   * <table border="1">
   * <tr><th>Absolute path</th><th>Base name</th></tr>
   * <tr><td>"/tmp/tst.py"</td><td>"tst.py"</td></tr>
   * <tr><td>"C:/Program Files (x86)"</td><td>"Program Files (x86)"</td></tr>
   * <tr><td>"/"</td><td>""</td></tr>
   * <tr><td>"C:/"</td><td>""</td></tr>
   * <tr><td>"D:/"</td><td>""</td></tr>
   * <tr><td>"//FileServer/"</td><td>""</td></tr>
   * </table>
   */
  string getBaseName() {
    result = this.getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
  }

  /**
   * Gets the extension of this container, that is, the suffix of its base name
   * after the last dot character, if any.
   *
   * In particular,
   *
   *  - if the name does not include a dot, there is no extension, so this
   *    predicate has no result;
   *  - if the name ends in a dot, the extension is the empty string;
   *  - if the name contains multiple dots, the extension follows the last dot.
   *
   * Here are some examples of absolute paths and the corresponding extensions
   * (surrounded with quotes to avoid ambiguity):
   *
   * <table border="1">
   * <tr><th>Absolute path</th><th>Extension</th></tr>
   * <tr><td>"/tmp/tst.py"</td><td>"py"</td></tr>
   * <tr><td>"/tmp/.gitignore"</td><td>"gitignore"</td></tr>
   * <tr><td>"/bin/bash"</td><td>not defined</td></tr>
   * <tr><td>"/tmp/tst2."</td><td>""</td></tr>
   * <tr><td>"/tmp/x.tar.gz"</td><td>"gz"</td></tr>
   * </table>
   */
  string getExtension() {
    result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3)
  }

  /**
   * Gets the stem of this container, that is, the prefix of its base name up to
   * (but not including) the last dot character if there is one, or the entire
   * base name if there is not.
   *
   * Here are some examples of absolute paths and the corresponding stems
   * (surrounded with quotes to avoid ambiguity):
   *
   * <table border="1">
   * <tr><th>Absolute path</th><th>Stem</th></tr>
   * <tr><td>"/tmp/tst.py"</td><td>"tst"</td></tr>
   * <tr><td>"/tmp/.gitignore"</td><td>""</td></tr>
   * <tr><td>"/bin/bash"</td><td>"bash"</td></tr>
   * <tr><td>"/tmp/tst2."</td><td>"tst2"</td></tr>
   * <tr><td>"/tmp/x.tar.gz"</td><td>"x.tar"</td></tr>
   * </table>
   */
  string getStem() {
    result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1)
  }

  /** Gets the file in this container whose base name is `baseName`. */
  File getFile(string baseName) {
    result.getBaseName() = baseName and
    result = this.getAFile()
  }

  /** Gets the sub-folder in this container whose base name is `baseName`. */
  Folder getFolder(string baseName) {
    result.getBaseName() = baseName and
    result = this.getAFolder()
  }

  /** Gets the container that has this container as a child. */
  Container getParentContainer() { result.getAChildContainer() = this }

  /** Gets the file or sub-folder in this container whose base name is `baseName`. */
  Container getChildContainer(string baseName) {
    result.getBaseName() = baseName and
    result = this.getAChildContainer()
  }

  /**
   * Gets a URL representing the location of this container.
   *
   * For more information see [Providing URLs](https://help.semmle.com/QL/learn-ql/ql/locations.html#providing-urls).
   */
  abstract string getURL();

  /** Holds if this folder is on the import path. */
  predicate isImportRoot() { this.isImportRoot(_) }

  /**
   * Holds if this folder is on the import path, at index `n` in the list of
   * paths. The list of paths is composed of the paths passed to the extractor and
   * `sys.path`.
   */
  predicate isImportRoot(int n) { import_path_element(n) = this.getName() }

  /** Holds if this folder is the root folder for the standard library. */
  predicate isStdLibRoot(int major, int minor) {
    this.isStdLibRoot() and
    major = major_version() and
    minor = minor_version()
  }

  /** Holds if this folder is the root folder for the standard library. */
  predicate isStdLibRoot() {
    /*
     * Look for a standard lib module and find its import path
     * We use `os` as it is the most likely to be imported and
     * `tty` because it is small for testing.
     */

    exists(Module m | m.getName() = ["os", "tty"] | this = m.getFile().getImportRoot())
  }

  /** Gets the path element from which this container would be loaded. */
  Container getImportRoot() {
    exists(int best |
      // `best` is the smallest index for which an import root exists.
      not exists(int earlier | earlier < best and exists(this.getImportRoot(earlier))) and
      result = this.getImportRoot(best)
    )
  }

  /** Gets the path element from which this container would be loaded, given the index into the list of possible paths `n`. */
  abstract Container getImportRoot(int n);
}
|
||||
|
||||
/**
 * Gets the `n`th element of the import path, with backslashes replaced by forward
 * slashes. Elements of the `extractor.path` flag come first (starting at index 0);
 * elements of the `sys.path` flag follow, offset by the number of `extractor.path`
 * elements. Both flags are split on the value of the `os.pathsep` flag.
 */
private string import_path_element(int n) {
  exists(string paths, string sep, int offset |
    py_flags_versioned("os.pathsep", sep, _) and
    (
      paths = get_path("extractor.path") and offset = 0
      or
      paths = get_path("sys.path") and
      offset = count(get_path("extractor.path").splitAt(sep))
    ) and
    result = paths.splitAt(sep, n - offset).replaceAll("\\", "/")
  )
}

/** Gets the value of the versioned flag called `name`. */
private string get_path(string name) { py_flags_versioned(name, result, _) }
|
||||
|
||||
/** A location, stored either in `locations_default` or in `locations_ast`. */
class Location extends @location {
  /** Gets the file for this location */
  File getFile() { this.getPath() = result }

  /**
   * Gets the container of this location: the one recorded in `locations_default`,
   * or the path of the module recorded in `locations_ast`.
   */
  private Container getPath() {
    exists(Module m | locations_ast(this, m, _, _, _, _) | m.getPath() = result)
    or
    locations_default(this, result, _, _, _, _)
  }

  /** Gets the 1-based line number (inclusive) where this location starts. */
  int getStartLine() {
    locations_ast(this, _, result, _, _, _)
    or
    locations_default(this, _, result, _, _, _)
  }

  /** Gets the 1-based column number (inclusive) where this location starts. */
  int getStartColumn() {
    locations_ast(this, _, _, result, _, _)
    or
    locations_default(this, _, _, result, _, _)
  }

  /** Gets the 1-based line number (inclusive) where this location ends. */
  int getEndLine() {
    locations_ast(this, _, _, _, result, _)
    or
    locations_default(this, _, _, _, result, _)
  }

  /** Gets the 1-based column number (inclusive) where this location ends. */
  int getEndColumn() {
    locations_ast(this, _, _, _, _, result)
    or
    locations_default(this, _, _, _, _, result)
  }

  /** Gets a textual representation of this element: the absolute path, a colon, and the start line. */
  string toString() {
    exists(string path, string line |
      path = this.getPath().getAbsolutePath() and
      line = this.getStartLine().toString()
    |
      result = path + ":" + line
    )
  }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(File f | filepath = f.getAbsolutePath() |
      exists(Module m | f = m.getFile() |
        locations_ast(this, m, startline, startcolumn, endline, endcolumn)
      )
      or
      locations_default(this, f, startline, startcolumn, endline, endcolumn)
    )
    or
    // Packages have no suitable filepath, so we use just the path instead.
    exists(Module pkg | not exists(pkg.getFile()) |
      locations_ast(this, pkg, startline, startcolumn, endline, endcolumn) and
      filepath = pkg.getPath().getAbsolutePath()
    )
  }
}
|
||||
|
||||
/** A non-empty line in the source code */
|
||||
class Line extends @py_line {
  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * A line starts at column 1, ends on the line it starts on, and its end
   * column is the length recorded in `py_line_lengths`.
   * For more information, see
   * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    startcolumn = 1 and
    endline = startline and
    exists(Module m | py_line_lengths(this, m, startline, endcolumn) |
      filepath = m.getFile().getAbsolutePath()
    )
  }

  /** Gets a textual representation of this element: the short file name, a colon, and the line number. */
  string toString() {
    result = this.getModule().getFile().getShortName() + ":" + this.getLineNumber().toString()
  }

  /** Gets the line number of this line */
  int getLineNumber() { py_line_lengths(this, _, result, _) }

  /** Gets the length of this line */
  int getLength() { py_line_lengths(this, _, _, result) }

  /** Gets the module that contains this line */
  Module getModule() { py_line_lengths(this, result, _, _) }
}
|
||||
|
||||
/**
|
||||
* A syntax error. Note that if there is a syntax error in a module,
|
||||
* much information about that module will be lost
|
||||
*/
|
||||
class SyntaxError extends Location {
  SyntaxError() {
    // Only syntax errors recorded for the major version under analysis are considered.
    exists(string version | version = major_version().toString() |
      py_syntax_error_versioned(this, _, version)
    )
  }

  /** Gets the fixed text "Syntax Error". */
  override string toString() { result = "Syntax Error" }

  /** Gets the message corresponding to this syntax error */
  string getMessage() {
    exists(string version | version = major_version().toString() |
      py_syntax_error_versioned(this, result, version)
    )
  }
}
|
||||
|
||||
/**
|
||||
* An encoding error. Note that if there is an encoding error in a module,
|
||||
* much information about that module will be lost
|
||||
*/
|
||||
class EncodingError extends SyntaxError {
  EncodingError() {
    /* Leave spaces around 'decode' in unlikely event it occurs as a name in a syntax error */
    exists(string lowered | lowered = this.getMessage().toLowerCase() |
      lowered.matches("% decode %")
    )
  }

  /** Gets the fixed text "Encoding Error". */
  override string toString() { result = "Encoding Error" }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,33 +0,0 @@
|
||||
/**
|
||||
* Helper file that imports all framework modeling.
|
||||
*/
|
||||
|
||||
// If you add modeling of a new framework/library, remember to add it to the docs in
|
||||
// `docs/codeql/support/reusables/frameworks.rst`
|
||||
private import semmle.python.frameworks.Aioch
|
||||
private import semmle.python.frameworks.Aiohttp
|
||||
private import semmle.python.frameworks.ClickhouseDriver
|
||||
private import semmle.python.frameworks.Cryptodome
|
||||
private import semmle.python.frameworks.Cryptography
|
||||
private import semmle.python.frameworks.Dill
|
||||
private import semmle.python.frameworks.Django
|
||||
private import semmle.python.frameworks.Fabric
|
||||
private import semmle.python.frameworks.Flask
|
||||
private import semmle.python.frameworks.Idna
|
||||
private import semmle.python.frameworks.Invoke
|
||||
private import semmle.python.frameworks.Jmespath
|
||||
private import semmle.python.frameworks.MarkupSafe
|
||||
private import semmle.python.frameworks.Multidict
|
||||
private import semmle.python.frameworks.Mysql
|
||||
private import semmle.python.frameworks.MySQLdb
|
||||
private import semmle.python.frameworks.Psycopg2
|
||||
private import semmle.python.frameworks.PyMySQL
|
||||
private import semmle.python.frameworks.Rsa
|
||||
private import semmle.python.frameworks.Simplejson
|
||||
private import semmle.python.frameworks.Stdlib
|
||||
private import semmle.python.frameworks.Tornado
|
||||
private import semmle.python.frameworks.Peewee
|
||||
private import semmle.python.frameworks.Twisted
|
||||
private import semmle.python.frameworks.Ujson
|
||||
private import semmle.python.frameworks.Yaml
|
||||
private import semmle.python.frameworks.Yarl
|
||||
@@ -1,380 +0,0 @@
|
||||
import python
|
||||
|
||||
/**
|
||||
* A function, independent of defaults and binding.
|
||||
* It is the syntactic entity that is compiled to a code object.
|
||||
*/
|
||||
class Function extends Function_, Scope, AstNode {
|
||||
/** The expression defining this function */
|
||||
CallableExpr getDefinition() { result = this.getParent() }
|
||||
|
||||
/**
|
||||
* The scope in which this function occurs, will be a class for a method,
|
||||
* another function for nested functions, generator expressions or comprehensions,
|
||||
* or a module for a plain function.
|
||||
*/
|
||||
override Scope getEnclosingScope() { result = this.getParent().(Expr).getScope() }
|
||||
|
||||
override Scope getScope() { result = this.getEnclosingScope() }
|
||||
|
||||
/** Whether this function is declared in a class */
|
||||
predicate isMethod() { exists(Class cls | this.getEnclosingScope() = cls) }
|
||||
|
||||
/** Whether this is a special method, that is, a method whose name has the form `__xxx__` (except `__init__`) */
|
||||
predicate isSpecialMethod() {
|
||||
this.isMethod() and
|
||||
exists(string name | this.getName() = name |
|
||||
name.matches("\\_\\_%\\_\\_") and
|
||||
name != "__init__"
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether this function is a generator function,
|
||||
* that is whether it contains a yield or yield-from expression
|
||||
*/
|
||||
predicate isGenerator() {
|
||||
exists(Yield y | y.getScope() = this)
|
||||
or
|
||||
exists(YieldFrom y | y.getScope() = this)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this function represents a lambda.
|
||||
*
|
||||
* The extractor reifies each lambda expression as a (local) function with the name
|
||||
* "lambda". As `lambda` is a keyword in Python, it's impossible to create a function with this
|
||||
* name otherwise, and so it's impossible to get a non-lambda function accidentally
|
||||
* classified as a lambda.
|
||||
*/
|
||||
predicate isLambda() { this.getName() = "lambda" }
|
||||
|
||||
/** Whether this function is declared in a class and is named `__init__` */
|
||||
predicate isInitMethod() { this.isMethod() and this.getName() = "__init__" }
|
||||
|
||||
/** Gets a decorator of this function */
|
||||
Expr getADecorator() { result = this.getDefinition().(FunctionExpr).getADecorator() }
|
||||
|
||||
/** Gets the name of the nth argument (for simple arguments) */
|
||||
string getArgName(int index) { result = this.getArg(index).(Name).getId() }
|
||||
|
||||
Parameter getArgByName(string name) {
|
||||
(
|
||||
result = this.getAnArg()
|
||||
or
|
||||
result = this.getAKeywordOnlyArg()
|
||||
) and
|
||||
result.(Name).getId() = name
|
||||
}
|
||||
|
||||
override Location getLocation() { py_scope_location(result, this) }
|
||||
|
||||
override string toString() { result = "Function " + this.getName() }
|
||||
|
||||
/** Gets the statements forming the body of this function */
|
||||
override StmtList getBody() { result = Function_.super.getBody() }
|
||||
|
||||
/** Gets the nth statement in the function */
|
||||
override Stmt getStmt(int index) { result = Function_.super.getStmt(index) }
|
||||
|
||||
/** Gets a statement in the function */
|
||||
override Stmt getAStmt() { result = Function_.super.getAStmt() }
|
||||
|
||||
/** Gets the name used to define this function */
|
||||
override string getName() { result = Function_.super.getName() }
|
||||
|
||||
/** Gets the metrics for this function */
|
||||
FunctionMetrics getMetrics() { result = this }
|
||||
|
||||
/** Gets the FunctionObject corresponding to this function */
|
||||
FunctionObject getFunctionObject() { result.getOrigin() = this.getDefinition() }
|
||||
|
||||
/**
|
||||
* Whether this function is a procedure, that is, it has no explicit return statement and always returns None.
|
||||
* Note that generator and async functions are not procedures as they return generators and coroutines respectively.
|
||||
*/
|
||||
predicate isProcedure() {
|
||||
not exists(this.getReturnNode()) and
|
||||
exists(this.getFallthroughNode()) and
|
||||
not this.isGenerator() and
|
||||
not this.isAsync()
|
||||
}
|
||||
|
||||
/** Gets the number of positional parameters of this function. */
int getPositionalParameterCount() {
  result = count(Parameter positional | positional = this.getAnArg())
}
|
||||
|
||||
/** Gets the number of keyword-only parameters of this function. */
int getKeywordOnlyParameterCount() {
  result = count(Name kwonly | kwonly = this.getAKeywordOnlyArg())
}
|
||||
|
||||
/**
 * Holds if this function accepts a variable number of arguments,
 * that is, it has a starred (`*arg`) parameter.
 */
predicate hasVarArg() {
  exists(Expr starred | starred = this.getVararg())
}
|
||||
|
||||
/**
 * Holds if this function accepts arbitrary keyword arguments,
 * that is, it has a double-starred (`**kwarg`) parameter.
 */
predicate hasKwArg() {
  exists(Expr doubleStarred | doubleStarred = this.getKwarg())
}
|
||||
|
||||
/**
 * Gets a child node of this function: one of its parameters (positional,
 * keyword-only, `*vararg` or `**kwarg`) or a statement of its body.
 */
override AstNode getAChildNode() {
  // parameters
  result = this.getAnArg()
  or
  result = this.getAKeywordOnlyArg()
  or
  result = this.getVararg()
  or
  result = this.getKwarg()
  or
  // body
  result = this.getAStmt()
}
|
||||
|
||||
/**
 * Gets the qualified name for this function.
 *
 * For a function directly in a module this is its plain name; otherwise it is
 * the qualified name of the enclosing function or class, a `.`, and the name of
 * this function. This is intended to mirror the `__qualname__` attribute of
 * functions in Python 3; note that the parts are joined with a plain `.` and no
 * `<locals>` component is inserted for functions nested in other functions.
 */
string getQualifiedName() {
  exists(Scope outer | outer = this.getEnclosingScope() |
    outer instanceof Module and result = this.getName()
    or
    result = outer.(Function).getQualifiedName() + "." + this.getName()
    or
    result = outer.(Class).getQualifiedName() + "." + this.getName()
  )
}
|
||||
|
||||
/** Gets the keyword-only parameter at position `n` of this function. */
Name getKeywordOnlyArg(int n) {
  Function_.super.getKwonlyarg(n) = result
}
|
||||
|
||||
/** Gets any keyword-only parameter of this function. */
Name getAKeywordOnlyArg() {
  exists(int n | result = this.getKeywordOnlyArg(n))
}
|
||||
|
||||
/**
 * Gets the scope in which this function is evaluated.
 *
 * This is the function itself under Python 3, and also whenever the function
 * is not the function of a comprehension (`Comp`). Under Python 2, the function
 * of a comprehension is evaluated in the evaluating scope of that comprehension.
 */
override Scope getEvaluatingScope() {
  result = this and
  (
    major_version() = 3
    or
    not exists(Comp c | c.getFunction() = this)
  )
  or
  major_version() = 2 and
  exists(Comp c | c.getFunction() = this and result = c.getEvaluatingScope())
}
|
||||
|
||||
/** Holds if the `Scope` implementation of `containsInScope` holds for `inner`; this override explicitly selects that inherited implementation. */
override predicate containsInScope(AstNode inner) { Scope.super.containsInScope(inner) }
|
||||
|
||||
/** Holds if the `Scope` implementation of `contains` holds for `inner`; this override explicitly selects that inherited implementation. */
override predicate contains(AstNode inner) { Scope.super.contains(inner) }
|
||||
|
||||
/**
 * Gets a control flow node for a value returned by this function, that is, a
 * node for the value of a `return` statement whose scope is this function.
 */
ControlFlowNode getAReturnValueFlowNode() {
  exists(Return r | this = r.getScope() | result.getNode() = r.getValue())
}
|
||||
}
|
||||
|
||||
/**
 * A `def` statement.
 *
 * `FunctionDef` extends `Assign` because a function definition binds the newly
 * created function to a name.
 */
class FunctionDef extends Assign {
  /* syntax: def name(...): ... */
  FunctionDef() {
    // An artificial assignment whose right-hand side is a `FunctionExpr`,
    // possibly wrapped in decorator calls.
    exists(Expr rhs, FunctionExpr fe | rhs = this.getValue() |
      rhs = fe
      or
      rhs = fe.getADecoratorCall()
    )
  }

  override string toString() { result = "FunctionDef" }

  /** Gets the function defined by this statement. */
  Function getDefinedFunction() {
    result = any(FunctionExpr fe | this.containsInScope(fe)).getInnerScope()
  }

  /** Gets the last statement of the function defined by this statement. */
  override Stmt getLastStatement() {
    exists(Function defined | defined = this.getDefinedFunction() |
      result = defined.getLastStatement()
    )
  }
}
|
||||
|
||||
/**
 * A function that uses 'fast' locals, stored in the frame rather than in a
 * dictionary. These are the functions whose scope contains neither an
 * `import *` statement nor an `exec` statement.
 */
class FastLocalsFunction extends Function {
  FastLocalsFunction() {
    not exists(Stmt s | s.getScope() = this |
      s instanceof ImportStar
      or
      s instanceof Exec
    )
  }
}
|
||||
|
||||
/** A parameter of a function. Either a `Tuple` or a `Name` (always a `Name` for Python 3). */
class Parameter extends Parameter_ {
  Parameter() {
    // `Parameter_` is just defined as a `Name` or `Tuple`; narrow it down to
    // those that actually occur as parameters.
    exists(ParameterList params | py_exprs(this, _, params, _))
    or
    exists(Function f |
      this = f.getVararg()
      or
      this = f.getKwarg()
      or
      this = f.getAKeywordOnlyArg()
    )
  }

  /** Gets the location of this parameter, taken from its underlying `Name` or `Tuple`. */
  Location getLocation() {
    exists(Name n | n = this.asName() | result = n.getLocation())
    or
    exists(Tuple t | t = this.asTuple() | result = t.getLocation())
  }

  /** Gets this parameter if it is a `Name` (not a `Tuple`). */
  Name asName() { this = result }

  /** Gets this parameter if it is a `Tuple` (not a `Name`). */
  Tuple asTuple() { this = result }

  /**
   * Gets the expression for the default value of this parameter.
   * Only positional and keyword-only parameters can have a result.
   */
  Expr getDefault() {
    exists(Function f, int i, Arguments args | args = f.getDefinition().getArgs() |
      // positional (normal)
      this = f.getArg(i) and
      result = args.getDefault(i)
      or
      // keyword-only
      this = f.getKeywordOnlyArg(i) and
      result = args.getKwDefault(i)
    )
  }

  /** Gets the annotation expression of this parameter. */
  Expr getAnnotation() {
    exists(Function f, Arguments args | args = f.getDefinition().getArgs() |
      exists(int i |
        // positional (normal)
        this = f.getArg(i) and
        result = args.getAnnotation(i)
        or
        // keyword-only
        this = f.getKeywordOnlyArg(i) and
        result = args.getKwAnnotation(i)
      )
      or
      // `**kwargs`
      this = f.getKwarg() and
      result = args.getKwargannotation()
      or
      // `*varargs`
      this = f.getVararg() and
      result = args.getVarargannotation()
    )
  }

  /** Gets the variable that this parameter, viewed as a `Name`, is an access of. */
  Variable getVariable() { exists(Name n | n = this.asName() | n = result.getAnAccess()) }

  /**
   * Gets the position of this parameter (if any).
   * No result if this is a "varargs", "kwargs", or keyword-only parameter.
   */
  int getPosition() { this = any(Function f).getArg(result) }

  /** Gets the name of this parameter. No result if this parameter is a `Tuple`. */
  string getName() { exists(Name n | n = this.asName() | result = n.getId()) }

  /**
   * Holds if this parameter is the first parameter of a method.
   * It is not necessarily called "self".
   */
  predicate isSelf() { exists(Function method | method.isMethod() | this = method.getArg(0)) }

  /**
   * Holds if this parameter is a "varargs" parameter.
   * The `varargs` in `f(a, b, *varargs)`.
   */
  predicate isVarargs() { this = any(Function func).getVararg() }

  /**
   * Holds if this parameter is a "kwargs" parameter.
   * The `kwargs` in `f(a, b, **kwargs)`.
   */
  predicate isKwargs() { this = any(Function func).getKwarg() }
}
|
||||
|
||||
/** An expression that generates a callable object: either a function expression or a lambda. */
abstract class CallableExpr extends Expr {
  /** Gets the function scope of this code expression. */
  abstract Function getInnerScope();

  /**
   * Gets the default values and annotations (type hints) for the arguments of this callable.
   *
   * This predicate is called `getArgs()` rather than `getParameters()` for
   * compatibility with Python's AST module.
   */
  abstract Arguments getArgs();
}
|
||||
|
||||
/** An (artificial) expression corresponding to a function definition. */
class FunctionExpr extends FunctionExpr_, CallableExpr {
  /**
   * Gets a sub-expression of this function expression: the return annotation,
   * or a default value or annotation held by its `Arguments`.
   */
  override Expr getASubExpression() {
    result = this.getReturns()
    or
    result = this.getArgs().getASubExpression()
  }

  /** Holds always: a function expression is treated as having side effects. */
  override predicate hasSideEffects() { any() }

  /**
   * Gets a call that has this function expression, or another decorator call of
   * this function expression, as its first argument.
   */
  Call getADecoratorCall() {
    exists(Expr wrapped | wrapped = result.getArg(0) |
      wrapped = this
      or
      wrapped = this.getADecoratorCall()
    )
  }

  /** Gets a decorator of this function expression. */
  Expr getADecorator() {
    exists(Call decoration | decoration = this.getADecoratorCall() |
      result = decoration.getFunc()
    )
  }

  /** Gets a child node: the inner function scope or one of the sub-expressions. */
  override AstNode getAChildNode() {
    result = this.getInnerScope()
    or
    result = this.getASubExpression()
  }

  override Function getInnerScope() { FunctionExpr_.super.getInnerScope() = result }

  override Arguments getArgs() { FunctionExpr_.super.getArgs() = result }
}
|
||||
|
||||
/** A lambda expression, such as `lambda x: x+1`. */
class Lambda extends Lambda_, CallableExpr {
  /**
   * Gets the expression to the right of the colon in this lambda expression,
   * found as the value of the `return` that is the first statement of the
   * inner scope.
   */
  Expr getExpression() { result = this.getInnerScope().getStmt(0).(Return).getValue() }

  /** Gets a default value or annotation held by the `Arguments` of this lambda. */
  override Expr getASubExpression() {
    exists(Arguments args | args = this.getArgs() | result = args.getASubExpression())
  }

  /** Gets a child node: the inner function scope or one of the sub-expressions. */
  override AstNode getAChildNode() {
    result = this.getInnerScope()
    or
    result = this.getASubExpression()
  }

  override Function getInnerScope() { Lambda_.super.getInnerScope() = result }

  override Arguments getArgs() { Lambda_.super.getArgs() = result }
}
|
||||
|
||||
/**
 * The default values and annotations (type hints) for the arguments in a function definition.
 *
 * Annotations (PEP 3107) are a general mechanism for annotating a function;
 * today they are generally only used for type hints (PEP 484).
 */
class Arguments extends Arguments_ {
  /** Gets a default value or annotation expression held by this node. */
  Expr getASubExpression() {
    // default values
    result = this.getADefault()
    or
    result = this.getAKwDefault()
    or
    // annotations
    result = this.getAnAnnotation()
    or
    result = this.getAKwAnnotation()
    or
    result = this.getVarargannotation()
    or
    result = this.getKwargannotation()
  }

  // The four predicates below are overridden only to attach better QLDoc:
  // `Arguments_` is auto-generated, so its documentation cannot be edited there.
  /** Gets the default value for the `index`'th positional parameter. */
  override Expr getDefault(int index) { super.getDefault(index) = result }

  /** Gets the default value for the `index`'th keyword-only parameter. */
  override Expr getKwDefault(int index) { super.getKwDefault(index) = result }

  /** Gets the annotation for the `index`'th positional parameter. */
  override Expr getAnnotation(int index) { super.getAnnotation(index) = result }

  /** Gets the annotation for the `index`'th keyword-only parameter. */
  override Expr getKwAnnotation(int index) { super.getKwAnnotation(index) = result }
}
|
||||
@@ -1,68 +0,0 @@
|
||||
import python
|
||||
|
||||
/** A basic block which terminates in a condition, splitting the subsequent control flow. */
class ConditionBlock extends BasicBlock {
  ConditionBlock() {
    // The block has a successor along a true or a false edge.
    exists(this.getATrueSuccessor())
    or
    exists(this.getAFalseSuccessor())
  }

  /**
   * Holds if `controlled` is a basic block controlled by this condition, that
   * is, a block that can only be reached when the condition is `testIsTrue`.
   */
  predicate controls(BasicBlock controlled, boolean testIsTrue) {
    /*
     * For this block to control the block `controlled` with `testIsTrue`, both of
     * the following must hold:
     *  - execution must have passed through the test, i.e. `this` must strictly
     *    dominate `controlled`;
     *  - execution must have passed through the `testIsTrue` edge leaving `this`.
     *
     * Although "passed through the true edge" implies that
     * `this.getATrueSuccessor()` dominates `controlled`, the reverse is not true:
     * flow may have reached `this.getATrueSuccessor()` through another edge. So we
     * assert that `this.getATrueSuccessor()` dominates `controlled` *and* that
     * every predecessor of `this.getATrueSuccessor()` is either `this` or is
     * dominated by `this.getATrueSuccessor()`.
     *
     * For example, in the following Python snippet:
     * <code>
     *   if x:
     *       controlled
     *   false_successor
     *   uncontrolled
     * </code>
     * `false_successor` dominates `uncontrolled`, but not all of its predecessors
     * are `this` (`if x`) or dominated by itself. Whereas in the following code:
     * <code>
     *   if x:
     *       while controlled:
     *           also_controlled
     *   false_successor
     *   uncontrolled
     * </code>
     * the block `while controlled` is controlled, because all of its predecessors
     * are `this` (`if x`) or (in the case of `also_controlled`) dominated by itself.
     *
     * The additional constraint on the predecessors of the test successor implies
     * that `this` strictly dominates `controlled`, so that does not need to be
     * checked directly.
     */

    exists(BasicBlock branch |
      branch = this.getATrueSuccessor() and testIsTrue = true
      or
      branch = this.getAFalseSuccessor() and testIsTrue = false
    |
      forall(BasicBlock pred | branch = pred.getASuccessor() |
        pred = this or branch.dominates(pred)
      ) and
      branch.dominates(controlled)
    )
  }

  /**
   * Holds if this condition controls the edge `pred->succ`, that is, the edge is
   * only taken when the condition is `testIsTrue`.
   */
  predicate controlsEdge(BasicBlock pred, BasicBlock succ, boolean testIsTrue) {
    // An edge leaving the condition block itself along the matching branch.
    pred = this and
    (
      succ = this.getATrueSuccessor() and testIsTrue = true
      or
      succ = this.getAFalseSuccessor() and testIsTrue = false
    )
    or
    // Any edge leaving a block that is itself controlled.
    succ = pred.getASuccessor() and
    this.controls(pred, testIsTrue)
  }
}
|
||||
@@ -1,251 +0,0 @@
|
||||
import python
|
||||
private import semmle.python.types.Builtins
|
||||
|
||||
/**
 * An alias in an import statement: the `mod as name` part of `import mod as name`.
 * May be artificial; `import x` is transformed into `import x as x`.
 */
class Alias extends Alias_ {
  /** Gets the location of this alias, which is the location of its value. */
  Location getLocation() { exists(Expr value | value = this.getValue() | result = value.getLocation()) }
}
|
||||
|
||||
/**
 * Holds if `name` is the name of a source `Module`, or of a `Builtin` whose
 * class is the special builtin `ModuleType`.
 */
private predicate valid_module_name(string name) {
  name = any(Module m).getName()
  or
  exists(Builtin cmod |
    name = cmod.getName() and
    cmod.getClass() = Builtin::special("ModuleType")
  )
}
|
||||
|
||||
/** An artificial expression representing an import. */
class ImportExpr extends ImportExpr_ {
  /**
   * Gets the name of the package `n - 1` levels above the package of the
   * enclosing module: `basePackageName(1)` is the package name of the enclosing
   * module, and each further level strips the last dotted component.
   * There is no result once the name has no `.` left to strip.
   */
  private string basePackageName(int n) {
    n = 1 and result = this.getEnclosingModule().getPackageName()
    or
    exists(string bpnm1 |
      bpnm1 = this.basePackageName(n - 1) and
      bpnm1.matches("%.%") and
      result = bpnm1.regexpReplaceAll("\\.[^.]*$", "")
    )
  }

  /** Holds if an import without explicit level may be resolved relative to the enclosing package. */
  private predicate implicitRelativeImportsAllowed() {
    // implicit relative imports are no longer allowed in Python 3
    major_version() < 3 and
    // and can be explicitly turned off in later versions of Python 2
    not this.getEnclosingModule().hasFromFuture("absolute_import")
  }

  /**
   * Gets the level of this import.
   *
   * The language specifies level as -1 if relative imports are to be tried first,
   * 0 for absolute imports, and level > 0 for explicit relative imports.
   * An extracted level of 0 is reported as -1 when implicit relative imports are
   * allowed; there is no result if the extracted level is negative.
   */
  override int getLevel() {
    exists(int l | l = super.getLevel() |
      l > 0 and result = l
      or
      /* The extractor may set level to 0 even though relative imports apply */
      l = 0 and
      (if this.implicitRelativeImportsAllowed() then result = -1 else result = 0)
    )
  }

  /**
   * If this import is relative, and relative imports are allowed, compute
   * the name of the topmost module that will be imported.
   * Only names accepted by `valid_module_name` are produced.
   */
  private string relativeTopName() {
    this.getLevel() = -1 and
    result = this.basePackageName(1) + "." + this.getTopName() and
    valid_module_name(result)
  }

  /**
   * Gets the top name of this import: the first component of the imported name
   * for non-explicit-relative imports, or the (valid) base package at the
   * import's level for explicit relative imports.
   */
  private string qualifiedTopName() {
    if this.getLevel() <= 0
    then result = this.getTopName()
    else (
      result = this.basePackageName(this.getLevel()) and
      valid_module_name(result)
    )
  }

  /**
   * Gets the name by which the lowest level module or package is imported.
   * NOTE: This is the name that is used to import the module,
   * which may not be the name of the module.
   */
  string bottomModuleName() {
    result = this.relativeTopName() + this.remainderOfName()
    or
    not exists(this.relativeTopName()) and
    result = this.qualifiedTopName() + this.remainderOfName()
  }

  /** Gets the name of the topmost module or package being imported. */
  string topModuleName() {
    result = this.relativeTopName()
    or
    not exists(this.relativeTopName()) and
    result = this.qualifiedTopName()
  }

  /**
   * Gets the full name of the module resulting from evaluating this import.
   * NOTE: This is the name that is used to import the module,
   * which may not be the name of the module.
   */
  string getImportedModuleName() {
    exists(string bottomName | bottomName = this.bottomModuleName() |
      if this.isTop() then result = this.topModuleName() else result = bottomName
    )
  }

  /**
   * Gets the names of the modules that may be imported by this import.
   * For example this predicate would return 'x' and 'x.y' for `import x.y`.
   */
  string getAnImportedModuleName() {
    result = this.bottomModuleName()
    or
    // strip the last dotted component of a name already found
    result = this.getAnImportedModuleName().regexpReplaceAll("\\.[^.]*$", "")
  }

  override Expr getASubExpression() { none() }

  override predicate hasSideEffects() { any() }

  /** Gets the first dotted component of the imported name. */
  private string getTopName() { result = this.getName().regexpReplaceAll("\\..*", "") }

  /**
   * Gets the text to append to the top name to form the bottom module name:
   * empty if this import has no name, the name minus its first component for
   * levels <= 0, and `.` followed by the whole name for explicit relative imports.
   */
  private string remainderOfName() {
    not exists(this.getName()) and result = ""
    or
    this.getLevel() <= 0 and result = this.getName().regexpReplaceAll("^[^\\.]*", "")
    or
    this.getLevel() > 0 and result = "." + this.getName()
  }

  /**
   * Holds if this import is relative, that is, not absolute.
   * See https://www.python.org/dev/peps/pep-0328/
   */
  predicate isRelative() {
    /* Implicit */
    exists(this.relativeTopName())
    or
    /* Explicit */
    this.getLevel() > 0
  }
}
|
||||
|
||||
/** A `from ... import ...` expression. */
class ImportMember extends ImportMember_ {
  /** Gets the module expression from which the member is imported. */
  override Expr getASubExpression() { this.getModule() = result }

  override predicate hasSideEffects() {
    /* Strictly this only has side-effects if the module is a package */
    any()
  }

  /**
   * Gets the full name of the module resulting from evaluating this import:
   * the imported name of the module expression, a `.`, and the member name.
   * NOTE: This is the name that is used to import the module,
   * which may not be the name of the module.
   */
  string getImportedModuleName() {
    exists(ImportExpr mod | mod = this.getModule() |
      result = mod.getImportedModuleName() + "." + this.getName()
    )
  }

  override ImportMemberNode getAFlowNode() { super.getAFlowNode() = result }
}
|
||||
|
||||
/** An import statement. */
class Import extends Import_ {
  /* syntax: import modname */
  /**
   * Gets an `ImportExpr` of this statement: either the value of one of its
   * aliases, or the module of an `ImportMember` that is such a value.
   */
  private ImportExpr getAModuleExpr() {
    exists(Expr imported | imported = this.getAName().getValue() |
      result = imported
      or
      result = imported.(ImportMember).getModule()
    )
  }

  /**
   * DEPRECATED: Use `getAnImportedModuleName()`,
   * possibly combined with `ModuleObject.importedAs()`.
   *
   * Gets a module imported by this import statement.
   */
  deprecated Module getAModule() { this.getAnImportedModuleName() = result.getName() }

  /** Holds if this is a `from ... import ...` statement. */
  predicate isFromImport() { exists(ImportMember m | m = this.getAName().getValue()) }

  /** Gets a sub-expression: the value or "as" name of an alias, or a module expression. */
  override Expr getASubExpression() {
    exists(Alias a | a = this.getAName() |
      result = a.getValue()
      or
      result = a.getAsname()
    )
    or
    result = this.getAModuleExpr()
  }

  override Stmt getASubStatement() { none() }

  /**
   * Gets the name of an imported module.
   * For example, for the import statement `import bar` which
   * is a relative import in package "foo", this would return
   * "foo.bar".
   * The import statement `from foo import bar` would return
   * `foo` and `foo.bar`.
   */
  string getAnImportedModuleName() {
    exists(ImportMember m, string modname |
      m = this.getAName().getValue() and
      modname = m.getModule().(ImportExpr).getImportedModuleName()
    |
      result = modname + "." + m.getName()
      or
      result = modname
    )
    or
    result = this.getAModuleExpr().getAnImportedModuleName()
  }
}
|
||||
|
||||
/** An `import *` statement. */
class ImportStar extends ImportStar_ {
  /* syntax: from modname import * */
  /**
   * Gets the `ImportExpr` of this statement: either its module directly, or the
   * module of the `ImportMember` that is its module.
   */
  ImportExpr getModuleExpr() {
    exists(Expr mod | mod = this.getModule() |
      result = mod
      or
      result = mod.(ImportMember).getModule()
    )
  }

  override string toString() {
    exists(ImportExpr mod | mod = this.getModuleExpr() |
      result = "from " + mod.getName() + " import *"
    )
  }

  /**
   * DEPRECATED: Use `getAnImportedModuleName()`,
   * possibly combined with `ModuleObject.importedAs()`.
   *
   * Gets the module imported by this `import *` statement.
   */
  deprecated Module getTheModule() { this.getImportedModuleName() = result.getName() }

  override Expr getASubExpression() { this.getModule() = result }

  override Stmt getASubStatement() { none() }

  /** Gets the name of the imported module. */
  string getImportedModuleName() {
    exists(ImportExpr mod | mod = this.getModuleExpr() | result = mod.getImportedModuleName())
  }
}
|
||||
|
||||
/**
 * A statement that imports a module. This can be any statement that includes the
 * `import` keyword, such as `import sys`, `from sys import version` or
 * `from sys import *`.
 */
class ImportingStmt extends Stmt {
  ImportingStmt() {
    this instanceof ImportStar
    or
    this instanceof Import
  }

  /** Gets the name of an imported module. */
  string getAnImportedModuleName() {
    exists(ImportStar star | star = this | result = star.getImportedModuleName())
    or
    exists(Import imp | imp = this | result = imp.getAnImportedModuleName())
  }
}
|
||||
@@ -1,60 +0,0 @@
|
||||
import python
|
||||
|
||||
/** A `key: value` item in a dictionary display. */
class KeyValuePair extends KeyValuePair_, DictDisplayItem {
  /* syntax: Expr : Expr */
  override Location getLocation() { KeyValuePair_.super.getLocation() = result }

  override string toString() { KeyValuePair_.super.toString() = result }

  /** Gets the value of this key-value pair. */
  override Expr getValue() { KeyValuePair_.super.getValue() = result }

  /** Gets the scope of this pair, which is the scope of its value. */
  override Scope getScope() { exists(Expr value | value = this.getValue() | result = value.getScope()) }

  /** Gets a child node: the value or the key of this pair. */
  override AstNode getAChildNode() {
    result = this.getValue()
    or
    result = this.getKey()
  }
}
|
||||
|
||||
/** A double-starred expression in a call or dict literal. */
class DictUnpacking extends DictUnpacking_, DictUnpackingOrKeyword, DictDisplayItem {
  override Location getLocation() { DictUnpacking_.super.getLocation() = result }

  override string toString() { DictUnpacking_.super.toString() = result }

  /** Gets the value of this dictionary unpacking. */
  override Expr getValue() { DictUnpacking_.super.getValue() = result }

  /** Gets the scope of this unpacking, which is the scope of its value. */
  override Scope getScope() { exists(Expr value | value = this.getValue() | result = value.getScope()) }

  /** Gets the only child node: the unpacked value. */
  override AstNode getAChildNode() { this.getValue() = result }
}
|
||||
|
||||
/**
 * A dictionary item that is either a dictionary unpacking (`DictUnpacking`) or
 * a keyword argument (`Keyword`).
 */
abstract class DictUnpackingOrKeyword extends DictItem {
  /** Gets a fallback description, used when a subclass does not override `toString`. */
  override string toString() { result = "DictUnpackingOrKeyword with missing toString" }

  /** Gets the value expression of this item. */
  abstract Expr getValue();
}
|
||||
|
||||
/**
 * A dictionary item that is either a key-value pair (`KeyValuePair`) or a
 * dictionary unpacking (`DictUnpacking`).
 */
abstract class DictDisplayItem extends DictItem {
  /** Gets a fallback description, used when a subclass does not override `toString`. */
  override string toString() { result = "DictDisplayItem with missing toString" }

  /** Gets the value expression of this item. */
  abstract Expr getValue();
}
|
||||
|
||||
/** A keyword argument in a call. For example `arg=expr` in `foo(0, arg=expr)`. */
class Keyword extends Keyword_, DictUnpackingOrKeyword {
  /* syntax: name = Expr */
  override Location getLocation() { Keyword_.super.getLocation() = result }

  override string toString() { Keyword_.super.toString() = result }

  /** Gets the value of this keyword argument. */
  override Expr getValue() { Keyword_.super.getValue() = result }

  /** Gets the scope of this keyword argument, which is the scope of its value. */
  override Scope getScope() { exists(Expr value | value = this.getValue() | result = value.getScope()) }

  /** Gets the only child node: the argument value. */
  override AstNode getAChildNode() { this.getValue() = result }
}
|
||||
@@ -1,333 +0,0 @@
|
||||
import python
|
||||
|
||||
/** The metrics for a function. */
class FunctionMetrics extends Function {
  /**
   * Gets the total number of lines (including blank lines)
   * from the definition to the end of the function.
   */
  int getNumberOfLines() { py_alllines(this, result) }

  /** Gets the number of lines of code in the function. */
  int getNumberOfLinesOfCode() { py_codelines(this, result) }

  /** Gets the number of lines of comments in the function. */
  int getNumberOfLinesOfComments() { py_commentlines(this, result) }

  /** Gets the number of lines of docstring in the function. */
  int getNumberOfLinesOfDocStrings() { py_docstringlines(this, result) }

  /**
   * Gets the cyclomatic complexity of the function:
   * the number of linearly independent paths through the source code.
   * Computed as E - N + 2P,
   * where
   *  E = the number of edges of the graph.
   *  N = the number of nodes of the graph.
   *  P = the number of connected components, which for a single function is 1.
   *
   * Only likely-reachable basic blocks and edges that are not unlikely
   * successors are counted.
   */
  int getCyclomaticComplexity() {
    exists(int E, int N |
      N = count(BasicBlock b | b = this.getABasicBlock() and b.likelyReachable()) and
      E =
        count(BasicBlock b1, BasicBlock b2 |
          b1 = this.getABasicBlock() and
          b1.likelyReachable() and
          b2 = this.getABasicBlock() and
          b2.likelyReachable() and
          b2 = b1.getASuccessor() and
          not b1.unlikelySuccessor(b2)
        )
    |
      result = E - N + 2
    )
  }

  /** Gets a basic block reachable from the entry node of this function via successor edges. */
  private BasicBlock getABasicBlock() {
    result = this.getEntryNode().getBasicBlock()
    or
    exists(BasicBlock mid | mid = this.getABasicBlock() and result = mid.getASuccessor())
  }

  /**
   * Gets a callable that this callable depends on.
   *
   * One callable "this" depends on another callable "result"
   * if "this" makes some call to a method that may end up being "result".
   */
  FunctionMetrics getADependency() {
    result != this and
    not non_coupling_method(result) and
    exists(Call call | call.getScope() = this |
      // the called expression is known to refer to `result`
      exists(FunctionObject callee | callee.getFunction() = result |
        call.getAFlowNode().getFunction().refersTo(callee)
      )
      or
      exists(Attribute a | call.getFunc() = a |
        // NOTE(review): `unique_root_method` is defined elsewhere; presumably it
        // matches attribute names that identify a single method - confirm.
        unique_root_method(result, a.getName())
        or
        // a call `self.name(...)` to a function called `name` in the same scope
        exists(Name n | a.getObject() = n and n.getId() = "self" |
          result.getScope() = this.getScope() and
          result.getName() = a.getName()
        )
      )
    )
  }

  /**
   * Gets the afferent coupling:
   * the number of callables that depend on this method.
   * This is sometimes called the "fan-in" of a method.
   */
  int getAfferentCoupling() { result = count(FunctionMetrics m | m.getADependency() = this) }

  /**
   * Gets the efferent coupling:
   * the number of methods that this method depends on.
   * This is sometimes called the "fan-out" of a method.
   */
  int getEfferentCoupling() { result = count(FunctionMetrics m | this.getADependency() = m) }

  /**
   * Gets the number of positional parameters minus the number of default values
   * of positional parameters.
   *
   * The defaults are read from the `Arguments` of the defining expression
   * whatever its kind, so lambdas such as `lambda x=1: x` are handled like
   * `def` functions.
   */
  int getNumberOfParametersWithoutDefault() {
    result =
      this.getPositionalParameterCount() - count(this.getDefinition().getArgs().getADefault())
  }

  /** Gets the maximum nesting depth of the statements whose scope is this function. */
  int getStatementNestingDepth() { result = max(Stmt s | s.getScope() = this | getNestingDepth(s)) }

  /** Gets the number of calls whose scope is this function. */
  int getNumberOfCalls() { result = count(Call c | c.getScope() = this) }
}
|
||||
|
||||
/** The metrics for a class */
|
||||
class ClassMetrics extends Class {
|
||||
/**
|
||||
* Gets the total number of lines (including blank lines)
|
||||
* from the definition to the end of the class
|
||||
*/
|
||||
int getNumberOfLines() { py_alllines(this, result) }
|
||||
|
||||
/** Gets the number of lines of code in the class */
|
||||
int getNumberOfLinesOfCode() { py_codelines(this, result) }
|
||||
|
||||
/** Gets the number of lines of comments in the class */
|
||||
int getNumberOfLinesOfComments() { py_commentlines(this, result) }
|
||||
|
||||
/** Gets the number of lines of docstrings in the class */
|
||||
int getNumberOfLinesOfDocStrings() { py_docstringlines(this, result) }
|
||||
|
||||
private predicate dependsOn(Class other) {
|
||||
other != this and
|
||||
(
|
||||
exists(FunctionMetrics f1, FunctionMetrics f2 | f1.getADependency() = f2 |
|
||||
f1.getScope() = this and f2.getScope() = other
|
||||
)
|
||||
or
|
||||
exists(Function f, Call c, ClassObject cls | c.getScope() = f and f.getScope() = this |
|
||||
c.getFunc().refersTo(cls) and
|
||||
cls.getPyClass() = other
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* The afferent coupling of a class is the number of classes that
|
||||
* directly depend on it.
|
||||
*/
|
||||
int getAfferentCoupling() { result = count(ClassMetrics t | t.dependsOn(this)) }
|
||||
|
||||
/**
|
||||
* The efferent coupling of a class is the number of classes that
|
||||
* it directly depends on.
|
||||
*/
|
||||
int getEfferentCoupling() { result = count(ClassMetrics t | this.dependsOn(t)) }
|
||||
|
||||
int getInheritanceDepth() {
|
||||
exists(ClassObject cls | cls.getPyClass() = this | result = max(classInheritanceDepth(cls)))
|
||||
}
|
||||
|
||||
/* -------- CHIDAMBER AND KEMERER LACK OF COHESION IN METHODS ------------ */
|
||||
/*
|
||||
* The aim of this metric is to try and determine whether a class
|
||||
* represents one abstraction (good) or multiple abstractions (bad).
|
||||
* If a class represents multiple abstractions, it should be split
|
||||
* up into multiple classes.
|
||||
*
|
||||
* In the Chidamber and Kemerer method, this is measured as follows:
|
||||
* n1 = number of pairs of distinct methods in a class that do *not*
|
||||
* have at least one commonly accessed field
|
||||
* n2 = number of pairs of distinct methods in a class that do
|
||||
* have at least one commonly accessed field
|
||||
* lcom = ((n1 - n2)/2 max 0)
|
||||
*
|
||||
* We divide by 2 because each pair (m1,m2) is counted twice in n1 and n2.
|
||||
*/
|
||||
|
||||
/** should function f be excluded from the cohesion computation? */
|
||||
predicate ignoreLackOfCohesion(Function f) { f.isInitMethod() or f.isSpecialMethod() }
|
||||
|
||||
private predicate methodPair(Function m1, Function m2) {
|
||||
m1.getScope() = this and
|
||||
m2.getScope() = this and
|
||||
not this.ignoreLackOfCohesion(m1) and
|
||||
not this.ignoreLackOfCohesion(m2) and
|
||||
m1 != m2
|
||||
}
|
||||
|
||||
private predicate one_accesses_other(Function m1, Function m2) {
|
||||
this.methodPair(m1, m2) and
|
||||
(
|
||||
exists(SelfAttributeRead sa |
|
||||
sa.getName() = m1.getName() and
|
||||
sa.getScope() = m2
|
||||
)
|
||||
or
|
||||
exists(SelfAttributeRead sa |
|
||||
sa.getName() = m2.getName() and
|
||||
sa.getScope() = m1
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/** do m1 and m2 access a common field or one calls the other? */
|
||||
private predicate shareField(Function m1, Function m2) {
  this.methodPair(m1, m2) and
  // There is a `self` attribute read in each method, and both reads use the same name.
  exists(SelfAttributeRead r1, SelfAttributeRead r2 |
    r1.getScope() = m1 and
    r2.getScope() = m2 and
    r1.getName() = r2.getName()
  )
}
|
||||
|
||||
/**
 * Gets the number of unordered method pairs of this class that share a field
 * or where one accesses the other.
 */
private int similarMethodPairs() {
  exists(int ordered |
    ordered =
      count(Function m1, Function m2 |
        this.methodPair(m1, m2) and
        (this.one_accesses_other(m1, m2) or this.shareField(m1, m2))
      ) and
    // `methodPair` is symmetric, so each pair was counted twice.
    result = ordered / 2
  )
}
|
||||
|
||||
/** Gets the number of unordered method pairs of this class. */
private int methodPairs() {
  exists(int ordered |
    ordered = count(Function m1, Function m2 | this.methodPair(m1, m2)) and
    // `methodPair` is symmetric, so each pair was counted twice.
    result = ordered / 2
  )
}
|
||||
|
||||
/** return Chidamber and Kemerer Lack of Cohesion */
|
||||
int getLackOfCohesionCK() {
  // (all pairs) - 2 * (similar pairs) = (dissimilar pairs) - (similar pairs), clamped at zero.
  result = (this.methodPairs() - 2 * this.similarMethodPairs()).maximum(0)
}
|
||||
|
||||
/**
 * Holds if `m1` and `m2` are a similar method pair, `m1` starts on `line`,
 * and `m2` starts on a later line. Ordering by start line keeps only one
 * direction of each symmetric pair.
 */
private predicate similarMethodPairDag(Function m1, Function m2, int line) {
  line = m1.getLocation().getStartLine() and
  m2.getLocation().getStartLine() > line and
  (this.one_accesses_other(m1, m2) or this.shareField(m1, m2))
}
|
||||
|
||||
/**
 * Holds if `m` belongs to the group of similar methods that is labelled by
 * `line`, the start line of a method with no predecessor in `similarMethodPairDag`.
 */
private predicate subgraph(Function m, int line) {
  // Base case: `m` has a successor but no predecessor, and starts on `line`.
  not this.similarMethodPairDag(_, m, _) and
  this.similarMethodPairDag(m, _, line)
  or
  // Step: `m` is linked, in either direction, to a method already in the group.
  exists(Function member | this.subgraph(member, line) |
    this.similarMethodPairDag(m, member, _) or
    this.similarMethodPairDag(member, m, _)
  )
}
|
||||
|
||||
/** Holds if `line` is the smallest line for which `subgraph(m, line)` holds. */
predicate unionSubgraph(Function m, int line) {
  line = min(int candidate | this.subgraph(m, candidate))
}
|
||||
|
||||
/** return Hitz and Montazeri Lack of Cohesion */
|
||||
int getLackOfCohesionHM() {
  // Each distinct label produced by `unionSubgraph` is counted once.
  result = count(int label | this.unionSubgraph(_, label))
}
|
||||
}
|
||||
|
||||
/**
 * Gets an inheritance depth of `cls`: one more than the depth of one of its
 * base types, or, for a class without base types, 0 on Python 2 and 1 on later
 * major versions. A class with several base types can have several results.
 */
private int classInheritanceDepth(ClassObject cls) {
  /* Prevent run-away recursion in case of circular inheritance */
  not cls.getASuperType() = cls and
  (
    exists(ClassObject base |
      base = cls.getABaseType() and
      result = classInheritanceDepth(base) + 1
    )
    or
    not exists(cls.getABaseType()) and
    (
      major_version() > 2 and result = 1
      or
      major_version() = 2 and result = 0
    )
  )
}
|
||||
|
||||
/** The metrics for a module */
class ModuleMetrics extends Module {
  /** Gets the total number of lines (including blank lines) in the module */
  int getNumberOfLines() { py_alllines(this, result) }

  /** Gets the number of lines of code in the module */
  int getNumberOfLinesOfCode() { py_codelines(this, result) }

  /** Gets the number of lines of comments in the module */
  int getNumberOfLinesOfComments() { py_commentlines(this, result) }

  /** Gets the number of lines of docstrings in the module */
  int getNumberOfLinesOfDocStrings() { py_docstringlines(this, result) }

  /**
   * Gets the afferent coupling of this module: the number of modules that
   * directly depend on it.
   */
  int getAfferentCoupling() {
    result = count(ModuleMetrics dependant | dependant.dependsOn(this))
  }

  /**
   * Gets the efferent coupling of this module: the number of modules that
   * it directly depends on.
   */
  int getEfferentCoupling() {
    result = count(ModuleMetrics dependee | this.dependsOn(dependee))
  }

  /** Holds if this module directly depends on the different module `other`. */
  private predicate dependsOn(Module other) {
    this != other and
    (
      // A function enclosed in this module has a dependency on a function enclosed in `other`.
      exists(FunctionMetrics src, FunctionMetrics dst |
        dst = src.getADependency() and
        src.getEnclosingModule() = this and
        dst.getEnclosingModule() = other
      )
      or
      // A call inside a function whose scope is this module refers to a class from `other`.
      exists(Function f, Call c, ClassObject cls |
        f.getScope() = this and
        c.getScope() = f and
        c.getFunc().refersTo(cls) and
        cls.getPyClass().getEnclosingModule() = other
      )
    )
  }
}
|
||||
|
||||
/** Helpers for coupling */
|
||||
predicate unique_root_method(Function func, string name) {
  name = func.getName() and
  // Every function object called `name` must override every function object for `func`.
  forall(FunctionObject f, FunctionObject other |
    f.getFunction() = func and
    other.getName() = name
  |
    other.overrides(f)
  )
}
|
||||
|
||||
/**
 * Holds if `f` is a special method, an `__init__` method, or has one of the
 * generic names `close`, `write`, `read`, `get` or `set`.
 */
predicate non_coupling_method(Function f) {
  f.isSpecialMethod()
  or
  f.isInitMethod()
  or
  f.getName() = ["close", "write", "read", "get", "set"]
}
|
||||
|
||||
/**
 * Gets the nesting depth of `s`: 1 for a statement that is not a sub-statement
 * of any other statement, otherwise the depth of its parent plus one, except
 * that `elif` and `except` clauses share the depth of their parent.
 */
private int getNestingDepth(Stmt s) {
  result = 1 and
  not exists(Stmt parent | parent.getASubStatement() = s)
  or
  exists(Stmt parent, int extra | parent.getASubStatement() = s |
    (
      /* If statement is an `elif` or `except` then it is not indented relative to its parent */
      if s.(If).isElif() or s instanceof ExceptStmt
      then extra = 0
      else extra = 1
    ) and
    result = getNestingDepth(parent) + extra
  )
}
|
||||
@@ -1,285 +0,0 @@
|
||||
import python
|
||||
private import semmle.python.objects.ObjectAPI
|
||||
private import semmle.python.objects.Modules
|
||||
|
||||
/**
|
||||
* A module. This is the top level element in an AST, corresponding to a source file.
|
||||
* It is also a Scope; the scope of global variables.
|
||||
*/
|
||||
class Module extends Module_, Scope, AstNode {
  /** Gets a textual representation of this module: its kind and name, or a path-based fallback when it has no name */
  override string toString() {
    result = this.getKind() + " " + this.getName()
    or
    not exists(this.getName()) and
    (
      /* No name is defined, which means that this module is not on an import path. So it must be a script */
      not this.isPackage() and
      result = "Script " + this.getFile().getShortName()
      or
      /* Package missing name, so just use the path instead */
      this.isPackage() and
      result = "Package at " + this.getPath().getAbsolutePath()
    )
  }

  /**
   * This method will be deprecated in the next release. Please use `getEnclosingScope()` instead.
   * The enclosing scope of this module (always none)
   */
  override Scope getScope() { none() }

  /** The enclosing scope of this module (always none) */
  override Scope getEnclosingScope() { none() }

  /** Gets the statements forming the body of this module */
  override StmtList getBody() { result = Module_.super.getBody() }

  /** Gets the nth statement of this module */
  override Stmt getStmt(int n) { result = Module_.super.getStmt(n) }

  /** Gets a top-level statement in this module */
  override Stmt getAStmt() { result = Module_.super.getAStmt() }

  /** Gets the name of this module */
  override string getName() {
    // No stored name: derive one from the file or folder of this module.
    not exists(Module_.super.getName()) and
    result = moduleNameFromFile(this.getPath())
    or
    // A stored name is only used when it is a legal dotted name.
    result = Module_.super.getName() and
    legalDottedName(result)
  }

  /** Gets the short name of the module. For example the short name of module x.y.z is 'z' */
  string getShortName() {
    not exists(this.getPackage()) and
    result = this.getName()
    or
    // Strip the package name and the dot that follows it.
    result = this.getName().suffix(this.getPackage().getName().length() + 1)
  }

  /** Gets this module */
  override Module getEnclosingModule() { result = this }

  /** Gets the __init__ module of this module if the module is a package and it has an __init__ module */
  Module getInitModule() {
    /* this.isPackage() and */ result.getName() = this.getName() + ".__init__"
  }

  /** Whether this module is a package initializer */
  predicate isPackageInit() {
    not this.isPackage() and
    this.getName().matches("%\\_\\_init\\_\\_")
  }

  /** Gets a name exported by this module, that is, a name that will be added to a namespace by 'from this-module import *' */
  string getAnExport() {
    py_exports(this, result)
    or
    exists(ModuleObjectInternal internal |
      internal.getSource() = this.getEntryNode() and
      internal.(ModuleValue).exports(result)
    )
  }

  /** Gets the source file for this module */
  File getFile() { py_module_path(this, result) }

  /** Gets the source file or folder for this module or package */
  Container getPath() { py_module_path(this, result) }

  /** Whether this is a package */
  predicate isPackage() { this.getPath() instanceof Folder }

  /** Gets the package containing this module (or parent package if this is a package) */
  Module getPackage() {
    this.getName().matches("%.%") and
    // Drop the last dotted component to obtain the package name.
    result.getName() = this.getName().regexpReplaceAll("\\.[^.]*$", "")
  }

  /** Gets the name of the package containing this module */
  string getPackageName() {
    this.getName().matches("%.%") and
    result = this.getName().regexpReplaceAll("\\.[^.]*$", "")
  }

  /** Gets the metrics for this module */
  ModuleMetrics getMetrics() { result = this }

  /**
   * Use ModuleObject.getAnImportedModule() instead.
   * Gets a module imported by this module
   */
  deprecated Module getAnImportedModule() { result.getName() = this.getAnImportedModuleName() }

  /** Gets the name of a module imported by an `Import` or `ImportStar` enclosed in this module */
  string getAnImportedModuleName() {
    exists(ImportStar star |
      star.getEnclosingModule() = this and
      result = star.getImportedModuleName()
    )
    or
    exists(Import imp |
      imp.getEnclosingModule() = this and
      result = imp.getAnImportedModuleName()
    )
  }

  /** Gets the location of this module: its scope location if there is one, otherwise an AST location with all-zero coordinates */
  override Location getLocation() {
    py_scope_location(result, this)
    or
    not py_scope_location(_, this) and
    locations_ast(result, this, 0, 0, 0, 0)
  }

  /** Gets a child module or package of this package */
  Module getSubModule(string name) {
    result.getPackage() = this and
    // `name` is the last dotted component of the child's full name.
    name = result.getName().regexpReplaceAll(".*\\.", "")
  }

  /** Whether name is declared in the __all__ list of this module */
  predicate declaredInAll(string name) {
    exists(AssignStmt assign, GlobalVariable all |
      all.getId() = "__all__" and
      assign.defines(all) and
      assign.getScope() = this and
      (
        // `__all__` may be assigned either a tuple or a list of string constants.
        assign.getValue().(Tuple).getAnElt().(StrConst).getText() = name
        or
        assign.getValue().(List).getAnElt().(StrConst).getText() = name
      )
    )
  }

  /** Gets a child AST node of this module, that is, one of its statements */
  override AstNode getAChildNode() { result = this.getAStmt() }

  /** Holds if an `Import` in this module imports from `__future__` and binds the name `attr` */
  predicate hasFromFuture(string attr) {
    exists(Import i, ImportMember im, ImportExpr ie, Alias a, Name name |
      i.getEnclosingModule() = this and
      i.getASubExpression() = im and
      i.getAName() = a and
      im.getModule() = ie and
      ie.getName() = "__future__" and
      a.getAsname() = name and
      name.getId() = attr
    )
  }

  /** Gets the path element from which this module was loaded. */
  Container getLoadPath() { result = this.getPath().getImportRoot() }

  /** Holds if this module is in the standard library for version `major.minor` */
  predicate inStdLib(int major, int minor) { this.getLoadPath().isStdLibRoot(major, minor) }

  /** Holds if this module is in the standard library */
  predicate inStdLib() { this.getLoadPath().isStdLibRoot() }

  /** Holds if `inner` is contained in the scope of this module; uses the definition from `Scope` */
  override predicate containsInScope(AstNode inner) { Scope.super.containsInScope(inner) }

  /** Holds if this module contains `inner`; uses the definition from `Scope` */
  override predicate contains(AstNode inner) { Scope.super.contains(inner) }

  /** Gets the kind of this module. */
  override string getKind() {
    if this.isPackage()
    then result = "Package"
    else (
      result = Module_.super.getKind()
      or
      // Fall back to "Module" when no kind is recorded.
      not exists(Module_.super.getKind()) and
      result = "Module"
    )
  }
}
|
||||
|
||||
/**
 * Holds if `name` is a dot-separated sequence of one or more components, each
 * starting with a letter or underscore followed by letters, digits or underscores.
 */
bindingset[name]
private predicate legalDottedName(string name) {
  exists(string pattern |
    pattern = "(\\p{L}|_)(\\p{L}|\\d|_)*(\\.(\\p{L}|_)(\\p{L}|\\d|_)*)*" and
    name.regexpMatch(pattern)
  )
}
|
||||
|
||||
/**
 * Holds if `name` starts with a letter or underscore and continues with
 * letters, digits or underscores only (no dots).
 */
bindingset[name]
private predicate legalShortName(string name) {
  exists(string pattern |
    pattern = "(\\p{L}|_)(\\p{L}|\\d|_)*" and
    name.regexpMatch(pattern)
  )
}
|
||||
|
||||
/**
|
||||
* Holds if `f` is potentially a source package.
|
||||
* Does it have an __init__.py file (or --respect-init=False for Python 2) and is it within the source archive?
|
||||
*/
|
||||
private predicate isPotentialSourcePackage(Folder f) {
  isPotentialPackage(f) and
  // The folder must have a non-empty relative path.
  f.getRelativePath() != ""
}
|
||||
|
||||
/**
 * Holds if `f` may be a package: it contains an `__init__.py` file, or the
 * database is for Python 2 and was extracted with `options.respect_init` set to `False`.
 */
private predicate isPotentialPackage(Folder f) {
  major_version() = 2 and
  py_flags_versioned("options.respect_init", "False", _)
  or
  exists(f.getFile("__init__.py"))
}
|
||||
|
||||
/**
 * Gets the last component of the module name for `file`: the stem of a file,
 * or the base name of a folder that is a potential package.
 */
private string moduleNameFromBase(Container file) {
  file instanceof File and
  result = file.getStem()
  or
  isPotentialPackage(file) and
  result = file.getBaseName()
}
|
||||
|
||||
/**
|
||||
* Holds if `file` may be transitively imported from a file that may serve as the entry point of
|
||||
* the execution.
|
||||
*/
|
||||
private predicate transitively_imported_from_entry_point(File file) {
  file.getExtension().matches("%py%") and
  exists(File importer |
    // Only consider files that are in the source archive
    exists(importer.getRelativePath()) and
    // The importing file sits in the same folder as `file`.
    file.getParent() = importer.getParent() and
    exists(ImportExpr imp |
      imp.getLocation().getFile() = importer and
      // Disregard relative imports
      imp.getLevel() = 0 and
      imp.getName() = file.getStem()
    ) and
    (
      transitively_imported_from_entry_point(importer)
      or
      importer.isPossibleEntryPoint()
    )
  )
}
|
||||
|
||||
/**
 * Gets a module name under which the file or folder `file` may be imported,
 * derived from its position in the file system.
 */
string moduleNameFromFile(Container file) {
  exists(string basename |
    basename = moduleNameFromBase(file) and
    legalShortName(basename) and
    (
      // Qualified by the module name of the enclosing folder.
      result = moduleNameFromFile(file.getParent()) + "." + basename
      or
      // If `file` is a transitive import of a file that's executed directly, we allow references
      // to it by its `basename`.
      transitively_imported_from_entry_point(file) and
      result = basename
    )
  )
  or
  // The remaining cases all name the module after the stem of `file`.
  result = file.getStem() and
  (
    // A source package whose parent is not itself a legally named source package.
    isPotentialSourcePackage(file) and
    not (
      isPotentialSourcePackage(file.getParent()) and
      legalShortName(file.getParent().getBaseName())
    )
    or
    // Directly inside its import root.
    file.getParent() = file.getImportRoot()
    or
    // Directly inside the stub root.
    isStubRoot(file.getParent())
  )
}
|
||||
|
||||
/**
 * Holds if `f` is a folder whose absolute path ends in `/data/python/stubs`
 * and neither it nor any of its ancestors is an import root.
 */
private predicate isStubRoot(Folder f) {
  f.getAbsolutePath().matches("%/data/python/stubs") and
  not f.getParent*().isImportRoot()
}
|
||||
|
||||
/**
|
||||
* Holds if the Container `c` should be the preferred file or folder for
|
||||
* the given name when performing imports.
|
||||
* Trivially true for any container if it is the only one with its name.
|
||||
* However, if there are several modules with the same name, then
|
||||
* this is the module most likely to be imported under that name.
|
||||
*/
|
||||
predicate isPreferredModuleForName(Container c, string name) {
  // `c` is preferred when it attains the smallest priority value among all containers for `name`.
  priorityForName(c, name) = min(int p | p = priorityForName(_, name))
}
|
||||
|
||||
/**
 * Gets a priority of container `c` for module name `name`; smaller values are
 * preferred by `isPreferredModuleForName`, which takes the minimum.
 */
private int priorityForName(Container c, string name) {
  name = moduleNameFromFile(c) and
  (
    // Otherwise
    // (this value is available for every container; it only matters when no smaller one applies)
    result = 10000
    or
    // On an import path
    exists(c.getImportRoot(result))
    or
    // In the source
    result = -1 and
    exists(c.getRelativePath())
  )
}
|
||||
@@ -1,239 +0,0 @@
|
||||
import python
|
||||
|
||||
/** Base class for operators */
|
||||
class Operator extends Operator_ {
  /**
   * Gets the name of the special method used to implement this operator.
   * This base definition has no result.
   */
  string getSpecialMethodName() { none() }
}
|
||||
|
||||
/* Unary Expression and its operators */
|
||||
/** A unary expression: (`+x`), (`-x`), (`~x`) or (`not x`) */
|
||||
class UnaryExpr extends UnaryExpr_ {
  /** Gets the only sub-expression of this unary expression: its operand */
  override Expr getASubExpression() { result = this.getOperand() }
}
|
||||
|
||||
/** A unary operator: `+`, `-`, `~` or `not` */
|
||||
class Unaryop extends Unaryop_ {
  /**
   * Gets the name of the special method used to implement this operator.
   * This base definition has no result.
   */
  string getSpecialMethodName() { none() }
}
|
||||
|
||||
/** An invert (`~`) unary operator */
|
||||
class Invert extends Invert_ {
  /** Gets `__invert__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__invert__" }
}
|
||||
|
||||
/** A positive (`+`) unary operator */
|
||||
class UAdd extends UAdd_ {
  /** Gets `__pos__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__pos__" }
}
|
||||
|
||||
/** A negation (`-`) unary operator */
|
||||
class USub extends USub_ {
  /** Gets `__neg__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__neg__" }
}
|
||||
|
||||
/** A `not` unary operator */
|
||||
class Not extends Not_ {
  /** Has no result: no special method name is given for `not` */
  override string getSpecialMethodName() { none() }
}
|
||||
|
||||
/* Binary Operation and its operators */
|
||||
/** A binary expression, such as `x + y` */
|
||||
class BinaryExpr extends BinaryExpr_ {
  /** Gets the left or the right operand of this binary expression */
  override Expr getASubExpression() {
    result = this.getRight()
    or
    result = this.getLeft()
  }
}
|
||||
|
||||
/** A power (`**`) binary operator */
|
||||
class Pow extends Pow_ {
  /** Gets `__pow__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__pow__" }
}
|
||||
|
||||
/** A right shift (`>>`) binary operator */
|
||||
class RShift extends RShift_ {
  /** Gets `__rshift__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__rshift__" }
}
|
||||
|
||||
/** A subtract (`-`) binary operator */
|
||||
class Sub extends Sub_ {
  /** Gets `__sub__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__sub__" }
}
|
||||
|
||||
/** A bitwise and (`&`) binary operator */
|
||||
class BitAnd extends BitAnd_ {
  /** Gets `__and__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__and__" }
}
|
||||
|
||||
/** A bitwise or (`|`) binary operator */
|
||||
class BitOr extends BitOr_ {
  /** Gets `__or__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__or__" }
}
|
||||
|
||||
/** A bitwise exclusive-or (`^`) binary operator */
|
||||
class BitXor extends BitXor_ {
  /** Gets `__xor__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__xor__" }
}
|
||||
|
||||
/** An add (`+`) binary operator */
|
||||
class Add extends Add_ {
  /** Gets `__add__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__add__" }
}
|
||||
|
||||
/** A (true) divide (`/`) binary operator */
|
||||
class Div extends Div_ {
  /**
   * Gets a special method name for this operator: always `__truediv__`, and
   * additionally `__div__` when the major version is 2.
   */
  override string getSpecialMethodName() {
    major_version() = 2 and
    result = "__div__"
    or
    result = "__truediv__"
  }
}
|
||||
|
||||
/** A floor divide (`//`) binary operator */
|
||||
class FloorDiv extends FloorDiv_ {
  /** Gets `__floordiv__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__floordiv__" }
}
|
||||
|
||||
/** A left shift (`<<`) binary operator */
|
||||
class LShift extends LShift_ {
  /** Gets `__lshift__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__lshift__" }
}
|
||||
|
||||
/** A modulo (`%`) binary operator, which includes string formatting */
|
||||
class Mod extends Mod_ {
  /** Gets `__mod__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__mod__" }
}
|
||||
|
||||
/** A multiplication (`*`) binary operator */
|
||||
class Mult extends Mult_ {
  /** Gets `__mul__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__mul__" }
}
|
||||
|
||||
/** A matrix multiplication (`@`) binary operator */
|
||||
class MatMult extends MatMult_ {
  /** Gets `__matmul__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__matmul__" }
}
|
||||
|
||||
/* Comparison Operation and its operators */
|
||||
/** A comparison operation, such as `x<y` */
|
||||
class Compare extends Compare_ {
  /** Gets the left-most operand or any of the comparators of this comparison */
  override Expr getASubExpression() {
    result = this.getAComparator()
    or
    result = this.getLeft()
  }

  /**
   * Whether as part of this comparison 'left' is compared with 'right' using the operator 'op'.
   * For example, the comparison `a<b<c` compares(`a`, `b`, `<`) and compares(`b`, `c`, `<`).
   */
  predicate compares(Expr left, Cmpop op, Expr right) {
    // First link: the left-most operand against comparator 0.
    left = this.getLeft() and
    right = this.getComparator(0) and
    op = this.getOp(0)
    or
    // Later links: comparator `n` against comparator `n + 1`, using operator `n + 1`.
    exists(int n, int next | next = n + 1 |
      left = this.getComparator(n) and
      right = this.getComparator(next) and
      op = this.getOp(next)
    )
  }
}
|
||||
|
||||
/** List of comparison operators in a comparison */
|
||||
class CmpopList extends CmpopList_ { } // declares no members of its own
|
||||
|
||||
/** A comparison operator */
|
||||
abstract class Cmpop extends Cmpop_ {
  /** Gets the source-level symbol of this operator. This base definition has no result. */
  string getSymbol() { none() }

  /**
   * Gets the name of the special method used to implement this operator.
   * This base definition has no result.
   */
  string getSpecialMethodName() { none() }
}
|
||||
|
||||
/** A greater than (`>`) comparison operator */
|
||||
class Gt extends Gt_ {
  /** Gets the symbol `>` */
  override string getSymbol() { result = ">" }

  /** Gets `__gt__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__gt__" }
}
|
||||
|
||||
/** A greater than or equals (`>=`) comparison operator */
|
||||
class GtE extends GtE_ {
  /** Gets the symbol `>=` */
  override string getSymbol() { result = ">=" }

  /** Gets `__ge__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__ge__" }
}
|
||||
|
||||
/** An `in` comparison operator */
|
||||
class In extends In_ {
  /** Gets the symbol `in`. This class declares no special method name. */
  override string getSymbol() { result = "in" }
}
|
||||
|
||||
/** An `is` comparison operator */
|
||||
class Is extends Is_ {
  /** Gets the symbol `is`. This class declares no special method name. */
  override string getSymbol() { result = "is" }
}
|
||||
|
||||
/** An `is not` comparison operator */
|
||||
class IsNot extends IsNot_ {
  /** Gets the symbol `is not`. This class declares no special method name. */
  override string getSymbol() { result = "is not" }
}
|
||||
|
||||
/** An equals (`==`) comparison operator */
|
||||
class Eq extends Eq_ {
  /** Gets the symbol `==` */
  override string getSymbol() { result = "==" }

  /** Gets `__eq__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__eq__" }
}
|
||||
|
||||
/** A less than (`<`) comparison operator */
|
||||
class Lt extends Lt_ {
  /** Gets the symbol `<` */
  override string getSymbol() { result = "<" }

  /** Gets `__lt__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__lt__" }
}
|
||||
|
||||
/** A less than or equals (`<=`) comparison operator */
|
||||
class LtE extends LtE_ {
  /** Gets the symbol `<=` */
  override string getSymbol() { result = "<=" }

  /** Gets `__le__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__le__" }
}
|
||||
|
||||
/** A not equals (`!=`) comparison operator */
|
||||
class NotEq extends NotEq_ {
  /** Gets the symbol `!=` */
  override string getSymbol() { result = "!=" }

  /** Gets `__ne__`, the special method name for this operator */
  override string getSpecialMethodName() { result = "__ne__" }
}
|
||||
|
||||
/** A `not in` comparison operator */
|
||||
class NotIn extends NotIn_ {
  /** Gets the symbol `not in`. This class declares no special method name. */
  override string getSymbol() { result = "not in" }
}
|
||||
|
||||
/* Boolean Operation (and/or) and its operators */
|
||||
/** A boolean shortcut (and/or) operation */
|
||||
class BoolExpr extends BoolExpr_ {
  /** Gets one of the operand values of this boolean operation */
  override Expr getASubExpression() { result = this.getAValue() }

  /** Gets the keyword for the operator of this expression: `and` or `or` */
  string getOperator() {
    this.getOp() instanceof Or and result = "or"
    or
    this.getOp() instanceof And and result = "and"
  }

  /** Whether part evaluates to partIsTrue if this evaluates to wholeIsTrue */
  predicate impliesValue(Expr part, boolean partIsTrue, boolean wholeIsTrue) {
    // An `and` that is true forces its operands true; any other operator that is
    // false forces its operands false. `polarity` is that forced truth value.
    exists(boolean polarity |
      (if this.getOp() instanceof And then polarity = true else polarity = false) and
      wholeIsTrue = polarity and
      (
        // A direct operand takes the forced value.
        partIsTrue = polarity and
        part = this.getAValue()
        or
        // An operand that is itself a `BoolExpr` propagates the implication further.
        this.getAValue().(BoolExpr).impliesValue(part, partIsTrue, polarity)
      )
    )
  }
}
|
||||
|
||||
/** A short circuit boolean operator, and/or */
|
||||
class Boolop extends Boolop_ { } // declares no members of its own
|
||||
|
||||
/** An `and` boolean operator */
|
||||
class And extends And_ { } // declares no members of its own
|
||||
|
||||
/** An `or` boolean operator */
|
||||
class Or extends Or_ { } // declares no members of its own
|
||||
@@ -1,687 +0,0 @@
|
||||
/**
|
||||
* Provides queries to pretty-print a Python AST as a graph.
|
||||
*
|
||||
* By default, this will print the AST for all elements in the database. To change this behavior,
|
||||
* extend `PrintAstConfiguration` and override `shouldPrint` to hold for only the elements
|
||||
* you wish to view the AST for.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.RegexTreeView
|
||||
|
||||
/** The single-branch type underlying `PrintAstConfiguration`. */
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()
|
||||
|
||||
/**
|
||||
* The query can extend this class to control which elements are printed.
|
||||
*/
|
||||
class PrintAstConfiguration extends TPrintAstConfiguration {
  /** Gets a textual representation of this `PrintAstConfiguration`. */
  string toString() { result = "PrintAstConfiguration" }

  /**
   * Controls whether the `AstNode` should be considered for AST printing.
   * By default it checks whether the `AstNode` `e` belongs to `Location` `l`.
   */
  predicate shouldPrint(AstNode e, Location l) { e.getLocation() = l }
}
|
||||
|
||||
/** Holds if some `PrintAstConfiguration` selects `e` at location `l` for printing. */
private predicate shouldPrint(AstNode e, Location l) {
  any(PrintAstConfiguration config).shouldPrint(e, l)
}
|
||||
|
||||
/** Holds if the given element does not need to be rendered in the AST. */
|
||||
private predicate isNotNeeded(AstNode el) {
  el instanceof Module
  or
  el.isArtificial()
  or
  // Children of a non-rendered node are not rendered either, except that
  // a `Module` does not hide its children.
  exists(AstNode ancestor |
    not ancestor instanceof Module and
    isNotNeeded(ancestor) and
    el = ancestor.getAChildNode()
  )
}
|
||||
|
||||
/**
|
||||
* Printed nodes.
|
||||
*/
|
||||
private newtype TPrintAstNode =
  // One node per printable AST element.
  TElementNode(AstNode el) { not isNotNeeded(el) and shouldPrint(el, _) } or
  // One synthetic node per printable function.
  TFunctionParamsNode(Function f) { not isNotNeeded(f) and shouldPrint(f, _) } or
  // One synthetic node per printable call.
  TCallArgumentsNode(Call c) { not isNotNeeded(c) and shouldPrint(c, _) } or
  // One node per statement list, other than a module body, that has a printable item.
  TStmtListNode(StmtList list) {
    exists(list.getAnItem()) and
    shouldPrint(list.getAnItem(), _) and
    not list = any(Module mod).getBody() and
    exists(AstNode child | child = list.getAnItem() and not isNotNeeded(child))
  } or
  // One node per term of a regular expression parsed from a printable string constant.
  TRegExpTermNode(RegExpTerm term) {
    exists(StrConst str | shouldPrint(str, _) and term.getRootTerm() = getParsedRegExp(str))
  }
|
||||
|
||||
/**
|
||||
* A node in the output tree.
|
||||
*/
|
||||
class PrintAstNode extends TPrintAstNode {
  /**
   * Gets a textual representation of this node in the PrintAst output tree.
   */
  string toString() { none() }

  /**
   * Gets the child node at index `childIndex`. Child indices must be unique,
   * but need not be contiguous.
   */
  PrintAstNode getChild(int childIndex) { none() }

  /**
   * Gets a child of this node.
   */
  final PrintAstNode getAChild() { result = this.getChild(_) }

  /**
   * Gets the parent of this node, if any.
   */
  final PrintAstNode getParent() { this = result.getAChild() }

  /**
   * Gets the location of this node in the source code.
   */
  Location getLocation() { none() }

  /**
   * Gets the value of the property of this node, where the name of the property
   * is `key`.
   */
  string getProperty(string key) {
    // The only property defined here is the label, which is the node's `toString()`.
    result = this.toString() and
    key = "semmle.label"
  }

  /**
   * Gets the label for the edge from this node to the specified child. By
   * default, this is just the index of the child, but subclasses can override
   * this.
   */
  string getChildEdgeLabel(int childIndex) {
    result = childIndex.toString() and
    exists(this.getChild(childIndex))
  }
}
|
||||
|
||||
/** A top-level AST node. */
|
||||
class TopLevelPrintAstNode extends PrintAstNode {
  TopLevelPrintAstNode() { not exists(this.getParent()) }

  /**
   * Gets the rank of this node among all top-level nodes, ordered by relative
   * file path and then by start and end position.
   */
  private int getOrder() {
    this =
      rank[result](TopLevelPrintAstNode node, Location loc |
        loc = node.getLocation()
      |
        node
        order by
          loc.getFile().getRelativePath(), loc.getStartLine(), loc.getStartColumn(),
          loc.getEndLine(), loc.getEndColumn()
      )
  }

  /** Gets the inherited properties and, in addition, `semmle.order` derived from `getOrder()`. */
  override string getProperty(string key) {
    key = "semmle.order" and
    result = this.getOrder().toString()
    or
    result = super.getProperty(key)
  }
}
|
||||
|
||||
/**
|
||||
* An `AstNode` printed in the print-viewer.
|
||||
*
|
||||
* This class can be overridden to define more specific behavior for some `AstNode`s.
|
||||
* The `getChildNode` and `getStmtList` methods can be overridden to easily set up a child-parent relation between different `AstElementNode`s.
|
||||
* Be very careful about overriding `getChild`, as `getChildNode` and `getStmtList` depend on the default behavior of `getChild`.
|
||||
*/
|
||||
class AstElementNode extends PrintAstNode, TElementNode {
  AstNode element;

  AstElementNode() { this = TElementNode(element) }

  /** Gets the label `[<QL class>] <pretty-printed element>` for this node. */
  override string toString() {
    result = "[" + PrettyPrinting::getQlClass(element) + "] " + PrettyPrinting::prettyPrint(element)
  }

  /** Gets the location of the `AstNode` that this print node represents. */
  override Location getLocation() { result = element.getLocation() }

  /**
   * Gets the `AstNode` that is printed by this print node.
   */
  final AstNode getAstNode() { result = element }

  /**
   * Gets the child at `childIndex`: first the nodes from `getChildNode` that
   * are not items of a statement list, then the statement lists from `getStmtList`.
   */
  override PrintAstNode getChild(int childIndex) {
    exists(AstNode child |
      child = this.getChildNode(childIndex) and
      // Statements inside a `StmtList` are shown under that list's node instead.
      not child = this.getStmtList(_, _).getAnItem() and
      result.(AstElementNode).getAstNode() = child
    )
    or
    // displaying all `StmtList` after the other children.
    exists(int offset, int listIndex |
      offset = 1 + max([0, any(int i | exists(this.getChildNode(i)))]) and
      childIndex = listIndex + offset and
      result.(StmtListNode).getList() = this.getStmtList(listIndex, _)
    )
  }

  /**
   * Gets a child node for the AstNode that this print node represents.
   *
   * The default behavior in `getChild` uses `getChildNode` to easily define a parent-child relation between different `AstElementNode`s.
   */
  AstNode getChildNode(int childIndex) { result = getChild(element, childIndex) }

  /**
   * Gets the `index`th `StmtList` that is a child of the `AstNode` that this print node represents.
   * `label` is used for pretty-printing a label in the parent-child relation in the ast-viewer.
   *
   * The `StmtListNode` class and the `getChild` predicate uses `getStmtList` to define a parent-child relation with labels.
   *
   * `index` must be 0 or positive.
   */
  StmtList getStmtList(int index, string label) { none() }
}
|
||||
|
||||
/**
|
||||
* A print node for `Try` statements.
|
||||
*/
|
||||
class TryNode extends AstElementNode {
  override Try element;

  /** Gets the body, `orelse`, handlers and final body of the `Try`, at indices 0 to 3. */
  override StmtList getStmtList(int index, string label) {
    index = 0 and label = "body" and result = element.getBody()
    or
    index = 1 and label = "orelse" and result = element.getOrelse()
    or
    index = 2 and label = "handlers" and result = element.getHandlers()
    or
    index = 3 and label = "final body" and result = element.getFinalbody()
  }
}
|
||||
|
||||
/**
|
||||
* A print node for `If` statements.
|
||||
*/
|
||||
class IfNode extends AstElementNode {
  override If element;

  /** Gets the test of the `If`, which is its only direct child, at index 0. */
  override AstNode getChildNode(int childIndex) {
    result = element.getTest() and
    childIndex = 0
  }

  /** Gets the body (index 1) or the `orelse` branch (index 2) of the `If`. */
  override StmtList getStmtList(int index, string label) {
    index = 1 and label = "body" and result = element.getBody()
    or
    index = 2 and label = "orelse" and result = element.getOrelse()
  }
}
|
||||
|
||||
/**
 * The print node representing a `Class`.
 *
 * The class body is exposed as a labelled `StmtList` child.
 */
class ClassNode extends AstElementNode {
  override Class element;

  override StmtList getStmtList(int index, string label) {
    label = "body" and index = 1 and result = element.getBody()
  }
}
|
||||
|
||||
/**
 * The print node representing an `ExceptStmt`.
 *
 * The handler body is exposed as a labelled `StmtList` child.
 */
class ExceptNode extends AstElementNode {
  override ExceptStmt element;

  override StmtList getStmtList(int index, string label) {
    label = "body" and index = 1 and result = element.getBody()
  }
}
|
||||
|
||||
/**
 * The print node representing a `With` statement.
 *
 * The body of the `With` is exposed as a labelled `StmtList` child.
 */
class WithNode extends AstElementNode {
  override With element;

  override StmtList getStmtList(int index, string label) {
    label = "body" and index = 1 and result = element.getBody()
  }
}
|
||||
|
||||
/**
 * The print node representing a `For` statement.
 *
 * The loop body and the `orelse` branch are exposed as labelled `StmtList` children.
 */
class ForPrintNode extends AstElementNode {
  override For element;

  override StmtList getStmtList(int index, string label) {
    label = "body" and index = 1 and result = element.getBody()
    or
    label = "orelse" and index = 2 and result = element.getOrelse()
  }
}
|
||||
|
||||
/**
 * The print node representing a `While` statement.
 *
 * The loop body and the `orelse` branch are exposed as labelled `StmtList` children.
 */
class WhilePrintNode extends AstElementNode {
  override While element;

  override StmtList getStmtList(int index, string label) {
    label = "body" and index = 1 and result = element.getBody()
    or
    label = "orelse" and index = 2 and result = element.getOrelse()
  }
}
|
||||
|
||||
/**
 * The print node representing a `StmtList`.
 *
 * Such a node only exists for a `StmtList` that some `AstElementNode` reports
 * through its `getStmtList` predicate; that same predicate supplies the label
 * shown for the list.
 */
class StmtListNode extends PrintAstNode, TStmtListNode {
  StmtList list;

  StmtListNode() {
    exists(AstElementNode parent | list = parent.getStmtList(_, _)) and
    this = TStmtListNode(list)
  }

  /** Gets the `StmtList` wrapped by this print node. */
  StmtList getList() { result = list }

  /** Gets a label that an `AstElementNode` assigns to the wrapped list. */
  private string getLabel() {
    exists(AstElementNode parent | parent.getStmtList(_, result) = this.getList())
  }

  override string toString() { result = "(StmtList) " + this.getLabel() }

  override PrintAstNode getChild(int childIndex) {
    exists(AstNode item |
      item = list.getItem(childIndex) and
      item = result.(AstElementNode).getAstNode()
    )
  }
}
|
||||
|
||||
/**
 * The print node representing a `Call`.
 *
 * Child 0 is the called function; child 1 is a synthetic `CallArgumentsNode`
 * that groups the arguments of the call.
 */
class CallPrintNode extends AstElementNode {
  override Call element;

  override PrintAstNode getChild(int childIndex) {
    element.getFunc() = result.(AstElementNode).getAstNode() and childIndex = 0
    or
    element = result.(CallArgumentsNode).getCall() and childIndex = 1
  }
}
|
||||
|
||||
/**
 * A synthetic print node that groups the arguments of a `Call`.
 */
class CallArgumentsNode extends PrintAstNode, TCallArgumentsNode {
  Call call;

  CallArgumentsNode() { this = TCallArgumentsNode(call) }

  /** Gets the call whose arguments this print node groups. */
  Call getCall() { result = call }

  override string toString() { result = "(arguments)" }

  override PrintAstNode getChild(int childIndex) {
    exists(AstElementNode argNode | argNode = result |
      argNode.getAstNode() = getChild(call, childIndex) and
      // the called function is shown by `CallPrintNode`, not among the arguments
      not argNode.getAstNode() = call.getFunc()
    )
  }
}
|
||||
|
||||
/**
 * The print node representing a `Function`.
 *
 * Child 0 is a synthetic `FunctionParamsNode`; the remaining children are the
 * default children minus everything that is displayed below the parameters.
 */
class FunctionNode extends AstElementNode {
  override Function element;

  override PrintAstNode getChild(int childIndex) {
    exists(FunctionParamsNode params |
      element = params.getFunction() and
      (
        result = params and childIndex = 0
        or
        result = AstElementNode.super.getChild(childIndex) and
        // the annotation of a parameter is shown by `ParameterNode`
        not exists(Parameter p | result.(AstElementNode).getAstNode() = p.getAnnotation()) and
        // the default value of a parameter is shown by `ParameterNode`
        not exists(Parameter p | result.(AstElementNode).getAstNode() = p.getDefault()) and
        // the parameters themselves are shown by `params`
        not params.getChild(_).(AstElementNode).getAstNode() =
          result.(AstElementNode).getAstNode()
      )
    )
  }

  override StmtList getStmtList(int index, string label) {
    label = "body" and index = 1 and result = element.getBody()
  }
}
|
||||
|
||||
/**
 * The print node representing a `FunctionDef`.
 *
 * Child 0 is the first target of the definition and child 1 is its value.
 */
class FunctionDefNode extends AstElementNode {
  override FunctionDef element;

  override AstNode getChildNode(int childIndex) {
    result = element.getValue() and childIndex = 1
    or
    result = element.getTarget(0) and childIndex = 0
  }
}
|
||||
|
||||
/**
 * A synthetic print node that groups the parameters of a `Function`.
 */
class FunctionParamsNode extends PrintAstNode, TFunctionParamsNode {
  Function func;

  FunctionParamsNode() { this = TFunctionParamsNode(func) }

  /** Gets the `Function` whose parameters this print node groups. */
  Function getFunction() { result = func }

  override string toString() { result = "(parameters)" }

  override PrintAstNode getChild(int childIndex) {
    // Any child of the function that is not one of its statements counts as a parameter.
    exists(AstNode param |
      param = getChild(func, childIndex) and
      not func.getAStmt() = param and
      param = result.(AstElementNode).getAstNode()
    )
  }
}
|
||||
|
||||
/**
 * The print node representing a `Parameter`.
 *
 * The annotation (child 0) and the default value (child 1) of the parameter
 * are attached here; by default they would show up as children of the
 * enclosing `Function`.
 */
class ParameterNode extends AstElementNode {
  Parameter param;

  ParameterNode() { param.asName() = this.getAstNode() or param.asTuple() = this.getAstNode() }

  override AstNode getChildNode(int childIndex) {
    result = param.getDefault() and childIndex = 1
    or
    result = param.getAnnotation() and childIndex = 0
  }
}
|
||||
|
||||
/**
 * The print node representing a `StrConst`.
 *
 * When `getParsedRegExp` yields a term for the string, that term becomes
 * the only child of this node.
 */
class StrConstNode extends AstElementNode {
  override StrConst element;

  override PrintAstNode getChild(int childIndex) {
    getParsedRegExp(element) = result.(RegExpTermNode).getTerm() and childIndex = 0
  }
}
|
||||
|
||||
/**
 * The print node representing a term of a regular expression.
 */
class RegExpTermNode extends TRegExpTermNode, PrintAstNode {
  RegExpTerm term;

  RegExpTermNode() { this = TRegExpTermNode(term) }

  /** Gets the `RegExpTerm` wrapped by this print node. */
  RegExpTerm getTerm() { result = term }

  override PrintAstNode getChild(int childIndex) {
    term.getChild(childIndex) = result.(RegExpTermNode).getTerm()
  }

  override string toString() {
    exists(string qlClasses | qlClasses = strictconcat(term.getPrimaryQLClass(), " | ") |
      result = "[" + qlClasses + "] " + term.toString()
    )
  }

  override Location getLocation() { term.getLocation() = result }
}
|
||||
|
||||
/**
 * Gets the child of `node` that has rank `i` when the children are sorted by
 * start line, start column, end line and end column of their location.
 *
 * Only has results for nodes accepted by `shouldPrint`.
 */
private AstNode getChild(AstNode node, int i) {
  result =
    rank[i](AstNode c |
      node.getAChildNode() = c
    |
      c
      order by
        c.getLocation().getStartLine(), c.getLocation().getStartColumn(),
        c.getLocation().getEndLine(), c.getLocation().getEndColumn()
    ) and
  shouldPrint(node, _)
}
|
||||
|
||||
/**
 * A module for pretty-printing some `AstNode`s.
 *
 * All predicates in this module are restricted to nodes accepted by `shouldPrint`.
 */
private module PrettyPrinting {
  /**
   * Gets the QL class for the `AstNode` `a`.
   * Most `AstNode`s print their QL class in the `toString()` method, however there are exceptions.
   * These exceptions are handled in the `getQlCustomClass` predicate.
   *
   * Multiple candidate names are joined with `" | "` so that there is at most one result.
   */
  string getQlClass(AstNode a) {
    shouldPrint(a, _) and
    (
      not exists(getQlCustomClass(a)) and result = strictconcat(a.toString(), " | ")
      or
      result = strictconcat(getQlCustomClass(a), " | ")
    )
  }

  /**
   * Gets the QL class for `AstNode`s where the `toString` method does not print the QL class.
   */
  string getQlCustomClass(AstNode a) {
    shouldPrint(a, _) and
    (
      a instanceof Name and
      result = "Name" and
      not a instanceof Parameter and
      not a instanceof NameConstant
      or
      a instanceof Parameter and result = "Parameter"
      or
      a instanceof PlaceHolder and result = "PlaceHolder"
      or
      a instanceof Function and result = "Function"
      or
      a instanceof Class and result = "Class"
      or
      a instanceof Call and result = "Call"
      or
      a instanceof NameConstant and result = "NameConstant"
    )
  }

  /**
   * Gets a human-readable representation of the `AstNode` `a`, or the empty string.
   *
   * Has exactly one result for every `AstNode` accepted by `shouldPrint`.
   */
  string prettyPrint(AstNode a) {
    shouldPrint(a, _) and
    (
      // this strictconcat should not be needed.
      // However, the printAst feature breaks if this predicate has more than one result for an `AstNode`, so the strictconcat stays.
      result = strictconcat(reprRec(a), " | ")
      or
      not exists(reprRec(a)) and
      result = ""
    )
  }

  /**
   * Gets a human-readable representation of the given `AstNode`.
   *
   * Only has a result for some `AstNode`s.
   *
   * The monotonicity of this recursive predicate is kept by defining the non-recursive cases inside the `reprBase` predicate,
   * and then using `reprBase` when there is a negative edge.
   */
  private string reprRec(AstNode a) {
    shouldPrint(a, _) and
    not isNotNeeded(a) and
    (
      // All non-recursive cases (names, constants, simple statements, ...) live in `reprBase`
      result = reprBase(a)
      or
      exists(Expr obj | obj = a.(Attribute).getObject() |
        // Attributes of the form `name.name2`
        result = reprBase(obj) + "." + a.(Attribute).getName()
        or
        // Attributes where the object is a more complicated expression
        not exists(reprBase(obj)) and
        result = "(...)." + a.(Attribute).getName()
      )
      or
      result = "import " + reprRec(a.(Import).getName(_).getAsname())
      or
      exists(Keyword keyword | keyword = a |
        result = keyword.getArg() + "=" + reprRec(keyword.getValue())
      )
      or
      result = reprRec(a.(Call).getFunc()) + "(" + printArgs(a) + ")"
      or
      // `printArgs` is built on `reprBase` only, so this negation does not break monotonicity
      not exists(printArgs(a)) and result = reprRec(a.(Call).getFunc()) + "(...)"
      or
      result = "try " + reprRec(a.(Try).getBody().getItem(0))
      or
      result = "if " + reprRec(a.(If).getTest()) + ":"
      or
      result = reprRec(a.(Compare).getLeft()) + " " + a.(Compare).getOp(0).getSymbol() + " ..."
      or
      result = a.(Subscript).getObject() + "[" + reprRec(a.(Subscript).getIndex()) + "]"
      or
      exists(Assign asn | asn = a |
        strictcount(asn.getTargets()) = 1 and
        result = reprRec(a.(Assign).getTarget(0)) + " = " + reprRec(asn.getValue())
      )
      or
      result = "return " + reprRec(a.(Return).getValue())
      or
      result = reprRec(a.(ExprStmt).getValue())
      or
      exists(BoolExpr b, string op |
        a = b and
        (
          b.getOp() instanceof And and op = "and"
          or
          b.getOp() instanceof Or and op = "or"
        )
      |
        result = reprRec(b.getValue(0)) + " " + op + " " + reprRec(b.getValue(1))
      )
    )
  }

  /**
   * Gets a comma separated pretty printed list of the arguments in `call`,
   * in the order in which the arguments appear in the call.
   *
   * Has no result if the call has arguments but none of them has a `reprBase` representation.
   */
  string printArgs(Call call) {
    not exists(call.getAnArg()) and result = ""
    or
    // Without an explicit `order by`, the pieces would be ordered by their text
    // instead of by their position in the call.
    result = strictconcat(int i | | reprBase(call.getArg(i)), ", " order by i)
  }

  /**
   * Gets a human-readable representation of the given `AstNode`.
   * Is only defined for `AstNode`s for which a human-readable representation can be created without using recursion.
   */
  private string reprBase(AstNode a) {
    shouldPrint(a, _) and
    not isNotNeeded(a) and
    (
      result = a.(Name).getId()
      or
      result = a.(PlaceHolder).toString()
      or
      result = "class " + a.(ClassExpr).getName()
      or
      result = "class " + a.(Class).getName()
      or
      result = a.(StrConst).getText()
      or
      result = "yield " + a.(Yield).getValue()
      or
      result = "yield from " + a.(YieldFrom).getValue()
      or
      result = "*" + a.(Starred).getValue()
      or
      result = "`" + a.(Repr).getValue() + "`"
      or
      a instanceof Ellipsis and result = "..."
      or
      result = a.(Num).getText()
      or
      result = a.(NegativeIntegerLiteral).getValue().toString()
      or
      result = a.(NameConstant).toString()
      or
      result = "await " + a.(Await).getValue()
      or
      result = "function " + a.(FunctionExpr).getName() + "(...)"
      or
      result = "function " + a.(Function).getName() + "(...)"
      or
      a instanceof List and result = "[...]"
      or
      a instanceof Set and result = "{...}"
      or
      a instanceof Continue and result = "continue"
      or
      a instanceof Break and result = "break"
      or
      a instanceof Pass and result = "pass"
    )
  }
}
|
||||
|
||||
/**
 * Holds if `node` is part of the output tree and `value` is the value of
 * its property `key`.
 */
query predicate nodes(PrintAstNode node, string key, string value) {
  node.getProperty(key) = value
}
|
||||
|
||||
/**
 * Holds if `target` is a child of `source` in the output tree, and the edge
 * between them has property `key` with the given `value`.
 *
 * Every edge carries a `semmle.label` and a `semmle.order` property.
 */
query predicate edges(PrintAstNode source, PrintAstNode target, string key, string value) {
  exists(int childIndex | source.getChild(childIndex) = target |
    key = "semmle.order" and value = childIndex.toString()
    or
    key = "semmle.label" and value = source.getChildEdgeLabel(childIndex)
  )
}
|
||||
|
||||
/**
 * Holds if the output graph has property `key` with the given `value`.
 *
 * The only property reported is the graph kind, which is `tree`.
 */
query predicate graphProperties(string key, string value) {
  value = "tree" and key = "semmle.graphKind"
}
|
||||
@@ -1,973 +0,0 @@
|
||||
/** Provides a class hierarchy corresponding to a parse tree of regular expressions. */
|
||||
|
||||
import python
|
||||
private import semmle.python.regex
|
||||
|
||||
/**
 * The algebraic type behind `RegExpParent`: either a string literal that is
 * parsed as a regular expression, or a term found between two offsets of
 * such a literal.
 */
newtype TRegExpParent =
  /** A string literal used as a regular expression */
  TRegExpLiteral(Regex re) or
  /** A quantified term */
  TRegExpQuantifier(Regex re, int startPos, int endPos) {
    re.qualifiedItem(startPos, endPos, _, _)
  } or
  /** A sequence term */
  TRegExpSequence(Regex re, int startPos, int endPos) { re.sequence(startPos, endPos) } or
  /** An alternation term */
  TRegExpAlt(Regex re, int startPos, int endPos) { re.alternation(startPos, endPos) } or
  /** A character class term */
  TRegExpCharacterClass(Regex re, int startPos, int endPos) { re.charSet(startPos, endPos) } or
  /** A character range term */
  TRegExpCharacterRange(Regex re, int startPos, int endPos) {
    re.charRange(_, startPos, _, _, endPos)
  } or
  /** A group term */
  TRegExpGroup(Regex re, int startPos, int endPos) { re.group(startPos, endPos) } or
  /** A special character */
  TRegExpSpecialChar(Regex re, int startPos, int endPos) {
    re.specialCharacter(startPos, endPos, _)
  } or
  /** A normal character */
  TRegExpNormalChar(Regex re, int startPos, int endPos) { re.normalCharacter(startPos, endPos) } or
  /** A back reference */
  TRegExpBackRef(Regex re, int startPos, int endPos) { re.backreference(startPos, endPos) }
|
||||
|
||||
/**
 * Something that can contain regular expression terms: a string literal
 * parsed as a regular expression, or a regular expression term itself.
 */
class RegExpParent extends TRegExpParent {
  /** Gets a textual representation of this element. */
  string toString() { result = "RegExpParent" }

  /** Gets the child term at index `i`. */
  abstract RegExpTerm getChild(int i);

  /** Gets any child term of this element. */
  RegExpTerm getAChild() { result = this.getChild(_) }

  /** Gets the number of distinct child terms of this element. */
  int getNumChild() { result = count(this.getAChild()) }

  /** Gets the regex this element belongs to. */
  abstract Regex getRegex();
}
|
||||
|
||||
/**
 * A string literal that is used as a regular expression.
 *
 * Its only child is the root term of the regular expression.
 */
class RegExpLiteral extends TRegExpLiteral, RegExpParent {
  Regex re;

  RegExpLiteral() { this = TRegExpLiteral(re) }

  override RegExpTerm getChild(int i) { result.isRootTerm() and result.getRegex() = re and i = 0 }

  /** Holds if the regex has the `DOTALL` mode. */
  predicate isDotAll() { "DOTALL" = re.getAMode() }

  override Regex getRegex() { result = re }

  /** Gets the primary QL class for this literal. */
  string getPrimaryQLClass() { result = "RegExpLiteral" }
}
|
||||
|
||||
/**
 * A syntactic part of a regular expression, identified by the regex it
 * occurs in together with its start and end offsets in the regex text.
 */
class RegExpTerm extends RegExpParent {
  Regex re;
  int start;
  int end;

  RegExpTerm() {
    this = TRegExpNormalChar(re, start, end)
    or
    this = TRegExpSpecialChar(re, start, end)
    or
    this = TRegExpBackRef(re, start, end)
    or
    this = TRegExpCharacterRange(re, start, end)
    or
    this = TRegExpCharacterClass(re, start, end)
    or
    this = TRegExpGroup(re, start, end)
    or
    this = TRegExpQuantifier(re, start, end)
    or
    this = TRegExpAlt(re, start, end)
    or
    // A sequence with fewer than two elements is represented by its element instead.
    this = TRegExpSequence(re, start, end) and
    exists(seqChild(re, start, end, 1))
  }

  /**
   * Gets the outermost term of the regular expression containing this term.
   */
  RegExpTerm getRootTerm() {
    result = this and this.isRootTerm()
    or
    result = this.getParent().(RegExpTerm).getRootTerm()
  }

  /**
   * Holds if this term is part of a string literal
   * that is interpreted as a regular expression.
   *
   * This holds for every term.
   */
  predicate isUsedAsRegExp() { any() }

  /**
   * Holds if this term spans the complete text of its regular expression.
   */
  predicate isRootTerm() { end = re.getText().length() and start = 0 }

  override RegExpTerm getChild(int i) {
    result = this.(RegExpNormalChar).getChild(i)
    or
    result = this.(RegExpSpecialChar).getChild(i)
    or
    result = this.(RegExpBackRef).getChild(i)
    or
    result = this.(RegExpCharacterRange).getChild(i)
    or
    result = this.(RegExpCharacterClass).getChild(i)
    or
    result = this.(RegExpGroup).getChild(i)
    or
    result = this.(RegExpQuantifier).getChild(i)
    or
    result = this.(RegExpAlt).getChild(i)
    or
    result = this.(RegExpSequence).getChild(i)
  }

  /**
   * Gets the element that has this term as a child: the parent term, or the
   * regular expression literal when this is the root term.
   */
  RegExpParent getParent() { this = result.getAChild() }

  override Regex getRegex() { result = re }

  /** Gets the offset in the regex text at which this term starts. */
  int getStart() { result = start }

  /** Gets the offset in the regex text at which this term ends. */
  int getEnd() { result = end }

  override string toString() { result = re.getText().substring(start, end) }

  /**
   * Gets the location of the surrounding regex, as locations inside the regex do not exist.
   * Use `hasLocationInfo` to obtain location information for the term itself.
   */
  Location getLocation() { result = re.getLocation() }

  /** Holds if this term is found at the specified location offsets. */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(int literalStartColumn |
      re.getLocation().hasLocationInfo(filepath, startline, literalStartColumn, endline, _) and
      endcolumn = literalStartColumn + end + 3 and
      startcolumn = literalStartColumn + start + 4
    )
  }

  /** Gets the file containing this term. */
  File getFile() { result = this.getLocation().getFile() }

  /** Gets the raw source text of this term. */
  string getRawValue() { result = this.toString() }

  /** Gets the string literal containing this term. */
  RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }

  /** Gets the regular expression term that is matched (textually) before this one, if any. */
  RegExpTerm getPredecessor() {
    exists(RegExpTerm enclosing | enclosing = this.getParent() |
      result = enclosing.(RegExpSequence).previousElement(this)
      or
      // no earlier sibling: continue with whatever precedes the enclosing term
      result = enclosing.getPredecessor() and
      not enclosing instanceof RegExpSubPattern and
      not exists(enclosing.(RegExpSequence).previousElement(this))
    )
  }

  /** Gets the regular expression term that is matched (textually) after this one, if any. */
  RegExpTerm getSuccessor() {
    exists(RegExpTerm enclosing | enclosing = this.getParent() |
      result = enclosing.(RegExpSequence).nextElement(this)
      or
      // no later sibling: continue with whatever follows the enclosing term
      result = enclosing.getSuccessor() and
      not enclosing instanceof RegExpSubPattern and
      not exists(enclosing.(RegExpSequence).nextElement(this))
    )
  }

  /** Gets the primary QL class for this term. */
  string getPrimaryQLClass() { result = "RegExpTerm" }
}
|
||||
|
||||
/**
 * A regular expression term that carries a quantifier.
 *
 * Example:
 *
 * ```
 * ((ECMA|Java)[sS]cript)*
 * ```
 */
class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
  // offset at which the quantified part ends and the quantifier itself begins
  int part_end;
  // bound by `qualifiedPart` in the characteristic predicate
  boolean maybe_empty;
  // bound by `qualifiedPart`; read by `mayRepeatForever`
  boolean may_repeat_forever;

  RegExpQuantifier() {
    re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever) and
    this = TRegExpQuantifier(re, start, end)
  }

  override RegExpTerm getChild(int i) {
    result.getEnd() = part_end and
    result.getStart() = start and
    result.getRegex() = re and
    i = 0
  }

  /** Holds if `may_repeat_forever` is `true` for this quantifier. */
  predicate mayRepeatForever() { may_repeat_forever = true }

  /** Gets the text of the quantifier, that is, the text following the quantified part. */
  string getQualifier() { result = re.getText().substring(part_end, end) }

  override string getPrimaryQLClass() { result = "RegExpQuantifier" }
}
|
||||
|
||||
/**
 * A quantified regular expression term for which `mayRepeatForever()` holds,
 * that is, one that places no upper limit on the number of repetitions.
 */
class InfiniteRepetitionQuantifier extends RegExpQuantifier {
  InfiniteRepetitionQuantifier() { this.mayRepeatForever() }
}
|
||||
|
||||
/**
 * A term quantified with `*`.
 *
 * Example:
 *
 * ```
 * \w*
 * ```
 */
class RegExpStar extends InfiniteRepetitionQuantifier {
  RegExpStar() { "*" = this.getQualifier().charAt(0) }

  override string getPrimaryQLClass() { result = "RegExpStar" }
}
|
||||
|
||||
/**
 * A term quantified with `+`.
 *
 * Example:
 *
 * ```
 * \w+
 * ```
 */
class RegExpPlus extends InfiniteRepetitionQuantifier {
  RegExpPlus() { "+" = this.getQualifier().charAt(0) }

  override string getPrimaryQLClass() { result = "RegExpPlus" }
}
|
||||
|
||||
/**
 * A term quantified with `?`, that is, an optional term.
 *
 * Example:
 *
 * ```
 * ;?
 * ```
 */
class RegExpOpt extends RegExpQuantifier {
  RegExpOpt() { "?" = this.getQualifier().charAt(0) }

  override string getPrimaryQLClass() { result = "RegExpOpt" }
}
|
||||
|
||||
/**
 * A term quantified with an explicit range.
 *
 * Examples:
 *
 * ```
 * \w{2,4}
 * \w{2,}
 * \w{2}
 * ```
 */
class RegExpRange extends RegExpQuantifier {
  string upper;
  string lower;

  RegExpRange() { re.multiples(part_end, end, lower, upper) }

  /** Gets the text of the upper bound, as reported by `multiples`. */
  string getUpper() { result = upper }

  /** Gets the text of the lower bound, as reported by `multiples`. */
  string getLower() { result = lower }

  /**
   * Gets the upper bound of the range, if any.
   *
   * If there is no upper bound, any number of repetitions is allowed.
   * For a term of the form `r{lo}`, both the lower and the upper bound
   * are `lo`.
   */
  int getUpperBound() { this.getUpper().toInt() = result }

  /** Gets the lower bound of the range. */
  int getLowerBound() { this.getLower().toInt() = result }

  override string getPrimaryQLClass() { result = "RegExpRange" }
}
|
||||
|
||||
/**
 * A sequence term: two or more terms matched one after the other.
 *
 * Example:
 *
 * ```
 * (ECMA|Java)Script
 * ```
 *
 * This is a sequence with the elements `(ECMA|Java)` and `Script`.
 */
class RegExpSequence extends RegExpTerm, TRegExpSequence {
  RegExpSequence() {
    // A sequence needs an element at index 1; with fewer elements it is
    // represented by its element instead.
    exists(seqChild(re, start, end, 1)) and
    this = TRegExpSequence(re, start, end)
  }

  override RegExpTerm getChild(int i) { seqChild(re, start, end, i) = result }

  /** Gets the element that comes directly before `element` in this sequence. */
  RegExpTerm previousElement(RegExpTerm element) { this.nextElement(result) = element }

  /** Gets the element that comes directly after `element` in this sequence. */
  RegExpTerm nextElement(RegExpTerm element) {
    exists(int pos | this.getChild(pos) = element | result = this.getChild(pos + 1))
  }

  override string getPrimaryQLClass() { result = "RegExpSequence" }
}
|
||||
|
||||
/**
 * Gets the end offset of the `i`th element of the sequence spanning
 * `start` to `end` in `re`.
 */
pragma[nomagic]
private int seqChildEnd(Regex re, int start, int end, int i) {
  exists(RegExpTerm element | element = seqChild(re, start, end, i) | result = element.getEnd())
}
|
||||
|
||||
/**
 * Gets the `i`th element of the sequence spanning `start` to `end` in `re`.
 *
 * Element 0 is the item starting at `start`; each later element is the item
 * starting where the previous element ends.
 *
 * This is a top-level predicate so that it can be used in characteristic predicates.
 */
private RegExpTerm seqChild(Regex re, int start, int end, int i) {
  re.sequence(start, end) and
  result.getRegex() = re and
  (
    i = 0 and
    result.getStart() = start and
    re.item(start, result.getEnd())
    or
    i > 0 and
    exists(int prevEnd | prevEnd = seqChildEnd(re, start, end, i - 1) |
      re.item(prevEnd, result.getEnd()) and
      result.getStart() = prevEnd
    )
  )
}
|
||||
|
||||
/**
 * An alternation term, that is, a term of the form `a|b`.
 *
 * Example:
 *
 * ```
 * ECMA|Java
 * ```
 */
class RegExpAlt extends RegExpTerm, TRegExpAlt {
  RegExpAlt() { this = TRegExpAlt(re, start, end) }

  override RegExpTerm getChild(int i) {
    result.getRegex() = re and
    (
      // the first option starts where the alternation starts
      i = 0 and
      result.getStart() = start and
      re.alternationOption(start, end, start, result.getEnd())
      or
      // every later option starts one past the previous option, skipping the `|`
      i > 0 and
      exists(int optionStart | optionStart = this.getChild(i - 1).getEnd() + 1 |
        re.alternationOption(start, end, optionStart, result.getEnd()) and
        result.getStart() = optionStart
      )
    )
  }

  override string getPrimaryQLClass() { result = "RegExpAlt" }
}
|
||||
|
||||
/**
 * An escaped regular expression term, that is, a regular expression
 * term starting with a backslash, which is not a backreference.
 *
 * Example:
 *
 * ```
 * \.
 * \w
 * ```
 */
class RegExpEscape extends RegExpNormalChar {
  RegExpEscape() { re.escapedCharacter(start, end) }

  /**
   * Gets the value of this escape:
   * - for `\n`, `\r`, `\t` and `\f`, the control character they denote;
   * - for unicode escapes such as `\u0061`, the denoted character (`a`);
   * - otherwise the text after the backslash; for example, `w` for `\w`.
   *
   * TODO: Handle named escapes.
   */
  override string getValue() {
    // Unicode escapes are excluded here: `isIdentityEscape` also holds for them,
    // which would otherwise add the raw text (e.g. `u0061`) as a second value.
    not this.isUnicode() and
    this.isIdentityEscape() and
    result = this.getUnescaped()
    or
    this.getUnescaped() = "n" and result = "\n"
    or
    this.getUnescaped() = "r" and result = "\r"
    or
    this.getUnescaped() = "t" and result = "\t"
    or
    // formfeed is built from its code point
    this.getUnescaped() = "f" and result = 12.toUnicode()
    or
    this.isUnicode() and
    result = this.getUnicode()
  }

  /** Holds if the text following the backslash is not one of `n`, `r`, `t` or `f`. */
  predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t", "f"] }

  override string getPrimaryQLClass() { result = "RegExpEscape" }

  /** Gets the text of this escape without the leading backslash. */
  string getUnescaped() { result = this.getText().suffix(1) }

  /**
   * Gets the text for this escape. That is e.g. "\w".
   */
  private string getText() { result = re.getText().substring(start, end) }

  /**
   * Holds if this is a unicode escape, that is, its text starts with `\u` or `\U`.
   */
  private predicate isUnicode() { this.getText().prefix(2) = ["\\u", "\\U"] }

  /**
   * Gets the unicode char for this escape.
   * E.g. for `\u0061` this returns "a".
   */
  private string getUnicode() {
    exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
      result = codepoint.toUnicode()
    )
  }

  /**
   * Gets int value for the `index`th char in the hex number of the unicode escape.
   * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
   */
  private int getHexValueFromUnicode(int index) {
    this.isUnicode() and
    exists(string hex, string char | hex = this.getText().suffix(2) |
      char = hex.charAt(index) and
      // weight the digit by its position, counted from the right
      result = 16.pow(hex.length() - index - 1) * toHex(char)
    )
  }
}
|
||||
|
||||
/**
 * Gets the numeric value of the single hexadecimal digit `hex`.
 *
 * Both upper-case and lower-case letters are accepted.
 */
private int toHex(string hex) {
  // decimal digits
  result = [0 .. 9] and
  hex = result.toString()
  or
  // letters `a` to `f` in either case
  exists(int offset | offset = [0 .. 5] |
    hex = ["abcdef".charAt(offset), "ABCDEF".charAt(offset)] and
    result = 10 + offset
  )
}
|
||||
|
||||
/**
 * A character class escape in a regular expression.
 * That is, an escaped character that denotes multiple characters.
 *
 * Examples:
 *
 * ```
 * \w
 * \S
 * ```
 */
class RegExpCharacterClassEscape extends RegExpEscape {
  RegExpCharacterClassEscape() { ["d", "D", "s", "S", "w", "W"] = this.getValue() }

  /** Has no result: a character class escape has no child terms. */
  override RegExpTerm getChild(int i) { none() }

  override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
}
|
||||
|
||||
/**
 * A character class in a regular expression, i.e. a bracketed set of items.
 *
 * Examples:
 *
 * ```
 * [a-z_]
 * [^<>&]
 * ```
 */
class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
  RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }

  /** Holds if the character right after the start of this class is `^`. */
  predicate isInverted() { "^" = re.getChar(start + 1) }

  /** Gets the character of the regex at offset `i` from the start of this class. */
  string getCharThing(int i) { re.getChar(start + i) = result }

  /** Holds if this character class matches any character. */
  predicate isUniversalClass() {
    // [^]
    this.isInverted() and not exists(this.getAChild())
    or
    // [\w\W] and similar: two class escapes that differ only in case
    not this.isInverted() and
    exists(string lhs, string rhs |
      lhs = this.getAChild().(RegExpCharacterClassEscape).getValue() and
      rhs = this.getAChild().(RegExpCharacterClassEscape).getValue() and
      lhs != rhs and
      lhs.toLowerCase() = rhs.toLowerCase()
    )
  }

  /**
   * Gets the `i`th item of this class. Item 0 begins at the start of the set;
   * every later item begins where the previous item ends.
   */
  override RegExpTerm getChild(int i) {
    result.getRegex() = re and
    exists(int childStart, int childEnd |
      result.getStart() = childStart and
      result.getEnd() = childEnd and
      re.char_set_child(start, childStart, childEnd)
    |
      i = 0 and re.char_set_start(start, childStart)
      or
      i > 0 and childStart = this.getChild(i - 1).getEnd()
    )
  }

  override string getPrimaryQLClass() { "RegExpCharacterClass" = result }
}
|
||||
|
||||
/**
 * A range of characters inside a character class of a regular expression.
 *
 * Example:
 *
 * ```
 * a-z
 * ```
 */
class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
  // Offset at which the lower bound of the range ends.
  int lower_end;
  // Offset at which the upper bound of the range starts.
  int upper_start;

  RegExpCharacterRange() {
    re.charRange(_, start, lower_end, upper_start, end) and
    this = TRegExpCharacterRange(re, start, end)
  }

  /** Holds if `lo` is the text of the lower bound and `hi` the text of the upper bound. */
  predicate isRange(string lo, string hi) {
    re.getText().substring(start, lower_end) = lo and
    re.getText().substring(upper_start, end) = hi
  }

  /** Gets the lower bound term (`i = 0`) or the upper bound term (`i = 1`). */
  override RegExpTerm getChild(int i) {
    result.getRegex() = re and
    (
      i = 0 and result.getStart() = start and result.getEnd() = lower_end
      or
      i = 1 and result.getStart() = upper_start and result.getEnd() = end
    )
  }

  override string getPrimaryQLClass() { "RegExpCharacterRange" = result }
}
|
||||
|
||||
/**
 * A normal character in a regular expression: a character that carries no
 * special meaning. Escaped characters are included.
 *
 * Examples:
 * ```
 * t
 * \t
 * ```
 */
class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
  RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }

  /** Holds unconditionally for every normal character. */
  predicate isCharacter() { any() }

  /** Gets the source text of this character, taken from `start` to `end`. */
  string getValue() { re.getText().substring(start, end) = result }

  override RegExpTerm getChild(int i) { none() }

  override string getPrimaryQLClass() { "RegExpNormalChar" = result }
}
|
||||
|
||||
/**
 * A constant regular expression term, i.e. a term that matches a single
 * string. At present this is always a single character.
 *
 * Example:
 *
 * ```
 * a
 * ```
 */
class RegExpConstant extends RegExpTerm {
  string value;

  RegExpConstant() {
    this = TRegExpNormalChar(re, start, end) and
    value = this.(RegExpNormalChar).getValue() and
    not this instanceof RegExpCharacterClassEscape and
    // exclude chars in qualifiers
    // TODO: push this into regex library
    not exists(int qualStart, int qualEnd |
      re.qualifiedPart(_, qualStart, qualEnd, _, _) and
      start >= qualStart and
      qualEnd >= end
    )
    // This will never hold
    // or
    // this = TRegExpSpecialChar(re, start, end) and
    // re.inCharSet(start) and
    // value = this.(RegExpSpecialChar).getChar()
  }

  /** Holds unconditionally for every constant. */
  predicate isCharacter() { any() }

  /** Gets the string value of this constant. */
  string getValue() { value = result }

  override RegExpTerm getChild(int i) { none() }

  override string getPrimaryQLClass() { "RegExpConstant" = result }
}
|
||||
|
||||
/**
 * A grouped regular expression.
 *
 * Examples:
 *
 * ```
 * (ECMA|Java)
 * (?:ECMA|Java)
 * (?P<quote>['"])
 * ```
 */
class RegExpGroup extends RegExpTerm, TRegExpGroup {
  RegExpGroup() { this = TRegExpGroup(re, start, end) }

  /**
   * Gets the index of this capture group within the enclosing regular
   * expression literal.
   *
   * For example, in the regular expression `((a?).)(?:b)`, the group
   * `((a?).)` has index 1 and the nested group `(a?)` has index 2.
   * The group `(?:b)` has no index because it is not a capture group.
   */
  int getNumber() { re.getGroupNumber(start, end) = result }

  /** Holds if this capture group has a name. */
  predicate isNamed() { exists(string name | name = this.getName()) }

  /** Gets the name of this capture group, if it has one. */
  string getName() { re.getGroupName(start, end) = result }

  /** Holds unconditionally for every group. */
  predicate isCharacter() { any() }

  /** Gets the source text of this group, taken from `start` to `end`. */
  string getValue() { re.getText().substring(start, end) = result }

  /** Gets the contents of this group as its only child (`i = 0`). */
  override RegExpTerm getChild(int i) {
    i = 0 and
    re.groupContents(start, end, result.getStart(), result.getEnd()) and
    result.getRegex() = re
  }

  override string getPrimaryQLClass() { "RegExpGroup" = result }
}
|
||||
|
||||
/**
 * A character with special meaning in a regular expression.
 *
 * Examples:
 * ```
 * ^
 * $
 * .
 * ```
 */
class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
  string char;

  RegExpSpecialChar() {
    re.specialCharacter(start, end, char) and
    this = TRegExpSpecialChar(re, start, end)
  }

  /** Holds unconditionally for every special character. */
  predicate isCharacter() { any() }

  /** Gets the special character, as reported by `specialCharacter`. */
  string getChar() { char = result }

  override RegExpTerm getChild(int i) { none() }

  override string getPrimaryQLClass() { "RegExpSpecialChar" = result }
}
|
||||
|
||||
/**
 * The dot special character in a regular expression.
 *
 * Example:
 *
 * ```
 * .
 * ```
 */
class RegExpDot extends RegExpSpecialChar {
  RegExpDot() { "." = this.getChar() }

  override string getPrimaryQLClass() { "RegExpDot" = result }
}
|
||||
|
||||
/**
 * The dollar assertion `$`, which matches the end of a line.
 *
 * Example:
 *
 * ```
 * $
 * ```
 */
class RegExpDollar extends RegExpSpecialChar {
  RegExpDollar() { "$" = this.getChar() }

  override string getPrimaryQLClass() { "RegExpDollar" = result }
}
|
||||
|
||||
/**
 * The caret assertion `^`, which matches the beginning of a line.
 *
 * Example:
 *
 * ```
 * ^
 * ```
 */
class RegExpCaret extends RegExpSpecialChar {
  RegExpCaret() { "^" = this.getChar() }

  override string getPrimaryQLClass() { "RegExpCaret" = result }
}
|
||||
|
||||
/**
 * A zero-width match: either an empty group or an assertion.
 *
 * Examples:
 * ```
 * ()
 * (?=\w)
 * ```
 */
class RegExpZeroWidthMatch extends RegExpGroup {
  RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }

  /** Holds unconditionally for every zero-width match. */
  override predicate isCharacter() { any() }

  /** Has no result: zero-width matches report no children. */
  override RegExpTerm getChild(int i) { none() }

  override string getPrimaryQLClass() { "RegExpZeroWidthMatch" = result }
}
|
||||
|
||||
/**
 * A zero-width lookahead or lookbehind assertion, i.e. a zero-width match
 * that is not an empty group.
 *
 * Examples:
 *
 * ```
 * (?=\w)
 * (?!\n)
 * (?<=\.)
 * (?<!\\)
 * ```
 */
class RegExpSubPattern extends RegExpZeroWidthMatch {
  RegExpSubPattern() { not re.emptyGroup(start, end) }

  /** Gets the term that forms the contents of this assertion group. */
  RegExpTerm getOperand() {
    result.getRegex() = re and
    re.groupContents(start, end, result.getStart(), result.getEnd())
  }
}
|
||||
|
||||
/**
 * A zero-width lookahead assertion, either positive or negative.
 *
 * Examples:
 *
 * ```
 * (?=\w)
 * (?!\n)
 * ```
 */
abstract class RegExpLookahead extends RegExpSubPattern { }
|
||||
|
||||
/**
 * A positive lookahead assertion.
 *
 * Example:
 *
 * ```
 * (?=\w)
 * ```
 */
class RegExpPositiveLookahead extends RegExpLookahead {
  RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }

  override string getPrimaryQLClass() { "RegExpPositiveLookahead" = result }
}
|
||||
|
||||
/**
 * A negative lookahead assertion.
 *
 * Example:
 *
 * ```
 * (?!\n)
 * ```
 */
class RegExpNegativeLookahead extends RegExpLookahead {
  RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }

  override string getPrimaryQLClass() { "RegExpNegativeLookahead" = result }
}
|
||||
|
||||
/**
 * A zero-width lookbehind assertion, either positive or negative.
 *
 * Examples:
 *
 * ```
 * (?<=\.)
 * (?<!\\)
 * ```
 */
abstract class RegExpLookbehind extends RegExpSubPattern { }
|
||||
|
||||
/**
 * A positive lookbehind assertion.
 *
 * Example:
 *
 * ```
 * (?<=\.)
 * ```
 */
class RegExpPositiveLookbehind extends RegExpLookbehind {
  RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }

  override string getPrimaryQLClass() { "RegExpPositiveLookbehind" = result }
}
|
||||
|
||||
/**
 * A negative lookbehind assertion.
 *
 * Example:
 *
 * ```
 * (?<!\\)
 * ```
 */
class RegExpNegativeLookbehind extends RegExpLookbehind {
  RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }

  override string getPrimaryQLClass() { "RegExpNegativeLookbehind" = result }
}
|
||||
|
||||
/**
 * A back reference in a regular expression, that is, a term that refers to
 * an earlier capture group either by number (`\i`) or by name (`(?P=name)`).
 *
 * Examples:
 *
 * ```
 * \1
 * (?P=quote)
 * ```
 */
class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
  RegExpBackRef() { this = TRegExpBackRef(re, start, end) }

  /**
   * Gets the number of the capture group that this back reference refers to, if any.
   */
  int getNumber() { re.getBackrefNumber(start, end) = result }

  /**
   * Gets the name of the capture group that this back reference refers to, if any.
   */
  string getName() { re.getBackrefName(start, end) = result }

  /**
   * Gets the capture group that this back reference refers to: a group in the
   * same literal whose number or name matches that of this back reference.
   */
  RegExpGroup getGroup() {
    this.getLiteral() = result.getLiteral() and
    (
      this.getName() = result.getName()
      or
      this.getNumber() = result.getNumber()
    )
  }

  override RegExpTerm getChild(int i) { none() }

  override string getPrimaryQLClass() { "RegExpBackRef" = result }
}
|
||||
|
||||
/** Gets the root term of the parse tree built for `re`, if one has been constructed. */
RegExpTerm getParsedRegExp(StrConst re) { result.isRootTerm() and re = result.getRegex() }
|
||||
@@ -1,222 +0,0 @@
|
||||
/** SSA library */
|
||||
|
||||
import python
|
||||
|
||||
/**
 * A single static assignment (SSA) variable, that is, a variable which is
 * assigned only once (statically).
 * An SSA variable is defined either like a normal variable or by a phi node,
 * which can occur at joins in the flow graph.
 * Definitions without uses do not have an SSA variable.
 */
class SsaVariable extends @py_ssa_var {
  SsaVariable() { py_ssa_var(this, _) }

  /** Gets the source variable underlying this SSA variable. */
  Variable getVariable() { py_ssa_var(this, result) }

  /** Gets a control flow node that uses this variable. */
  ControlFlowNode getAUse() { py_ssa_use(result, this) }

  /** Gets the definition of this SSA variable; the definition may be a deletion. */
  ControlFlowNode getDefinition() { py_ssa_defn(this, result) }

  /**
   * Gets an argument of the phi function that defines this variable.
   * This is the raw SSA form produced by the extractor; in general,
   * `getAPrunedPhiInput()` should be used instead.
   */
  SsaVariable getAPhiInput() { py_ssa_phi(this, result) }

  /**
   * Gets the edge(s) (result->this.getDefinition()) on which the SSA variable `input`
   * defines this SSA variable.
   * For each incoming edge `X->B`, where `B` is the basic block containing this phi-node,
   * only one of the input SSA variables of this phi-node is live. The result is the
   * predecessor block such that `input` is the live variable on the edge result->B.
   */
  BasicBlock getPredecessorBlockForPhiArgument(SsaVariable input) {
    result = this.getAPredecessorBlockForPhi() and
    input = this.getAPhiInput() and
    input.getDefinition().getBasicBlock().dominates(result) and
    /*
     * An SSA variable that is the input on one edge may also dominate another edge.
     * Consider (in SSA form):
     * x0 = 0
     * if cond:
     *     x1 = 1
     * x2 = phi(x0, x1)
     * use(x2)
     *
     * The definition of x0 dominates the exit from the block x1=1, although it does
     * not reach it. So we require that no other phi input is defined in a block that
     * also dominates the edge and is strictly dominated by the block defining `input`.
     * Note that if a dominates c and b dominates c, then either a dominates b or vice-versa.
     */

    not exists(SsaVariable rival, BasicBlock rivalBlock |
      rival = this.getAPhiInput() and
      rival != input and
      rivalBlock = rival.getDefinition().getBasicBlock() and
      rivalBlock.dominates(result) and
      input.getDefinition().getBasicBlock().strictlyDominates(rivalBlock)
    )
  }

  /** Gets an argument of the phi function defining this variable, pruned of unlikely edges. */
  SsaVariable getAPrunedPhiInput() {
    result = this.getAPhiInput() and
    exists(BasicBlock pred |
      pred = this.getPredecessorBlockForPhiArgument(result) and
      not pred.getLastNode().(RaisingNode).unlikelySuccessor(this.getDefinition())
    )
  }

  /**
   * Gets a variable that ultimately defines this variable and that has no
   * phi inputs of its own.
   */
  SsaVariable getAnUltimateDefinition() {
    if exists(this.getAPhiInput())
    then result = this.getAPhiInput().getAnUltimateDefinition()
    else result = this
  }

  /** Gets a textual representation of this element. */
  string toString() { exists(string id | id = this.getId() | result = "SSA Variable " + id) }

  /** Gets the location of the definition of this variable. */
  Location getLocation() { this.getDefinition().getLocation() = result }

  /** Gets the id (name) of this variable, taken from its source variable. */
  string getId() { this.getVariable().getId() = result }

  /** Gets the incoming edges for a Phi node. */
  private BasicBlock getAPredecessorBlockForPhi() {
    exists(this.getAPhiInput()) and
    this.getDefinition().getBasicBlock() = result.getASuccessor()
  }

  /** Gets the incoming edges for a Phi node, pruned of unlikely edges. */
  private BasicBlock getAPrunedPredecessorBlockForPhi() {
    not result.unlikelySuccessor(this.getDefinition().getBasicBlock()) and
    result = this.getAPredecessorBlockForPhi()
  }

  /** Holds if a use of this variable can be reached without passing a definition. */
  predicate reachableWithoutDefinition() {
    not (exists(this.getDefinition()) or py_ssa_phi(this, _))
    or
    this.getAPhiInput().reachableWithoutDefinition()
    or
    /*
     * For phi-nodes, there must be a corresponding phi-input for each control-flow
     * predecessor. Otherwise, the variable will be undefined on that incoming edge.
     * WARNING: the same phi-input may cover multiple predecessors, so this check
     * cannot be done by counting.
     */

    exists(BasicBlock pred |
      pred = this.getAPredecessorBlockForPhi() and
      not this.getAPhiInput().getDefinition().getBasicBlock().dominates(pred)
    )
  }

  /** Holds if this variable may be undefined. */
  predicate maybeUndefined() {
    not (exists(this.getDefinition()) or py_ssa_phi(this, _) or this.implicitlyDefined())
    or
    this.getDefinition().isDelete()
    or
    this.getAPrunedPhiInput().maybeUndefined()
    or
    /*
     * For phi-nodes, there must be a corresponding phi-input for each control-flow
     * predecessor. Otherwise, the variable will be undefined on that incoming edge.
     * WARNING: the same phi-input may cover multiple predecessors, so this check
     * cannot be done by counting.
     */

    exists(BasicBlock pred |
      pred = this.getAPrunedPredecessorBlockForPhi() and
      reaches_end(pred) and
      not this.getAPhiInput().getDefinition().getBasicBlock().dominates(pred)
    )
  }

  /**
   * Holds if this variable has neither a definition nor a phi node, but its
   * source variable is a global that is a globally defined name, or is
   * `__path__` in a package `__init__` module.
   */
  private predicate implicitlyDefined() {
    not py_ssa_phi(this, _) and
    not exists(this.getDefinition()) and
    exists(GlobalVariable global | global = this.getVariable() |
      global.getId() = "__path__" and global.getScope().(Module).isPackageInit()
      or
      globallyDefinedName(global.getId())
    )
  }

  /**
   * Gets the global variable that is accessed if this local is undefined.
   * Only applies to local variables in class scopes.
   */
  GlobalVariable getFallbackGlobal() {
    not exists(this.getDefinition()) and
    exists(LocalVariable local, Class cls |
      local = this.getVariable() and
      cls = local.getScope() and
      cls.getScope() = result.getScope() and
      local.getId() = result.getId()
    )
  }

  /**
   * Holds if this SSA variable is the first parameter of a method
   * (regardless of whether it is actually called self or not).
   */
  predicate isSelf() {
    exists(Function method |
      method.getArg(0) = this.getDefinition().getNode() and
      method.isMethod()
    )
  }
}
|
||||
|
||||
/**
 * Holds if `b` does not exit early and is either a block without any
 * predecessor or has a predecessor for which this predicate holds.
 */
private predicate reaches_end(BasicBlock b) {
  not exits_early(b) and
  (
    exists(BasicBlock before | b = before.getASuccessor() and reaches_end(before))
    or
    /* Entry point */
    not exists(BasicBlock before | b = before.getASuccessor())
  )
}
|
||||
|
||||
/** Holds if basic block `b` contains a call to a function that never returns. */
private predicate exits_early(BasicBlock b) {
  b = any(FunctionObject f | f.neverReturns()).getACall().getBasicBlock()
}
|
||||
|
||||
/** Holds if some module is named `gettext`. */
private predicate gettext_installed() {
  // Good enough (and fast) approximation
  "gettext" = any(Module m).getName()
}
|
||||
|
||||
/**
 * Holds if `name` is treated as a builtin constant: it names a builtin object,
 * it is `WindowsError`, or it is `_` while `gettext` is installed.
 */
private predicate builtin_constant(string name) {
  name = "WindowsError"
  or
  gettext_installed() and name = "_"
  or
  exists(Object::builtin(name))
}
|
||||
|
||||
/** Holds if `name` is one of `__file__`, `__builtins__` or `__name__`. */
private predicate auto_name(string name) { name = ["__file__", "__builtins__", "__name__"] }
|
||||
|
||||
/** Holds if `name` is (almost) always defined, i.e. it is a builtin or a VM-defined name. */
predicate globallyDefinedName(string name) {
  auto_name(name)
  or
  builtin_constant(name)
}
|
||||
|
||||
/** An SSA variable whose source variable is a global variable. */
class GlobalSsaVariable extends EssaVariable {
  GlobalSsaVariable() { this.getSourceVariable() instanceof GlobalVariable }

  /** Gets the global variable backing this SSA variable. */
  GlobalVariable getVariable() { this.getSourceVariable() = result }

  /** Gets the id (name) of the backing global variable. */
  string getId() { this.getVariable().getId() = result }

  override string toString() { exists(string id | id = this.getId() | result = "GSSA Variable " + id) }
}
|
||||
@@ -1,153 +0,0 @@
|
||||
import python
|
||||
|
||||
/**
 * A Scope: the lexical extent over which all identifiers with the same name
 * refer to the same variable.
 * Modules, Classes and Functions are all Scopes. There are no other scopes.
 * Expressions that create new scopes (lambdas and comprehensions) are handled
 * by creating an anonymous Function.
 */
class Scope extends Scope_ {
  /** Gets the enclosing module, found via the enclosing scope. */
  Module getEnclosingModule() { this.getEnclosingScope().getEnclosingModule() = result }

  /**
   * This method will be deprecated in the next release. Please use `getEnclosingScope()` instead.
   * The reason is to avoid confusion around uses of `x.getScope+()` where `x` might be an
   * `AstNode` or a `Variable`. Forcing users to write `x.getScope().getEnclosingScope*()`
   * ensures that the apparent semantics and the actual semantics coincide.
   * [ Gets the scope enclosing this scope (modules have no enclosing scope) ]
   */
  Scope getScope() { none() }

  /** Gets the scope enclosing this scope (modules have no enclosing scope) */
  Scope getEnclosingScope() { none() }

  /** Gets the statements forming the body of this scope */
  StmtList getBody() { none() }

  /** Gets the nth statement of this scope */
  Stmt getStmt(int n) { none() }

  /** Gets a top-level statement in this scope */
  Stmt getAStmt() { none() }

  /** Gets the location of this scope; this base definition has no result. */
  Location getLocation() { none() }

  /** Gets the name of this scope */
  string getName() { py_strs(result, this, 0) }

  /** Gets the docstring for this scope: the value of its first statement, when that is a string constant. */
  StrConst getDocString() { this.getStmt(0).(ExprStmt).getValue() = result }

  /** Gets the entry point into this Scope's control flow graph */
  ControlFlowNode getEntryNode() { py_scope_flow(result, this, -1) }

  /** Gets the non-explicit exit from this Scope's control flow graph */
  ControlFlowNode getFallthroughNode() { py_scope_flow(result, this, 0) }

  /** Gets the exit of this scope following from a return statement */
  ControlFlowNode getReturnNode() { py_scope_flow(result, this, 2) }

  /** Gets an exit from this Scope's control flow graph */
  ControlFlowNode getAnExitNode() {
    exists(int kind | kind >= 0 | py_scope_flow(result, this, kind))
  }

  /**
   * Gets an exit from this Scope's control flow graph
   * that does not result from an exception.
   */
  ControlFlowNode getANormalExit() {
    result = [this.getFallthroughNode(), this.getReturnNode()]
  }

  /** Holds if this is a top-level (non-nested) class or function */
  predicate isTopLevel() { this.getEnclosingScope() = this.getEnclosingModule() }

  /** Holds if this scope is deemed to be public */
  predicate isPublic() {
    /* Not implicitly private */
    this.getName().charAt(0) != "_" and
    /* Not inside a function */
    not this.getEnclosingScope() instanceof Function and
    (
      this instanceof Module
      or
      exists(Class owner | owner = this.getEnclosingScope() |
        owner.isPublic() and
        this instanceof Function
      )
      or
      exists(Module mod | mod = this.getEnclosingScope() and mod.isPublic() |
        /* If the module has an __all__, is this in it */
        this.getName() = mod.getAnExport()
        or
        not exists(mod.getAnExport())
      )
    )
  }

  /**
   * Holds if `a` is contained in the body of this scope, or in a scope
   * (transitively) enclosed by this scope.
   */
  predicate contains(AstNode a) {
    any(Scope inner | inner.getEnclosingScope() = this).contains(a)
    or
    this.getBody().contains(a)
  }

  /**
   * Holds if this scope can be expected to execute before `other`.
   * Modules precede functions and methods in those modules
   * `__init__` precedes other methods. `__enter__` precedes `__exit__`.
   * NOTE that this is context-insensitive, so a module "precedes" a function
   * in that module, even if that function is called from the module scope.
   */
  predicate precedes(Scope other) {
    exists(Function later, string laterName |
      later = other and laterName = later.getName()
    |
      // Normal functions are preceded by the enclosing module
      not later.isMethod() and this = later.getEnclosingModule()
      or
      later.isMethod() and
      (
        // The __init__ method is preceded by the enclosing module
        laterName = "__init__" and this = later.getEnclosingModule()
        or
        exists(Class owner, string earlierName |
          // __init__ -> __enter__ -> __exit__
          // __init__ -> other-methods
          owner = later.getScope() and
          (
            earlierName = "__enter__" and laterName = "__exit__"
            or
            earlierName = "__init__" and laterName != "__init__" and laterName != "__exit__"
          )
        |
          earlierName = this.(Function).getName() and this.getScope() = owner
          or
          // No such earlier method in the class: the enclosing module precedes instead.
          this = other.getEnclosingModule() and
          not exists(Function earlier |
            earlier.getScope() = owner and
            earlier.getName() = earlierName
          )
        )
      )
    )
  }

  /**
   * Gets the evaluation scope for code in this (lexical) scope.
   * This is usually the scope itself, but may be an enclosing scope.
   * Notably, for list comprehensions in Python 2.
   */
  Scope getEvaluatingScope() { this = result }

  /**
   * Holds if this scope is in the source archive,
   * that is, it is part of the code specified, not library code.
   */
  predicate inSource() {
    exists(string path | path = this.getEnclosingModule().getFile().getRelativePath())
  }

  /** Gets the last statement of the last item in the body of this scope. */
  Stmt getLastStatement() { this.getBody().getLastItem().getLastStatement() = result }

  /** Holds if this contains `inner` syntactically and `inner` has the same scope as `this` */
  predicate containsInScope(AstNode inner) {
    inner.getScope() = this and
    this.getBody().contains(inner)
  }
}
|
||||
@@ -1,90 +0,0 @@
|
||||
/**
|
||||
* Utilities to support queries about instance attribute accesses of
|
||||
* the form `self.attr`.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.pointsto.Filters
|
||||
|
||||
/**
 * An attribute access whose left hand side is the `self` variable of a method,
 * i.e. an access of the form `self.attr`.
 */
class SelfAttribute extends Attribute {
  SelfAttribute() { exists(Class cls | self_attribute(this, cls)) }

  /** Gets a class that transitively encloses the method whose `self` is accessed here. */
  Class getClass() { self_attribute(this, result) }
}
|
||||
|
||||
/** Holds if `self` is the parameter variable bound to the first argument of method `method`. */
private predicate self_variable(Function method, Variable self) {
  method.isMethod() and
  self.isParameter() and
  self.getAnAccess() = method.getArg(0).asName()
}
|
||||
|
||||
/**
 * Holds if `attr` is an access of the form `self.attr`, where `self` is the self
 * variable of a method that is (transitively) enclosed by class `cls`.
 */
private predicate self_attribute(Attribute attr, Class cls) {
  exists(Function method, Variable selfVar |
    self_variable(method, selfVar) and
    attr.getObject() = selfVar.getAnAccess() and
    method.getScope+() = cls
  )
}
|
||||
|
||||
/** Helper class for UndefinedClassAttribute.ql & MaybeUndefinedClassAttribute.ql */
class SelfAttributeRead extends SelfAttribute {
  SelfAttributeRead() {
    this.getCtx() instanceof Load and
    // Be stricter for loads.
    // We want to be generous as to what is defined (i.e. stores),
    // but strict as to what needs to be defined (i.e. loads).
    exists(ClassObject owner, FunctionObject method |
      method = owner.declaredAttribute(_) and
      this.getScope() = method.getFunction() and
      this.getClass() = owner.getPyClass()
    )
  }

  /**
   * Holds if a node `n` satisfying `hasattr(n, use, name)`, for a use of the
   * variable read here and this attribute's name, strictly dominates this read.
   */
  predicate guardedByHasattr() {
    exists(Variable receiver, ControlFlowNode check |
      this.getObject().getAFlowNode() = receiver.getAUse() and
      hasattr(check, receiver.getAUse(), this.getName()) and
      check.strictlyDominates(this.getAFlowNode())
    )
  }

  /**
   * Holds if a store to a self attribute of the same name, in the same scope,
   * strictly dominates this read.
   */
  pragma[noinline]
  predicate locallyDefined() {
    exists(SelfAttributeStore earlier |
      earlier.getScope() = this.getScope() and
      earlier.getName() = this.getName() and
      earlier.getAFlowNode().strictlyDominates(this.getAFlowNode())
    )
  }
}
|
||||
|
||||
/** A `self.attr` access that is used in a store context. */
class SelfAttributeStore extends SelfAttribute {
  SelfAttributeStore() { this.getCtx() instanceof Store }

  /** Gets the value of an assignment that has this attribute as a target. */
  Expr getAssignedValue() { result = any(Assign a | a.getATarget() = this).getValue() }
}
|
||||
|
||||
/**
 * Holds if attribute `name` is stored on the `n`th parameter of `method`,
 * either directly or by passing that parameter as the `m`th argument of a
 * call to a callee for which this predicate holds with `m`.
 */
private predicate attr_assigned_in_method_arg_n(FunctionObject method, string name, int n) {
  exists(SsaVariable param |
    param.getDefinition().getNode() = method.getFunction().getArg(n).asName()
  |
    // Passed on to a callee that stores the attribute on the matching parameter.
    exists(CallNode call, FunctionObject callee, int m |
      attr_assigned_in_method_arg_n(callee, name, m) and
      param.getAUse() = callee.getArgumentForCall(call, m)
    )
    or
    // Stored directly on a use of the parameter.
    param.getAUse() = any(AttrNode attr | attr.isStore()).getObject(name)
  )
}
|
||||
|
||||
/** Holds if attribute `name` is assigned on the first parameter (index 0) of `method`. */
predicate attribute_assigned_in_method(FunctionObject method, string name) {
  exists(int selfIndex | selfIndex = 0 | attr_assigned_in_method_arg_n(method, name, selfIndex))
}
|
||||
@@ -1,117 +0,0 @@
|
||||
/**
|
||||
* Provides support for special methods.
|
||||
* This is done in two steps:
|
||||
* - A subset of `ControlFlowNode`s are labelled as potentially corresponding to
|
||||
* a special method call (by being an instance of `SpecialMethod::Potential`).
|
||||
* - A subset of the potential special method calls are labelled as being actual
|
||||
* special method calls (`SpecialMethodCallNode`) if the appropriate method is defined.
|
||||
* Extend `SpecialMethod::Potential` to capture more cases.
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
/** A control flow node that might correspond to a call of a special method. */
class PotentialSpecialMethodCallNode extends ControlFlowNode {
  PotentialSpecialMethodCallNode() { this instanceof SpecialMethod::Potential }
}
|
||||
|
||||
/**
|
||||
* Machinery for detecting special method calls.
|
||||
* Extend `SpecialMethod::Potential` to capture more cases.
|
||||
*/
|
||||
module SpecialMethod {
  /** A control flow node that may be an implicit call to a special method. */
  abstract class Potential extends ControlFlowNode {
    /** Gets the name of the special method that would be invoked. */
    abstract string getSpecialMethodName();

    /** Gets the control flow node that would be passed as argument `n`. */
    abstract ControlFlowNode getArg(int n);

    /**
     * Gets the control flow node for the instance on which the special method
     * would be defined, that is, argument 0.
     */
    ControlFlowNode getSelf() { result = this.getArg(0) }
  }

  /** A binary expression node that might correspond to a special method call. */
  class SpecialBinOp extends Potential, BinaryExprNode {
    Operator operator;

    SpecialBinOp() { operator = this.getOp() }

    override string getSpecialMethodName() { result = operator.getSpecialMethodName() }

    /** Gets the left operand for `n = 0` and the right operand for `n = 1`. */
    override ControlFlowNode getArg(int n) {
      result = this.getLeft() and n = 0
      or
      result = this.getRight() and n = 1
    }
  }

  /** A subscript expression node that might correspond to a special method call. */
  abstract class SpecialSubscript extends Potential, SubscriptNode {
    /** Gets the subscripted object for `n = 0` and the index for `n = 1`. */
    override ControlFlowNode getArg(int n) {
      result = this.getObject() and n = 0
      or
      result = this.getIndex() and n = 1
    }
  }

  /** A subscript expression node that might correspond to a call to __getitem__. */
  class SpecialGetItem extends SpecialSubscript {
    SpecialGetItem() { this.isLoad() }

    override string getSpecialMethodName() { result = "__getitem__" }
  }

  /** A subscript expression node that might correspond to a call to __setitem__. */
  class SpecialSetItem extends SpecialSubscript {
    SpecialSetItem() { this.isStore() }

    override string getSpecialMethodName() { result = "__setitem__" }

    /** Gets the object (`n = 0`), the index (`n = 1`) or the stored value (`n = 2`). */
    override ControlFlowNode getArg(int n) {
      // Arguments 0 and 1 are the same as for every other subscript.
      result = super.getArg(n)
      or
      n = 2 and result = this.getValueNode()
    }

    /**
     * Gets a control flow node for the value of an assignment or augmented
     * assignment that has this subscript as its target.
     */
    private ControlFlowNode getValueNode() {
      exists(Expr value | value = result.getNode() |
        exists(AssignStmt assignment |
          this.getNode() = assignment.getATarget() and
          value = assignment.getValue()
        )
        or
        exists(AugAssign augmented |
          this.getNode() = augmented.getTarget() and
          value = augmented.getValue()
        )
      )
    }
  }

  /** A subscript expression node that might correspond to a call to __delitem__. */
  class SpecialDelItem extends SpecialSubscript {
    SpecialDelItem() { this.isDelete() }

    override string getSpecialMethodName() { result = "__delitem__" }
  }
}
|
||||
|
||||
/** A control flow node corresponding to a special method call. */
|
||||
class SpecialMethodCallNode extends PotentialSpecialMethodCallNode {
  Value resolvedSpecialMethod;

  SpecialMethodCallNode() {
    // The special method name is looked up on the class of a value that `self` points to.
    exists(SpecialMethod::Potential pot | pot = this |
      resolvedSpecialMethod =
        pot.getSelf().pointsTo().getClass().lookup(pot.getSpecialMethodName())
    )
  }

  /** Gets the method that is called. */
  Value getResolvedSpecialMethod() { resolvedSpecialMethod = result }
}
|
||||
@@ -1,437 +0,0 @@
|
||||
import python
|
||||
|
||||
/** A statement */
|
||||
class Stmt extends Stmt_, AstNode {
  /** Gets the scope that immediately encloses this statement. */
  override Scope getScope() { py_scopes(this, result) }

  override string toString() { result = "Stmt" }

  /** Gets the module that encloses this statement. */
  Module getEnclosingModule() {
    exists(Scope scope | scope = this.getScope() | result = scope.getEnclosingModule())
  }

  override Location getLocation() { result = Stmt_.super.getLocation() }

  /** Gets an immediate (non-nested) sub-expression of this statement; none by default. */
  Expr getASubExpression() { none() }

  /** Gets an immediate (non-nested) sub-statement of this statement; none by default. */
  Stmt getASubStatement() { none() }

  override AstNode getAChildNode() {
    result = this.getASubStatement()
    or
    result = this.getASubExpression()
  }

  /** Gets a control flow node for this statement or for a node it contains in its scope. */
  private ControlFlowNode possibleEntryNode() {
    exists(AstNode node | node = result.getNode() |
      node = this
      or
      this.containsInScope(node)
    )
  }

  /**
   * Gets a control flow node for an entry into this statement: a node of this
   * statement with a predecessor that is not itself a node of this statement.
   */
  ControlFlowNode getAnEntryNode() {
    exists(ControlFlowNode pred |
      not pred = this.possibleEntryNode() and
      pred.getASuccessor() = result
    ) and
    result = this.possibleEntryNode()
  }

  /**
   * Holds if this statement cannot be reached: it has no entry node, or it sits
   * in a branch that a constant `if` or `while` test rules out.
   */
  predicate isUnreachable() {
    not exists(this.getAnEntryNode())
    or
    exists(If ifstmt, boolean testValue |
      testValue = ifstmt.getTest().(ImmutableLiteral).booleanValue()
    |
      testValue = false and ifstmt.getBody().contains(this)
      or
      testValue = true and ifstmt.getOrelse().contains(this)
    )
    or
    exists(While whilestmt |
      whilestmt.getBody().contains(this) and
      whilestmt.getTest().(ImmutableLiteral).booleanValue() = false
    )
  }

  /**
   * Gets the final statement in this statement, ordered by location.
   * For a statement that is not compound this is the statement itself.
   */
  Stmt getLastStatement() { this = result }
}
|
||||
|
||||
/** A statement that includes a binding (except imports) */
|
||||
class Assign extends Assign_ {
  /**
   * Holds if a target of this assignment defines `v`.
   * Use ControlFlowNodes and SsaVariables for data-flow analysis.
   */
  predicate defines(Variable v) {
    exists(Expr target | target = this.getATarget() | target.defines(v))
  }

  override Stmt getASubStatement() { none() }

  /** Gets the assigned value or one of the targets. */
  override Expr getASubExpression() {
    result = this.getValue()
    or
    result = this.getATarget()
  }
}
|
||||
|
||||
/** An assignment statement */
|
||||
class AssignStmt extends Assign {
  /* syntax: Expr, ... = Expr */
  // Function and class definitions are excluded here.
  AssignStmt() { not (this instanceof FunctionDef or this instanceof ClassDef) }

  override string toString() { result = "AssignStmt" }
}
|
||||
|
||||
/** An augmented assignment statement, such as `x += y` */
|
||||
class AugAssign extends AugAssign_ {
  /* syntax: Expr += Expr */
  override Expr getASubExpression() { result = this.getOperation() }

  override Stmt getASubStatement() { none() }

  /**
   * Gets the target of this augmented assignment statement, that is,
   * the left operand of its binary operation (the `a` in `a += b`).
   */
  Expr getTarget() {
    exists(BinaryExpr operation | operation = this.getOperation() | result = operation.getLeft())
  }

  /**
   * Gets the value of this augmented assignment statement, that is,
   * the right operand of its binary operation (the `b` in `a += b`).
   */
  Expr getValue() {
    exists(BinaryExpr operation | operation = this.getOperation() | result = operation.getRight())
  }
}
|
||||
|
||||
/** An annotated assignment statement, such as `x: int = 0` */
|
||||
class AnnAssign extends AnnAssign_ {
  /* syntax: Expr: Expr = Expr */
  override Expr getASubExpression() {
    result = this.getValue()
    or
    result = this.getTarget()
    or
    result = this.getAnnotation()
  }

  override Stmt getASubStatement() { none() }

  /**
   * Holds if the value of the annotation of this assignment is stored at runtime.
   * This requires an unparenthesized `Name` target outside of any function scope.
   */
  predicate isStored() {
    exists(Name target | target = this.getTarget() | not target.isParenthesized()) and
    not this.getScope() instanceof Function
  }
}
|
||||
|
||||
/** An exec statement */
|
||||
class Exec extends Exec_ {
  /* syntax: exec Expr */
  override Stmt getASubStatement() { none() }

  /** Gets the body, the globals or the locals expression of this `exec`. */
  override Expr getASubExpression() {
    result = this.getLocals()
    or
    result = this.getGlobals()
    or
    result = this.getBody()
  }
}
|
||||
|
||||
/** An except statement (part of a `try` statement), such as `except IOError as err:` */
|
||||
class ExceptStmt extends ExceptStmt_ {
  /* syntax: except Expr [ as Expr ]: */
  /** Gets the `try` statement that has this statement as a handler. */
  Try getTry() { this = result.getAHandler() }

  /** Gets the exception type expression or the bound name. */
  override Expr getASubExpression() {
    result = this.getType()
    or
    result = this.getName()
  }

  override Stmt getASubStatement() { result = this.getAStmt() }

  override Stmt getLastStatement() {
    exists(StmtList body | body = this.getBody() |
      result = body.getLastItem().getLastStatement()
    )
  }
}
|
||||
|
||||
/** An assert statement, such as `assert a == b, "A is not equal to b"` */
|
||||
class Assert extends Assert_ {
  /* syntax: assert Expr [, Expr] */
  override Stmt getASubStatement() { none() }

  /** Gets the test or the message expression. */
  override Expr getASubExpression() {
    result = this.getTest()
    or
    result = this.getMsg()
  }
}
|
||||
|
||||
/** A break statement */
|
||||
class Break extends Break_ {
  /* syntax: break */
  // A `break` statement has no child expressions or statements.
  override Expr getASubExpression() { none() }

  override Stmt getASubStatement() { none() }
}
|
||||
|
||||
/** A continue statement */
|
||||
class Continue extends Continue_ {
  /* syntax: continue */
  // A `continue` statement has no child statements or expressions.
  override Stmt getASubStatement() { none() }

  override Expr getASubExpression() { none() }
}
|
||||
|
||||
/** A delete statement, such as `del x[-1]` */
|
||||
class Delete extends Delete_ {
  /* syntax: del Expr, ... */
  override Stmt getASubStatement() { none() }

  /** Gets one of the deleted targets. */
  override Expr getASubExpression() { this.getATarget() = result }
}
|
||||
|
||||
/** An expression statement, such as `len(x)` or `yield y` */
|
||||
class ExprStmt extends ExprStmt_ {
  /* syntax: Expr */
  override Stmt getASubStatement() { none() }

  /** Gets the expression that makes up this statement. */
  override Expr getASubExpression() { this.getValue() = result }
}
|
||||
|
||||
/** A for statement, such as `for x in y: print(x)` */
|
||||
class For extends For_ {
  /* syntax: for varname in Expr: ... */
  /** Gets a statement of the loop body or of the `else` block. */
  override Stmt getASubStatement() {
    result = this.getAStmt() or
    result = this.getAnOrelse()
  }

  /** Gets the loop target or the iterated expression. */
  override Expr getASubExpression() {
    result = this.getTarget() or
    result = this.getIter()
  }

  /**
   * Gets the final statement of this loop, ordered by location.
   * An `else` block comes after the body, so it takes precedence when present;
   * this matches the treatment of `else` in `While` and `If`.
   */
  override Stmt getLastStatement() {
    result = this.getOrelse().getLastItem().getLastStatement()
    or
    not exists(this.getOrelse()) and
    result = this.getBody().getLastItem().getLastStatement()
  }
}
|
||||
|
||||
/** A global statement, such as `global var` */
|
||||
class Global extends Global_ {
  /* syntax: global varname */
  // A `global` statement has no child statements or expressions.
  override Stmt getASubStatement() { none() }

  override Expr getASubExpression() { none() }
}
|
||||
|
||||
/** An if statement, such as `if eggs: print("spam")` */
|
||||
class If extends If_ {
  /* syntax: if Expr: ... */
  /** Gets a statement of the `else` block or of the body. */
  override Stmt getASubStatement() {
    result = this.getAnOrelse()
    or
    result = this.getAStmt()
  }

  override Expr getASubExpression() { this.getTest() = result }

  /** Whether this if statement takes the form `if __name__ == "__main__":` */
  predicate isNameEqMain() {
    exists(StrConst main, Name name, Compare cmp |
      name.getId() = "__name__" and
      main.getText() = "__main__" and
      cmp = this.getTest() and
      cmp.getOp(0) instanceof Eq
    |
      // The two operands may appear in either order.
      cmp.getLeft() = main and cmp.getComparator(0) = name
      or
      cmp.getLeft() = name and cmp.getComparator(0) = main
    )
  }

  /** Whether this if statement starts with the keyword `elif` */
  predicate isElif() {
    /*
     * The Python parser turns all elif chains into nested if-else statements.
     * An `elif` can be identified as it is the first statement in an `else` block
     * and it is not indented relative to its parent `if`.
     */

    exists(If parent | this = parent.getOrelse(0) |
      parent.getLocation().getStartColumn() = this.getLocation().getStartColumn()
    )
  }

  /** Gets the `elif` branch of this `if`-statement, if present */
  If getElif() {
    result.isElif() and
    result = this.getOrelse(0)
  }

  /**
   * Gets the last statement of the `else` block, or of the body when there is
   * no `else` block.
   */
  override Stmt getLastStatement() {
    not exists(this.getOrelse()) and
    result = this.getBody().getLastItem().getLastStatement()
    or
    result = this.getOrelse().getLastItem().getLastStatement()
  }
}
|
||||
|
||||
/** A nonlocal statement, such as `nonlocal var` */
|
||||
class Nonlocal extends Nonlocal_ {
  /* syntax: nonlocal varname */
  override Expr getASubExpression() { none() }

  override Stmt getASubStatement() { none() }

  /**
   * Gets a variable in the scope of this statement whose identifier is one of
   * the names listed by this statement.
   */
  Variable getAVariable() {
    exists(string id, Scope scope |
      id = this.getAName() and
      scope = this.getScope()
    |
      result.getId() = id and
      result.getScope() = scope
    )
  }
}
|
||||
|
||||
/** A pass statement */
|
||||
class Pass extends Pass_ {
  /* syntax: pass */
  // A `pass` statement has no child expressions or statements.
  override Expr getASubExpression() { none() }

  override Stmt getASubStatement() { none() }
}
|
||||
|
||||
/** A print statement (Python 2 only), such as `print 0` */
|
||||
class Print extends Print_ {
  /* syntax: print Expr, ... */
  /** Gets the destination expression or one of the printed values. */
  override Expr getASubExpression() {
    result = this.getDest()
    or
    result = this.getAValue()
  }

  override Stmt getASubStatement() { none() }
}
|
||||
|
||||
/** A raise statement, such as `raise CompletelyDifferentException()` */
|
||||
class Raise extends Raise_ {
  /* syntax: raise Expr */
  override Stmt getASubStatement() { none() }

  /** Gets any expression whose parent is this statement. */
  override Expr getASubExpression() { py_exprs(result, _, this, _) }

  /**
   * The expression immediately following the `raise`, this is the
   * exception raised, but not accounting for tuples in Python 2.
   */
  Expr getException() {
    result = this.getExc()
    or
    result = this.getType()
  }

  /** The exception raised, accounting for tuples in Python 2. */
  Expr getRaised() {
    exists(Expr raw | raw = this.getException() |
      if major_version() = 2 and exists(raw.(Tuple).getAnElt())
      then
        /* In Python 2 raising a tuple will result in the first element of the tuple being raised. */
        result = raw.(Tuple).getElt(0)
      else result = raw
    )
  }
}
|
||||
|
||||
/** A return statement, such as `return None` */
|
||||
class Return extends Return_ {
  /* syntax: return Expr */
  /** Gets the returned expression. */
  override Expr getASubExpression() { this.getValue() = result }

  override Stmt getASubStatement() { none() }
}
|
||||
|
||||
/** A try statement */
|
||||
class Try extends Try_ {
  /* syntax: try: ... */
  override Expr getASubExpression() { none() }

  /** Gets a body statement, a handler, an `else` statement or a `finally` statement. */
  override Stmt getASubStatement() {
    result = this.getAStmt()
    or
    result = this.getAHandler()
    or
    result = this.getAnOrelse()
    or
    result = this.getAFinalstmt()
  }

  override ExceptStmt getHandler(int i) { result = Try_.super.getHandler(i) }

  /** Gets an exception handler of this try statement. */
  override ExceptStmt getAHandler() { result = Try_.super.getAHandler() }

  /**
   * Gets the last statement of the first block that exists, tried in the order
   * `finally`, `else`, handlers, body.
   */
  override Stmt getLastStatement() {
    result = this.getFinalbody().getLastItem().getLastStatement()
    or
    not exists(this.getFinalbody()) and
    (
      result = this.getOrelse().getLastItem().getLastStatement()
      or
      not exists(this.getOrelse()) and
      (
        result = this.getHandlers().getLastItem().getLastStatement()
        or
        not exists(this.getHandlers()) and
        result = this.getBody().getLastItem().getLastStatement()
      )
    )
  }
}
|
||||
|
||||
/** A while statement, such as `while parrot_resting():` */
|
||||
class While extends While_ {
  /* syntax: while Expr: ... */
  override Expr getASubExpression() { this.getTest() = result }

  /** Gets a statement of the `else` block or of the loop body. */
  override Stmt getASubStatement() {
    result = this.getAnOrelse()
    or
    result = this.getAStmt()
  }

  /**
   * Gets the last statement of the `else` block, or of the body when there is
   * no `else` block.
   */
  override Stmt getLastStatement() {
    not exists(this.getOrelse()) and
    result = this.getBody().getLastItem().getLastStatement()
    or
    result = this.getOrelse().getLastItem().getLastStatement()
  }
}
|
||||
|
||||
/** A with statement such as `with open("file") as f: text = f.read()` */
|
||||
class With extends With_ {
  /* syntax: with Expr as varname: ... */
  /** Gets the optional variables expression or the context expression. */
  override Expr getASubExpression() {
    result = this.getOptionalVars()
    or
    result = this.getContextExpr()
  }

  override Stmt getASubStatement() { this.getAStmt() = result }

  override Stmt getLastStatement() {
    exists(StmtList body | body = this.getBody() |
      result = body.getLastItem().getLastStatement()
    )
  }
}
|
||||
|
||||
/** A plain text used in a template is wrapped in a TemplateWrite statement */
|
||||
class TemplateWrite extends TemplateWrite_ {
  override Stmt getASubStatement() { none() }

  /** Gets the value written by this statement. */
  override Expr getASubExpression() { this.getValue() = result }
}
|
||||
|
||||
/** An asynchronous `for` statement, such as `async for varname in Expr: ...` */
|
||||
class AsyncFor extends For {
  /* syntax: async for varname in Expr: ... */
  // Restricts `For` to the statements for which `isAsync()` holds.
  AsyncFor() {
    this.isAsync()
  }
}
|
||||
|
||||
/** An asynchronous `with` statement, such as `async with Expr as varname: ...` */
|
||||
class AsyncWith extends With {
  /* syntax: async with Expr as varname: ... */
  // Restricts `With` to the statements for which `isAsync()` holds.
  AsyncWith() {
    this.isAsync()
  }
}
|
||||
|
||||
/** A list of statements */
|
||||
class StmtList extends StmtList_ {
  /**
   * Holds if this list of statements contains the AST node `a`, either as one
   * of its items or inside one of its items.
   */
  predicate contains(AstNode a) {
    a = this.getAnItem()
    or
    this.getAnItem().contains(a)
  }

  /** Gets the item at the highest index in this list of statements, if any. */
  Stmt getLastItem() {
    exists(int last | last = max(int i | exists(this.getItem(i))) | result = this.getItem(last))
  }
}
|
||||
@@ -1,29 +0,0 @@
|
||||
/* This file contains test-related utility functions */
|
||||
import python
|
||||
|
||||
/**
 * Gets `str` with everything before the first occurrence of `sub` removed.
 * The result starts with `sub`. If `sub` does not occur in `str`, the result
 * is `str` unchanged.
 *
 * Only the first occurrence is used, so there is exactly one result even when
 * `sub` occurs several times in `str`.
 */
bindingset[str, sub]
string remove_prefix_before_substring(string str, string sub) {
  // `indexOf(sub, 0, 0)` is the index of the 0th occurrence, searching from offset 0.
  exists(int index |
    index = str.indexOf(sub, 0, 0) and
    result = str.suffix(index)
  )
  or
  not exists(str.indexOf(sub)) and
  result = str
}
|
||||
|
||||
/**
|
||||
* Removes the part of the `resources/lib` Python library path that may vary
|
||||
* from machine to machine.
|
||||
*/
|
||||
string remove_library_prefix(Location loc) {
  // Everything before "resources/lib" in the textual form of `loc` is dropped.
  exists(string text | text = loc.toString() |
    result = remove_prefix_before_substring(text, "resources/lib")
  )
}
|
||||
|
||||
/** Returns the location of an AST node in compact form: `basename:line:column` */
|
||||
string compact_location(AstNode a) {
  exists(Location l, string basename |
    l = a.getLocation() and
    basename = l.getFile().getBaseName()
  |
    result = basename + ":" + l.getStartLine() + ":" + l.getStartColumn()
  )
}
|
||||
@@ -1,10 +0,0 @@
|
||||
/** Provides the `Unit` class. */
|
||||
|
||||
/** The unit type. */
|
||||
// An algebraic type with a single branch; `TMkUnit()` is its only value.
private newtype TUnit = TMkUnit()
|
||||
|
||||
/** The trivial type with a single element. */
|
||||
class Unit extends TUnit {
  /** Gets the textual representation of this element, which is always "unit". */
  string toString() {
    result = "unit"
  }
}
|
||||
@@ -1,93 +0,0 @@
|
||||
import python
|
||||
|
||||
/** A variable, either a global or local variable (including parameters) */
|
||||
class Variable extends @py_variable {
  Variable() {
    // Entries named "*" or "$" in the `variable` relation are not treated as variables.
    exists(string name | variable(this, _, name) | not (name = "*" or name = "$"))
  }

  /** Gets the identifier (name) of this variable */
  string getId() { variable(this, _, result) }

  /** Gets a textual representation of this element. */
  string toString() { result = "Variable " + this.getId() }

  /** Gets an access (load or store) of this variable */
  Name getAnAccess() {
    result = this.getAStore()
    or
    result = this.getALoad()
  }

  /** Gets a load of this variable */
  Name getALoad() { result.uses(this) }

  /** Gets a store of this variable */
  Name getAStore() { result.defines(this) }

  /** Gets a use of this variable */
  NameNode getAUse() { result.uses(this) }

  /** Gets the scope of this variable */
  Scope getScope() { variable(this, result, _) }

  /**
   * Whether there is an access to this variable outside
   * of its own scope. Usually occurs in nested functions
   * or for global variables.
   */
  predicate escapes() {
    exists(Scope accessScope | accessScope = this.getAnAccess().getScope() |
      accessScope != this.getScope()
    )
  }

  /** Whether this variable is a parameter; never holds unless overridden. */
  predicate isParameter() { none() }

  /** Whether this variable is a `self` parameter; never holds unless overridden. */
  predicate isSelf() { none() }
}
|
||||
|
||||
/** A local (function or class) variable */
|
||||
class LocalVariable extends Variable {
  LocalVariable() {
    this.getScope() instanceof Function
    or
    this.getScope() instanceof Class
  }

  override string toString() { result = "Local Variable " + this.getId() }

  /** Whether this variable is a parameter */
  override predicate isParameter() { this.getAnAccess() instanceof Parameter }

  /** Holds if this variable is the first parameter of a method. It is not necessarily called "self" */
  override predicate isSelf() {
    exists(Function method, Parameter first |
      method.isMethod() and
      first = method.getArg(0)
    |
      first = this.getAnAccess()
    )
  }
}
|
||||
|
||||
/**
|
||||
* A local variable that uses "load fast" semantics, for lookup:
|
||||
* If the variable is undefined, then raise an exception.
|
||||
*/
|
||||
class FastLocalVariable extends LocalVariable {
  // A local variable is "fast" exactly when its scope is a `FastLocalsFunction`.
  FastLocalVariable() { exists(FastLocalsFunction f | f = this.getScope()) }
}
|
||||
|
||||
/**
|
||||
* A local variable that uses "load name" semantics, for lookup:
|
||||
* If the variable is undefined, then lookup the value in globals().
|
||||
*/
|
||||
class NameLocalVariable extends LocalVariable {
  // Every local variable that is not a `FastLocalVariable` falls in this class.
  NameLocalVariable() { not exists(FastLocalVariable fast | fast = this) }
}
|
||||
|
||||
/** A global (module-level) variable */
|
||||
class GlobalVariable extends Variable {
  // A variable is global when its scope is a `Module`.
  GlobalVariable() { this.getScope() instanceof Module }

  override string toString() { result = "Global Variable " + this.getId() }
}
|
||||
@@ -1,174 +0,0 @@
|
||||
/**
|
||||
* Provides classes modeling cryptographic algorithms, separated into strong and weak variants.
|
||||
*
|
||||
* The classification into strong and weak is based on Wikipedia, OWASP and Google (2017).
|
||||
*/
|
||||
|
||||
/**
|
||||
* Names of cryptographic algorithms, separated into strong and weak variants.
|
||||
*
|
||||
* The names are normalized: upper-case, no spaces, dashes or underscores.
|
||||
*
|
||||
* The names are inspired by the names used in real world crypto libraries.
|
||||
*
|
||||
* The classification into strong and weak is based on Wikipedia, OWASP and Google (2017).
|
||||
*/
|
||||
private module AlgorithmNames {
  /** Holds if `name` is listed as a strong hashing algorithm. */
  predicate isStrongHashingAlgorithm(string name) {
    name =
      [
        "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2",
        "SHA224", "SHA256", "SHA384", "SHA512", "SHA3"
      ]
  }

  /** Holds if `name` is listed as a weak hashing algorithm. */
  predicate isWeakHashingAlgorithm(string name) {
    // NOTE(review): "HAVEL128" may be a misspelling of "HAVAL128"; it is kept as is
    // because name matching depends on the exact string. Confirm before changing.
    name =
      [
        "HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256",
        "RIPEMD160", "RIPEMD320", "SHA0", "SHA1"
      ]
  }

  /** Holds if `name` is listed as a strong encryption algorithm. */
  predicate isStrongEncryptionAlgorithm(string name) {
    name = ["AES", "AES128", "AES192", "AES256", "AES512", "RSA", "RABBIT", "BLOWFISH"]
  }

  /** Holds if `name` is listed as a weak encryption algorithm. */
  predicate isWeakEncryptionAlgorithm(string name) {
    name =
      [
        "DES", "3DES", "TRIPLEDES", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", "ARCFOUR",
        "ARC5", "RC5"
      ]
  }

  /** Holds if `name` is listed as a strong password hashing algorithm. */
  predicate isStrongPasswordHashingAlgorithm(string name) {
    name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"]
  }

  /** Holds if `name` is listed as a weak password hashing algorithm; currently none are. */
  predicate isWeakPasswordHashingAlgorithm(string name) { none() }
}
|
||||
|
||||
private import AlgorithmNames
|
||||
|
||||
/**
|
||||
* A cryptographic algorithm.
|
||||
*/
|
||||
private newtype TCryptographicAlgorithm =
  // Each branch pairs a listed algorithm name with a flag saying whether it is weak.
  MkHashingAlgorithm(string name, boolean isWeak) {
    isWeak = true and isWeakHashingAlgorithm(name)
    or
    isWeak = false and isStrongHashingAlgorithm(name)
  } or
  MkEncryptionAlgorithm(string name, boolean isWeak) {
    isWeak = true and isWeakEncryptionAlgorithm(name)
    or
    isWeak = false and isStrongEncryptionAlgorithm(name)
  } or
  MkPasswordHashingAlgorithm(string name, boolean isWeak) {
    isWeak = true and isWeakPasswordHashingAlgorithm(name)
    or
    isWeak = false and isStrongPasswordHashingAlgorithm(name)
  }
|
||||
|
||||
/**
|
||||
* A cryptographic algorithm.
|
||||
*/
|
||||
abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
  /** Gets a textual representation of this element, which is its normalized name. */
  string toString() { result = this.getName() }

  /**
   * Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores).
   */
  abstract string getName();

  /**
   * Holds if the name of this algorithm matches `name` modulo case,
   * white space, dashes, and underscores.
   */
  bindingset[name]
  predicate matchesName(string name) {
    // Normalize the candidate: upper-case it, then strip dashes, underscores and spaces.
    exists(string normalized | normalized = name.toUpperCase().regexpReplaceAll("[-_ ]", "") |
      this.getName() = normalized
    )
  }

  /**
   * Holds if this algorithm is weak.
   */
  abstract predicate isWeak();
}
|
||||
|
||||
/**
|
||||
* A hashing algorithm such as `MD5` or `SHA512`.
|
||||
*/
|
||||
class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm {
  // Both fields are bound by the characteristic predicate from the newtype branch.
  string name;
  boolean isWeak;

  HashingAlgorithm() { MkHashingAlgorithm(name, isWeak) = this }

  override predicate isWeak() { true = isWeak }

  override string getName() { name = result }
}
|
||||
|
||||
/**
|
||||
* An encryption algorithm such as `DES` or `AES512`.
|
||||
*/
|
||||
class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm {
  // Both fields are bound by the characteristic predicate from the newtype branch.
  string name;
  boolean isWeak;

  EncryptionAlgorithm() { MkEncryptionAlgorithm(name, isWeak) = this }

  override predicate isWeak() { true = isWeak }

  override string getName() { name = result }
}
|
||||
|
||||
/**
|
||||
* A password hashing algorithm such as `PBKDF2` or `SCRYPT`.
|
||||
*/
|
||||
class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm {
  // Both fields are bound by the characteristic predicate from the newtype branch.
  string name;
  boolean isWeak;

  PasswordHashingAlgorithm() { MkPasswordHashingAlgorithm(name, isWeak) = this }

  override predicate isWeak() { true = isWeak }

  override string getName() { name = result }
}
|
||||
@@ -1 +0,0 @@
|
||||
import old.Configuration
|
||||
@@ -1 +0,0 @@
|
||||
import old.DataFlow
|
||||
@@ -1 +0,0 @@
|
||||
import old.Files
|
||||
@@ -1 +0,0 @@
|
||||
import old.Implementation
|
||||
@@ -1 +0,0 @@
|
||||
import old.Legacy
|
||||
@@ -1 +0,0 @@
|
||||
import old.StateTracking
|
||||
@@ -1 +0,0 @@
|
||||
import old.TaintTracking
|
||||
@@ -1,37 +0,0 @@
|
||||
/** Provides commonly used BarrierGuards. */
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
|
||||
/** A validation of unknown node by comparing with a constant string value. */
|
||||
class StringConstCompare extends DataFlow::BarrierGuard, CompareNode {
  // The operand being validated, and the branch on which it is considered safe.
  ControlFlowNode checked_node;
  boolean safe_branch;

  StringConstCompare() {
    // `x == "const"` / `x != "const"`, with the operands in either order.
    exists(StrConst str_const, Cmpop op, ControlFlowNode const_node |
      const_node = str_const.getAFlowNode() and
      (
        op instanceof Eq and safe_branch = true
        or
        op instanceof NotEq and safe_branch = false
      )
    |
      this.operands(checked_node, op, const_node)
      or
      this.operands(const_node, op, checked_node)
    )
    or
    // `x in iterable` / `x not in iterable`, where every element of the iterable
    // is a string constant.
    exists(IterableNode str_const_iterable, Cmpop op |
      op instanceof In and safe_branch = true
      or
      op instanceof NotIn and safe_branch = false
    |
      this.operands(checked_node, op, str_const_iterable) and
      not exists(ControlFlowNode elem | elem = str_const_iterable.getAnElement() |
        not elem.getNode() instanceof StrConst
      )
    )
  }

  /** Holds if `node` is the checked operand and `branch` is the branch on which it is safe. */
  override predicate checks(ControlFlowNode node, boolean branch) {
    branch = safe_branch and
    node = checked_node
  }
}
|
||||
@@ -1,26 +0,0 @@
|
||||
/**
|
||||
* Provides a library for local (intra-procedural) and global (inter-procedural)
|
||||
* data flow analysis: deciding whether data can flow from a _source_ to a
|
||||
* _sink_.
|
||||
*
|
||||
* Unless configured otherwise, _flow_ means that the exact value of
|
||||
* the source may reach the sink. We do not track flow across pointer
|
||||
* dereferences or array indexing. To track these types of flow, where the
|
||||
* exact value may not be preserved, import
|
||||
* `semmle.python.dataflow.new.TaintTracking`.
|
||||
*
|
||||
* To use global (interprocedural) data flow, extend the class
|
||||
* `DataFlow::Configuration` as documented on that class. To use local
|
||||
* (intraprocedural) data flow, call `DataFlow::localFlow` or
|
||||
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
|
||||
*/
|
||||
|
||||
private import python
|
||||
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) data flow analyses.
|
||||
*/
|
||||
module DataFlow {
|
||||
import internal.DataFlowImpl
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
/**
|
||||
* Provides a library for local (intra-procedural) and global (inter-procedural)
|
||||
* data flow analysis: deciding whether data can flow from a _source_ to a
|
||||
* _sink_.
|
||||
*
|
||||
* Unless configured otherwise, _flow_ means that the exact value of
|
||||
* the source may reach the sink. We do not track flow across pointer
|
||||
* dereferences or array indexing. To track these types of flow, where the
|
||||
* exact value may not be preserved, import
|
||||
* `semmle.python.dataflow.new.TaintTracking`.
|
||||
*
|
||||
* To use global (interprocedural) data flow, extend the class
|
||||
* `DataFlow::Configuration` as documented on that class. To use local
|
||||
* (intraprocedural) data flow, call `DataFlow::localFlow` or
|
||||
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
|
||||
*/
|
||||
|
||||
private import python
|
||||
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) data flow analyses.
|
||||
*/
|
||||
module DataFlow2 {
|
||||
import semmle.python.dataflow.new.internal.DataFlowImpl2
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
/**
|
||||
* Provides a library for local (intra-procedural) and global (inter-procedural)
|
||||
* data flow analysis: deciding whether data can flow from a _source_ to a
|
||||
* _sink_.
|
||||
*
|
||||
* Unless configured otherwise, _flow_ means that the exact value of
|
||||
* the source may reach the sink. We do not track flow across pointer
|
||||
* dereferences or array indexing. To track these types of flow, where the
|
||||
* exact value may not be preserved, import
|
||||
* `semmle.python.dataflow.new.TaintTracking`.
|
||||
*
|
||||
* To use global (interprocedural) data flow, extend the class
|
||||
* `DataFlow::Configuration` as documented on that class. To use local
|
||||
* (intraprocedural) data flow, call `DataFlow::localFlow` or
|
||||
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
|
||||
*/
|
||||
|
||||
private import python
|
||||
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) data flow analyses.
|
||||
*/
|
||||
module DataFlow3 {
|
||||
import semmle.python.dataflow.new.internal.DataFlowImpl3
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
/**
|
||||
* Provides a library for local (intra-procedural) and global (inter-procedural)
|
||||
* data flow analysis: deciding whether data can flow from a _source_ to a
|
||||
* _sink_.
|
||||
*
|
||||
* Unless configured otherwise, _flow_ means that the exact value of
|
||||
* the source may reach the sink. We do not track flow across pointer
|
||||
* dereferences or array indexing. To track these types of flow, where the
|
||||
* exact value may not be preserved, import
|
||||
* `semmle.python.dataflow.new.TaintTracking`.
|
||||
*
|
||||
* To use global (interprocedural) data flow, extend the class
|
||||
* `DataFlow::Configuration` as documented on that class. To use local
|
||||
* (intraprocedural) data flow, call `DataFlow::localFlow` or
|
||||
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
|
||||
*/
|
||||
|
||||
private import python
|
||||
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) data flow analyses.
|
||||
*/
|
||||
module DataFlow4 {
|
||||
import semmle.python.dataflow.new.internal.DataFlowImpl4
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
/**
|
||||
* Provides an extension point for modeling user-controlled data.
|
||||
* Such data is often used as data-flow sources in security queries.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
// Need to import since frameworks can extend `RemoteFlowSource::Range`
|
||||
private import semmle.python.Frameworks
|
||||
private import semmle.python.Concepts
|
||||
|
||||
/**
|
||||
* A data flow source of remote user input.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RemoteFlowSource::Range` instead.
|
||||
*/
|
||||
class RemoteFlowSource extends DataFlow::Node {
|
||||
RemoteFlowSource::Range self;
|
||||
|
||||
RemoteFlowSource() { this = self }
|
||||
|
||||
/** Gets a string that describes the type of this remote flow source. */
|
||||
string getSourceType() { result = self.getSourceType() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new sources of remote user input. */
|
||||
module RemoteFlowSource {
|
||||
/**
|
||||
* A data flow source of remote user input.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `RemoteFlowSource` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets a string that describes the type of this remote flow source. */
|
||||
abstract string getSourceType();
|
||||
}
|
||||
}
|
||||
@@ -1,317 +0,0 @@
|
||||
/**
|
||||
* Provides an extension point for modeling sensitive data, such as secrets, certificates, or passwords.
|
||||
* Sensitive data can be interesting to use as data-flow sources in security queries.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
// Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource::Range`
|
||||
private import semmle.python.Frameworks
|
||||
private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics
|
||||
|
||||
// We export these explicitly, so we don't also export the `HeuristicNames` module.
|
||||
class SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;
|
||||
|
||||
module SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;
|
||||
|
||||
/**
|
||||
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `SensitiveDataSource::Range` instead.
|
||||
*/
|
||||
class SensitiveDataSource extends DataFlow::Node {
|
||||
SensitiveDataSource::Range range;
|
||||
|
||||
SensitiveDataSource() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the classification of the sensitive data.
|
||||
*/
|
||||
SensitiveDataClassification getClassification() { result = range.getClassification() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new sources of sensitive data, such as secrets, certificates, or passwords. */
|
||||
module SensitiveDataSource {
|
||||
/**
|
||||
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `SensitiveDataSource` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the classification of the sensitive data.
|
||||
*/
|
||||
abstract SensitiveDataClassification getClassification();
|
||||
}
|
||||
}
|
||||
|
||||
/** Actual sensitive data modeling */
|
||||
private module SensitiveDataModeling {
|
||||
private import SensitiveDataHeuristics::HeuristicNames
|
||||
|
||||
/**
|
||||
* Gets a reference to a function that is considered to be a sensitive source of
|
||||
* `classification`.
|
||||
*/
|
||||
private DataFlow::TypeTrackingNode sensitiveFunction(
|
||||
DataFlow::TypeTracker t, SensitiveDataClassification classification
|
||||
) {
|
||||
t.start() and
|
||||
exists(Function f |
|
||||
f.getName() = sensitiveString(classification) and
|
||||
result.asExpr() = f.getDefinition()
|
||||
)
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = sensitiveFunction(t2, classification).track(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a function that is considered to be a sensitive source of
|
||||
* `classification`.
|
||||
*/
|
||||
DataFlow::Node sensitiveFunction(SensitiveDataClassification classification) {
|
||||
sensitiveFunction(DataFlow::TypeTracker::end(), classification).flowsTo(result)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference (in local scope) to a string constant that, if used as the key in
|
||||
* a lookup, indicates the presence of sensitive data with `classification`.
|
||||
*/
|
||||
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
|
||||
// Note: If this is implemented with type-tracking, we will get cross-talk as
|
||||
// illustrated in python/ql/test/experimental/dataflow/sensitive-data/test.py
|
||||
exists(DataFlow::LocalSourceNode source |
|
||||
source.asExpr().(StrConst).getText() = sensitiveString(classification) and
|
||||
source.flowsTo(result)
|
||||
)
|
||||
}
|
||||
|
||||
/** A function call that is considered a source of sensitive data. */
|
||||
class SensitiveFunctionCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveFunctionCall() {
|
||||
this.getFunction() = sensitiveFunction(classification)
|
||||
or
|
||||
// to cover functions that we don't have the definition for, and where the
|
||||
// reference to the function has not already been marked as being sensitive
|
||||
this.getFunction().asCfgNode().(NameNode).getId() = sensitiveString(classification)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/**
|
||||
* Tracks any modeled source of sensitive data (with any classification),
|
||||
* to limit the scope of `extraStepForCalls`. See its QLDoc for more context.
|
||||
*
|
||||
* Also see `extraStepForCalls`.
|
||||
*/
|
||||
private DataFlow::TypeTrackingNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result instanceof SensitiveDataSource
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = possibleSensitiveCallable(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Tracks any modeled source of sensitive data (with any classification),
|
||||
* to limit the scope of `extraStepForCalls`. See its QLDoc for more context.
|
||||
*
|
||||
* Also see `extraStepForCalls`.
|
||||
*/
|
||||
private DataFlow::Node possibleSensitiveCallable() {
|
||||
possibleSensitiveCallable(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a
|
||||
* taint-flow step for sensitive-data, to ensure calls are handled correctly.
|
||||
*
|
||||
* To handle calls properly, while preserving a good source for path explanations,
|
||||
* you need to include this predicate as an additional taint step in your taint-tracking
|
||||
* configurations.
|
||||
*
|
||||
* The core problem can be illustrated by the example below. If we consider the
|
||||
* `print` a sink, what path and what source do we want to show? My initial approach
|
||||
* would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
|
||||
* lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
|
||||
* like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
|
||||
* with a non-optimal path, which starts at _bad source_, and therefore doesn't show
|
||||
* how we figured out that `non_sensitive_name`
|
||||
* could be a function that returns a password (and in cases where there are many calls to
|
||||
* `my_func` it will be annoying for someone to figure this out manually).
|
||||
*
|
||||
* By including this additional taint-step in the taint-tracking configuration, it's possible
|
||||
* to get a path explanation going from _good source_ to the sink.
|
||||
*
|
||||
* ```python
|
||||
* def my_func(non_sensitive_name):
|
||||
* x = non_sensitive_name() # <-- bad source
|
||||
* print(x) # <-- sink
|
||||
*
|
||||
* import not_found
|
||||
* f = not_found.get_passwd # <-- good source
|
||||
* my_func(f)
|
||||
* ```
|
||||
*/
|
||||
predicate extraStepForCalls(DataFlow::Node nodeFrom, DataFlow::CallCfgNode nodeTo) {
|
||||
// However, we do still use the type-tracking approach to limit the size of this
|
||||
// predicate.
|
||||
nodeTo.getFunction() = nodeFrom and
|
||||
nodeFrom = possibleSensitiveCallable()
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveStrConstCandidate() {
|
||||
result = any(StrConst s | not s.isDocString()).getText() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveAttributeNameCandidate() {
|
||||
result = any(DataFlow::AttrRead a).getAttributeName() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveParameterNameCandidate() {
|
||||
result = any(Parameter p).getName() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveFunctionNameCandidate() {
|
||||
result = any(Function f).getName() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private string sensitiveNameCandidate() {
|
||||
result = any(Name n).getId() and
|
||||
not result.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
/**
|
||||
* This helper predicate serves to deduplicate the results of the preceding predicates. This
|
||||
* means that if, say, an attribute and a function parameter have the same name, then that name will
|
||||
* only be matched once, which greatly cuts down on the number of regexp matches that have to be
|
||||
* performed.
|
||||
*
|
||||
* Under normal circumstances, deduplication is only performed when a predicate is materialized, and
|
||||
* so to see the effect of this we must create a separate predicate that calculates the union of the
|
||||
* preceding predicates.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private string sensitiveStringCandidate() {
|
||||
result in [
|
||||
sensitiveNameCandidate(), sensitiveAttributeNameCandidate(),
|
||||
sensitiveParameterNameCandidate(), sensitiveFunctionNameCandidate(),
|
||||
sensitiveStrConstCandidate()
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns strings (primarily the names of various program entities) that may contain sensitive data
|
||||
* with the classification `classification`.
|
||||
*
|
||||
* This helper predicate ends up being very similar to `nameIndicatesSensitiveData`,
|
||||
* but is performance optimized to limit the number of regexp matches that have to be performed.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private string sensitiveString(SensitiveDataClassification classification) {
|
||||
result = sensitiveStringCandidate() and
|
||||
result.regexpMatch(maybeSensitiveRegexp(classification))
|
||||
}
|
||||
|
||||
/**
|
||||
* Any kind of variable assignment (also including with/for) where the name indicates
|
||||
* it contains sensitive data.
|
||||
*
|
||||
* Note: We _could_ make any access to a variable with a sensitive name a source of
|
||||
* sensitive data, but to make path explanations in data-flow/taint-tracking good,
|
||||
* we don't want that, since it works against allowing users to understand the flow
|
||||
* in the program (which is the whole point).
|
||||
*
|
||||
* Note: To make data-flow/taint-tracking work, the expression that is _assigned_ to
|
||||
* the variable is marked as the source (as compared to marking the variable as the
|
||||
* source).
|
||||
*/
|
||||
class SensitiveVariableAssignment extends SensitiveDataSource::Range {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveVariableAssignment() {
|
||||
exists(DefinitionNode def |
|
||||
def.(NameNode).getId() = sensitiveString(classification) and
|
||||
(
|
||||
this.asCfgNode() = def.getValue()
|
||||
or
|
||||
this.asCfgNode() = def.getValue().(ForNode).getSequence()
|
||||
) and
|
||||
not this.asExpr() instanceof FunctionExpr and
|
||||
not this.asExpr() instanceof ClassExpr
|
||||
)
|
||||
or
|
||||
exists(With with |
|
||||
with.getOptionalVars().(Name).getId() = sensitiveString(classification) and
|
||||
this.asExpr() = with.getContextExpr()
|
||||
)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/** An attribute access that is considered a source of sensitive data. */
|
||||
class SensitiveAttributeAccess extends SensitiveDataSource::Range {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveAttributeAccess() {
|
||||
// Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
|
||||
// I considered excluding any `from ... import something_sensitive`, but then realized that
|
||||
// we should flag up `from ... import password as ...` as a password
|
||||
this.(DataFlow::AttrRead).getAttributeName() = sensitiveString(classification)
|
||||
or
|
||||
// Things like `getattr(foo, <reference-to-string>)`
|
||||
this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/** A subscript, where the key indicates the result will be sensitive data. */
|
||||
class SensitiveSubscript extends SensitiveDataSource::Range {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveSubscript() {
|
||||
this.asCfgNode().(SubscriptNode).getIndex() =
|
||||
sensitiveLookupStringConst(classification).asCfgNode()
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/** A call to `get` on an object, where the key indicates the result will be sensitive data. */
|
||||
class SensitiveGetCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveGetCall() {
|
||||
this.getFunction().(DataFlow::AttrRef).getAttributeName() = "get" and
|
||||
this.getArg(0) = sensitiveLookupStringConst(classification)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/** A parameter where the name indicates it will receive sensitive data. */
|
||||
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveParameter() { this.getParameter().getName() = sensitiveString(classification) }
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
}
|
||||
|
||||
predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling::extraStepForCalls/2;
|
||||
@@ -1,19 +0,0 @@
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) taint-tracking analyses.
|
||||
*
|
||||
* To use global (interprocedural) taint tracking, extend the class
|
||||
* `TaintTracking::Configuration` as documented on that class. To use local
|
||||
* (intraprocedural) taint tracking, call `TaintTracking::localTaint` or
|
||||
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
|
||||
*/
|
||||
|
||||
private import python
|
||||
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) taint-tracking analyses.
|
||||
*/
|
||||
module TaintTracking {
|
||||
import internal.tainttracking1.TaintTrackingImpl
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) taint-tracking analyses.
|
||||
*
|
||||
* To use global (interprocedural) taint tracking, extend the class
|
||||
* `TaintTracking::Configuration` as documented on that class. To use local
|
||||
* (intraprocedural) taint tracking, call `TaintTracking::localTaint` or
|
||||
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
|
||||
*/
|
||||
|
||||
private import python
|
||||
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) taint-tracking analyses.
|
||||
*/
|
||||
module TaintTracking2 {
|
||||
import semmle.python.dataflow.new.internal.tainttracking2.TaintTrackingImpl
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) taint-tracking analyses.
|
||||
*
|
||||
* To use global (interprocedural) taint tracking, extend the class
|
||||
* `TaintTracking::Configuration` as documented on that class. To use local
|
||||
* (intraprocedural) taint tracking, call `TaintTracking::localTaint` or
|
||||
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
|
||||
*/
|
||||
|
||||
private import python
|
||||
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) taint-tracking analyses.
|
||||
*/
|
||||
module TaintTracking3 {
|
||||
import semmle.python.dataflow.new.internal.tainttracking3.TaintTrackingImpl
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) taint-tracking analyses.
|
||||
*
|
||||
* To use global (interprocedural) taint tracking, extend the class
|
||||
* `TaintTracking::Configuration` as documented on that class. To use local
|
||||
* (intraprocedural) taint tracking, call `TaintTracking::localTaint` or
|
||||
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
|
||||
*/
|
||||
|
||||
private import python
|
||||
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) taint-tracking analyses.
|
||||
*/
|
||||
module TaintTracking4 {
|
||||
import semmle.python.dataflow.new.internal.tainttracking4.TaintTrackingImpl
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
/**
|
||||
* This file acts as a wrapper for `internal.TypeTracker`, exposing some of the functionality with
|
||||
* names that are more appropriate for Python.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import internal.TypeTracker as Internal
|
||||
|
||||
/** Any string that may appear as the name of an attribute or access path. */
|
||||
class AttributeName = Internal::ContentName;
|
||||
|
||||
/** Either an attribute name, or the empty string (representing no attribute). */
|
||||
class OptionalAttributeName = Internal::OptionalContentName;
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to track a value to a given dataflow node.
|
||||
*
|
||||
* This can be used to track objects that implement a certain API in order to
|
||||
* recognize calls to that API. Note that type-tracking does not by itself provide a
|
||||
* source/sink relation, that is, it may determine that a node has a given type,
|
||||
* but it won't determine where that type came from.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for tracking some type `myType`:
|
||||
* ```ql
|
||||
* DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* result = myType(t2).track(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::LocalSourceNode myType() { result = myType(DataFlow::TypeTracker::end()) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
|
||||
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t = t2.smallstep(myType(t2), result)`.
|
||||
*/
|
||||
class TypeTracker extends Internal::TypeTracker {
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
|
||||
* The type tracking only ends after the attribute has been loaded.
|
||||
*/
|
||||
predicate startInAttr(string attrName) { this.startInContent(attrName) }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Gets the attribute associated with this type tracker.
|
||||
*/
|
||||
string getAttr() { result = this.getContent() }
|
||||
}
|
||||
|
||||
module TypeTracker = Internal::TypeTracker;
|
||||
|
||||
class StepSummary = Internal::StepSummary;
|
||||
|
||||
module StepSummary = Internal::StepSummary;
|
||||
|
||||
class TypeBackTracker = Internal::TypeBackTracker;
|
||||
|
||||
module TypeBackTracker = Internal::TypeBackTracker;
|
||||
@@ -1,252 +0,0 @@
|
||||
/** This module provides an API for attribute reads and writes. */
|
||||
|
||||
import DataFlowUtil
|
||||
import DataFlowPublic
|
||||
private import DataFlowPrivate
|
||||
private import semmle.python.types.Builtins
|
||||
|
||||
/**
|
||||
* A data flow node that reads or writes an attribute of an object.
|
||||
*
|
||||
* This abstract base class only knows about the base object on which the attribute is being
|
||||
* accessed, and the attribute itself, if it is statically inferrable.
|
||||
*/
|
||||
abstract class AttrRef extends Node {
|
||||
/**
|
||||
* Gets the data flow node corresponding to the object whose attribute is being read or written.
|
||||
*/
|
||||
abstract Node getObject();
|
||||
|
||||
/**
|
||||
* Holds if this data flow node accesses attribute named `attrName` on object `object`.
|
||||
*/
|
||||
predicate accesses(Node object, string attrName) {
|
||||
this.getObject() = object and this.getAttributeName() = attrName
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the expression node that defines the attribute being accessed, if any. This is
|
||||
* usually an identifier or literal.
|
||||
*/
|
||||
abstract ExprNode getAttributeNameExpr();
|
||||
|
||||
/**
|
||||
* Holds if this attribute reference may access an attribute named `attrName`.
|
||||
* Uses local data flow to track potential attribute names, which may lead to imprecision. If more
|
||||
* precision is needed, consider using `getAttributeName` instead.
|
||||
*/
|
||||
predicate mayHaveAttributeName(string attrName) {
|
||||
attrName = this.getAttributeName()
|
||||
or
|
||||
exists(LocalSourceNode nodeFrom |
|
||||
nodeFrom.flowsTo(this.getAttributeNameExpr()) and
|
||||
attrName = nodeFrom.asExpr().(StrConst).getText()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name of the attribute being read or written. For dynamic attribute accesses, this
|
||||
* method is not guaranteed to return a result. For such cases, using `mayHaveAttributeName` may yield
|
||||
* better results.
|
||||
*/
|
||||
abstract string getAttributeName();
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow node that writes an attribute of an object. This includes
|
||||
* - Simple attribute writes: `object.attr = value`
|
||||
* - Dynamic attribute writes: `setattr(object, attr, value)`
|
||||
* - Fields written during class initialization: `class MyClass: attr = value`
|
||||
*/
|
||||
abstract class AttrWrite extends AttrRef {
|
||||
/** Gets the data flow node corresponding to the value that is written to the attribute. */
|
||||
abstract Node getValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a control flow node for a simple attribute assignment. That is,
|
||||
* ```python
|
||||
* object.attr = value
|
||||
* ```
|
||||
* Also gives access to the `value` being written, by extending `DefinitionNode`.
|
||||
*/
|
||||
private class AttributeAssignmentNode extends DefinitionNode, AttrNode {
|
||||
override ControlFlowNode getValue() { result = DefinitionNode.super.getValue() }
|
||||
}
|
||||
|
||||
/** A simple attribute assignment: `object.attr = value`. */
|
||||
private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {
|
||||
override AttributeAssignmentNode node;
|
||||
|
||||
override Node getValue() { result.asCfgNode() = node.getValue() }
|
||||
|
||||
override Node getObject() { result.asCfgNode() = node.getObject() }
|
||||
|
||||
override ExprNode getAttributeNameExpr() {
|
||||
// Attribute names don't exist as `Node`s in the control flow graph, as they can only ever be
|
||||
// identifiers, and are therefore represented directly as strings.
|
||||
// Use `getAttributeName` to access the name of the attribute.
|
||||
none()
|
||||
}
|
||||
|
||||
override string getAttributeName() { result = node.getName() }
|
||||
}
|
||||
|
||||
/** Represents `CallNode`s that may refer to calls to built-in functions or classes. */
|
||||
private class BuiltInCallNode extends CallNode {
|
||||
string name;
|
||||
|
||||
BuiltInCallNode() {
|
||||
// TODO disallow instances where the name of the built-in may refer to an in-scope variable of that name.
|
||||
exists(NameNode id | this.getFunction() = id and id.getId() = name and id.isGlobal()) and
|
||||
name = any(Builtin b).getName()
|
||||
}
|
||||
|
||||
/** Gets the name of the built-in function that is called at this `CallNode` */
|
||||
string getBuiltinName() { result = name }
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a call to the built-ins that handle dynamic inspection and modification of
|
||||
* attributes: `getattr`, `setattr`, `hasattr`, and `delattr`.
|
||||
*/
|
||||
private class BuiltinAttrCallNode extends BuiltInCallNode {
|
||||
BuiltinAttrCallNode() { name in ["setattr", "getattr", "hasattr", "delattr"] }
|
||||
|
||||
/** Gets the control flow node for the object on which the attribute is accessed. */
|
||||
ControlFlowNode getObject() { result in [this.getArg(0), this.getArgByName("object")] }
|
||||
|
||||
/**
|
||||
* Gets the control flow node for the value that is being written to the attribute.
|
||||
* Only relevant for `setattr` calls.
|
||||
*/
|
||||
ControlFlowNode getValue() {
|
||||
// only valid for `setattr`
|
||||
name = "setattr" and
|
||||
result in [this.getArg(2), this.getArgByName("value")]
|
||||
}
|
||||
|
||||
/** Gets the control flow node that defines the name of the attribute being accessed. */
|
||||
ControlFlowNode getName() { result in [this.getArg(1), this.getArgByName("name")] }
|
||||
}
|
||||
|
||||
/** Represents calls to the built-in `setattr`. */
|
||||
private class SetAttrCallNode extends BuiltinAttrCallNode {
|
||||
SetAttrCallNode() { name = "setattr" }
|
||||
}
|
||||
|
||||
/** Represents calls to the built-in `getattr`. */
|
||||
private class GetAttrCallNode extends BuiltinAttrCallNode {
|
||||
GetAttrCallNode() { name = "getattr" }
|
||||
}
|
||||
|
||||
/** An attribute assignment using `setattr`, e.g. `setattr(object, attr, value)` */
|
||||
private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode {
|
||||
override SetAttrCallNode node;
|
||||
|
||||
override Node getValue() { result.asCfgNode() = node.getValue() }
|
||||
|
||||
override Node getObject() { result.asCfgNode() = node.getObject() }
|
||||
|
||||
override ExprNode getAttributeNameExpr() { result.asCfgNode() = node.getName() }
|
||||
|
||||
override string getAttributeName() {
|
||||
result = this.getAttributeNameExpr().asExpr().(StrConst).getText()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents an attribute of a class that is assigned statically during class definition. For instance
|
||||
* ```python
|
||||
* class MyClass:
|
||||
* attr = value
|
||||
* ...
|
||||
* ```
|
||||
* Instances of this class correspond to the `NameNode` for `attr`, and also give access to `value` by
|
||||
* virtue of being a `DefinitionNode`.
|
||||
*/
|
||||
private class ClassAttributeAssignmentNode extends DefinitionNode, NameNode {
|
||||
ClassAttributeAssignmentNode() { this.getScope() = any(ClassExpr c).getInnerScope() }
|
||||
}
|
||||
|
||||
/**
|
||||
* An attribute assignment via a class field, e.g.
|
||||
* ```python
|
||||
* class MyClass:
|
||||
* attr = value
|
||||
* ```
|
||||
* is treated as equivalent to `MyClass.attr = value`.
|
||||
*/
|
||||
private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode {
|
||||
ClassExpr cls;
|
||||
override ClassAttributeAssignmentNode node;
|
||||
|
||||
ClassDefinitionAsAttrWrite() { node.getScope() = cls.getInnerScope() }
|
||||
|
||||
override Node getValue() { result.asCfgNode() = node.getValue() }
|
||||
|
||||
override Node getObject() { result.asCfgNode() = cls.getAFlowNode() }
|
||||
|
||||
override ExprNode getAttributeNameExpr() { none() }
|
||||
|
||||
override string getAttributeName() { result = node.getId() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A read of an attribute on an object. This includes
|
||||
* - Simple attribute reads: `object.attr`
|
||||
* - Dynamic attribute reads using `getattr`: `getattr(object, attr)`
|
||||
* - Qualified imports: `from module import attr as name`
|
||||
*/
|
||||
abstract class AttrRead extends AttrRef, Node, LocalSourceNode { }
|
||||
|
||||
/** A plain attribute read such as `object.attr`. */
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {
  override AttrNode node;

  /** Gets the node for the object whose attribute is read. */
  override Node getObject() { node.getObject() = result.asCfgNode() }

  /** Gets the attribute name, which the underlying `AttrNode` provides as a string. */
  override string getAttributeName() { node.getName() = result }

  override ExprNode getAttributeNameExpr() {
    // An attribute name can only ever be an identifier, so it is not a `Node` in the
    // control flow graph; it is represented directly as a string instead.
    // Use `getAttributeName` to access the name of the attribute.
    none()
  }
}
|
||||
|
||||
/** A dynamic attribute read through `getattr`, i.e. `getattr(object, attr)`. */
private class GetAttrCallAsAttrRead extends AttrRead, CfgNode {
  override GetAttrCallNode node;

  /** Gets the node for the object passed to `getattr`. */
  override Node getObject() { node.getObject() = result.asCfgNode() }

  /** Gets the node for the expression that supplies the attribute name. */
  override ExprNode getAttributeNameExpr() { node.getName() = result.asCfgNode() }

  /** Gets the attribute name; only has a result when the name expression is a `StrConst`. */
  override string getAttributeName() {
    exists(StrConst nameLiteral |
      nameLiteral = this.getAttributeNameExpr().asExpr() and
      result = nameLiteral.getText()
    )
  }
}
|
||||
|
||||
/**
 * A named import, viewed as an attribute read. The statement
 * ```python
 * from module import attr as attr_ref
 * ```
 * is handled as a read of the attribute `module.attr`, even when `module` itself is not
 * imported directly.
 */
private class ModuleAttributeImportAsAttrRead extends AttrRead, CfgNode {
  override ImportMemberNode node;

  /** Gets the node for the module the member is imported from. */
  override Node getObject() { node.getModule(_) = result.asCfgNode() }

  /** Gets the name of the imported member. */
  override string getAttributeName() { exists(node.getModule(result)) }

  override ExprNode getAttributeNameExpr() {
    // The imported name can only ever be an identifier, so it is not a `Node` in the
    // control flow graph; it is represented directly as a string instead.
    // Use `getAttributeName` to access the name of the attribute.
    none()
  }
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,181 +0,0 @@
|
||||
/**
|
||||
* Provides consistency queries for checking invariants in the language-specific
|
||||
* data-flow classes and predicates.
|
||||
*/
|
||||
|
||||
private import DataFlowImplSpecific::Private
|
||||
private import DataFlowImplSpecific::Public
|
||||
private import tainttracking1.TaintTrackingParameter::Private
|
||||
private import tainttracking1.TaintTrackingParameter::Public
|
||||
|
||||
/**
 * Consistency checks for the data-flow nodes and steps. Every `query predicate` below
 * selects the violating elements together with a message describing the violation.
 */
module Consistency {
  /**
   * A node that is an argument, parameter, return or out node, or that is an endpoint
   * of a local flow, jump, store, read or default taint step.
   */
  private class RelevantNode extends Node {
    RelevantNode() {
      // Nodes that take part in calls.
      this instanceof ArgumentNode
      or
      this instanceof ParameterNode
      or
      this instanceof ReturnNode
      or
      this = getAnOutNode(_, _)
      or
      // Either endpoint of a flow step.
      simpleLocalFlowStep(this, _)
      or
      simpleLocalFlowStep(_, this)
      or
      jumpStep(this, _)
      or
      jumpStep(_, this)
      or
      storeStep(this, _, _)
      or
      storeStep(_, _, this)
      or
      readStep(this, _, _)
      or
      readStep(_, _, this)
      or
      defaultAdditionalTaintStep(this, _)
      or
      defaultAdditionalTaintStep(_, this)
    }
  }

  /** Holds if relevant node `n` does not have exactly one enclosing callable. */
  query predicate uniqueEnclosingCallable(Node n, string msg) {
    n instanceof RelevantNode and
    exists(int total |
      total = count(n.getEnclosingCallable()) and
      not total = 1 and
      msg = "Node should have one enclosing callable but has " + total + "."
    )
  }

  /** Holds if relevant node `n` does not have exactly one node type. */
  query predicate uniqueType(Node n, string msg) {
    n instanceof RelevantNode and
    exists(int total |
      total = count(getNodeType(n)) and
      not total = 1 and
      msg = "Node should have one type but has " + total + "."
    )
  }

  /** Holds if `n` does not have exactly one location tuple. */
  query predicate uniqueNodeLocation(Node n, string msg) {
    exists(int total |
      total =
        count(string file, int line1, int col1, int line2, int col2 |
          n.hasLocationInfo(file, line1, col1, line2, col2)
        ) and
      not total = 1 and
      msg = "Node should have one location but has " + total + "."
    )
  }

  /** Holds if at least one node has no location; `msg` reports how many. */
  query predicate missingLocation(string msg) {
    exists(int total |
      total = strictcount(Node n | not n.hasLocationInfo(_, _, _, _, _)) and
      msg = "Nodes without location: " + total
    )
  }

  /** Holds if `n` does not have exactly one `toString` result. */
  query predicate uniqueNodeToString(Node n, string msg) {
    exists(int total |
      total = count(n.toString()) and
      not total = 1 and
      msg = "Node should have one toString but has " + total + "."
    )
  }

  /** Holds if at least one node has no `toString` result; `msg` reports how many. */
  query predicate missingToString(string msg) {
    exists(int total |
      total = strictcount(Node n | not exists(n.toString())) and
      msg = "Nodes without toString: " + total
    )
  }

  /** Holds if `p` is a parameter of a callable that differs from its enclosing callable. */
  query predicate parameterCallable(ParameterNode p, string msg) {
    msg = "Callable mismatch for parameter." and
    exists(DataFlowCallable owner | p.isParameterOf(owner, _) |
      p.getEnclosingCallable() != owner
    )
  }

  /** Holds if the local flow step from `n1` to `n2` crosses enclosing callables. */
  query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
    msg = "Local flow step does not preserve enclosing callable." and
    simpleLocalFlowStep(n1, n2) and
    n2.getEnclosingCallable() != n1.getEnclosingCallable()
  }

  /** Gets a type that is the node type of some node. */
  private DataFlowType typeRepr() { getNodeType(_) = result }

  /** Holds if node type `t` is not compatible with itself. */
  query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
    msg = "Type compatibility predicate is not reflexive." and
    t = typeRepr() and
    not compatibleTypes(t, t)
  }

  /**
   * Holds if `n` is unreachable in `call`, yet an enclosing callable of `n` is not a
   * viable callable for `call`.
   */
  query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
    msg = "Call context for isUnreachableInCall is inconsistent with call graph." and
    isUnreachableInCall(n, call) and
    exists(DataFlowCallable encl | n.getEnclosingCallable() = encl |
      not encl = viableCallable(call)
    )
  }

  /** Holds if `n` is an out node or argument of `call` but has another enclosing callable. */
  query predicate localCallNodes(DataFlowCall call, Node n, string msg) {
    n.getEnclosingCallable() != call.getEnclosingCallable() and
    (
      n.(ArgumentNode).argumentOf(call, _) and
      msg = "ArgumentNode and call does not share enclosing callable."
      or
      n = getAnOutNode(call, _) and
      msg = "OutNode and call does not share enclosing callable."
    )
  }

  // This predicate helps the compiler forget that in some languages
  // it is impossible for a result of `getPreUpdateNode` to be an
  // instance of `PostUpdateNode`.
  private Node getPre(PostUpdateNode n) {
    n.getPreUpdateNode() = result
    or
    none()
  }

  /** Holds if post-update node `n` is its own pre-update node. */
  query predicate postIsNotPre(PostUpdateNode n, string msg) {
    msg = "PostUpdateNode should not equal its pre-update node." and
    n = getPre(n)
  }

  /** Holds if post-update node `n` does not have exactly one pre-update node. */
  query predicate postHasUniquePre(PostUpdateNode n, string msg) {
    exists(int total |
      total = count(n.getPreUpdateNode()) and
      not total = 1 and
      msg = "PostUpdateNode should have one pre-update node but has " + total + "."
    )
  }

  /** Holds if `n` is the pre-update node of more than one post-update node. */
  query predicate uniquePostUpdate(Node n, string msg) {
    msg = "Node has multiple PostUpdateNodes." and
    strictcount(PostUpdateNode post | n = post.getPreUpdateNode()) > 1
  }

  /** Holds if `n` and its pre-update node have different enclosing callables. */
  query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
    msg = "PostUpdateNode does not share callable with its pre-update node." and
    exists(Node pre | pre = n.getPreUpdateNode() |
      pre.getEnclosingCallable() != n.getEnclosingCallable()
    )
  }

  /** Holds if `n` is the pre-update node of some post-update node. */
  private predicate hasPost(Node n) { n = any(PostUpdateNode post).getPreUpdateNode() }

  /**
   * Holds if `n` has no post-update node although a read step leads from it to a node
   * that has one.
   */
  query predicate reverseRead(Node n, string msg) {
    msg = "Origin of readStep is missing a PostUpdateNode." and
    not hasPost(n) and
    exists(Node target | readStep(n, _, target) | hasPost(target))
  }

  /**
   * Holds if argument `n` has no post-update node and is not exempted by
   * `isImmutableOrUnobservable`.
   */
  query predicate argHasPostUpdate(ArgumentNode n, string msg) {
    msg = "ArgumentNode is missing PostUpdateNode." and
    not isImmutableOrUnobservable(n) and
    not hasPost(n)
  }

  // This predicate helps the compiler forget that in some languages
  // it is impossible for a `PostUpdateNode` to be the target of
  // `simpleLocalFlowStep`.
  private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }

  /** Holds if post-update node `n` is the target of a local flow step. */
  query predicate postWithInFlow(Node n, string msg) {
    msg = "PostUpdateNode should not be the target of local flow." and
    simpleLocalFlowStep(_, n) and
    isPostUpdateNode(n)
  }
}
|
||||
@@ -1,12 +0,0 @@
|
||||
/**
|
||||
* Provides Python-specific definitions for use in the data flow library.
|
||||
*/
|
||||
/** Groups the definitions imported from `DataFlowPrivate`. */
module Private {
  import DataFlowPrivate
  // import DataFlowDispatch
}
|
||||
|
||||
/** Groups the definitions imported from `DataFlowPublic` and `DataFlowUtil`. */
module Public {
  import DataFlowPublic
  import DataFlowUtil
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,583 +0,0 @@
|
||||
/**
|
||||
* Provides Python-specific definitions for use in the data flow library.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import DataFlowPrivate
|
||||
import semmle.python.dataflow.new.TypeTracker
|
||||
import Attributes
|
||||
import LocalSources
|
||||
private import semmle.python.essa.SsaCompute
|
||||
|
||||
/**
 * IPA type for data flow nodes.
 *
 * Flow between SSA variables is computed in `Essa.qll`.
 *
 * Flow from SSA variables to control flow nodes is generally via uses.
 *
 * Flow from control flow nodes to SSA variables is generally via assignments.
 *
 * The current implementation of these cross flows can be seen in `EssaTaintTracking`.
 */
newtype TNode =
  /** A node corresponding to an SSA variable. */
  TEssaNode(EssaVariable var) or
  /** A node corresponding to a control flow node. */
  TCfgNode(ControlFlowNode node) { isExpressionNode(node) } or
  /** A synthetic node representing the value of an object before a state change. */
  TSyntheticPreUpdateNode(NeedsSyntheticPreUpdateNode post) or
  /** A synthetic node representing the value of an object after a state change. */
  TSyntheticPostUpdateNode(NeedsSyntheticPostUpdateNode pre) or
  /** A node representing a global (module-level) variable in a specific module. */
  TModuleVariableNode(Module m, GlobalVariable v) { v.escapes() and m = v.getScope() } or
  /**
   * A node representing the overflow positional arguments to a call.
   * That is, `call` contains more positional arguments than there are
   * positional parameters in `callable`. The extra ones are passed as
   * a tuple to a starred parameter; this synthetic node represents that tuple.
   */
  TPosOverflowNode(CallNode call, CallableValue callable) {
    exists(getPositionalOverflowArg(call, callable, _))
  } or
  /**
   * A node representing the overflow keyword arguments to a call.
   * That is, `call` contains keyword arguments for keys that do not have
   * keyword parameters in `callable`. These extra ones are passed as
   * a dictionary to a doubly starred parameter; this synthetic node
   * represents that dictionary.
   */
  TKwOverflowNode(CallNode call, CallableValue callable) {
    // The call passes `**kwargs` on to a callable that has a `**` parameter ...
    callable.getScope().hasKwArg() and
    exists(call.getNode().getKwargs()) and
    ArgumentPassing::connects(call, callable)
    or
    // ... or there is at least one explicit overflow keyword argument.
    exists(getKeywordOverflowArg(call, callable, _))
  } or
  /**
   * A node representing an unpacked element of a dictionary argument.
   * That is, `call` contains argument `**{"foo": bar}` which is passed
   * to parameter `foo` of `callable`.
   */
  TKwUnpackedNode(CallNode call, CallableValue callable, string name) {
    call_unpacks(call, _, callable, name, _)
  } or
  /**
   * A synthetic node representing that an iterable sequence flows to `consumer`.
   */
  TIterableSequenceNode(UnpackingAssignmentSequenceTarget consumer) or
  /**
   * A synthetic node representing that there may be an iterable element
   * for `consumer` to consume.
   */
  TIterableElementNode(UnpackingAssignmentTarget consumer)
|
||||
|
||||
/**
 * Helper for `Node::getEnclosingCallable`.
 *
 * Gets the callable whose scope is `s`; when no callable has scope `s`, gets the
 * result for the enclosing scope of `s` instead.
 */
private DataFlowCallable getCallableScope(Scope s) {
  if s = any(DataFlowCallable candidate).getScope()
  then s = result.getScope()
  else result = getCallableScope(s.getEnclosingScope())
}
|
||||
|
||||
/**
 * An element, viewed as a node in a data flow graph, for example an SSA variable
 * (`EssaNode`) or a control flow node (`CfgNode`).
 *
 * The defaults defined here have no scope, location, variable, control-flow node or
 * expression; subclasses override the members that apply to them.
 */
class Node extends TNode {
  /** Gets a textual representation of this element. */
  string toString() { result = "Data flow node" }

  /** Gets the scope of this node. */
  Scope getScope() { none() }

  /** Gets the enclosing callable of this node, derived from its scope. */
  DataFlowCallable getEnclosingCallable() { getCallableScope(this.getScope()) = result }

  /** Gets the location of this node. */
  Location getLocation() { none() }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(Location loc | loc = this.getLocation() |
      loc.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
    )
  }

  /** Gets the ESSA variable corresponding to this node, if any. */
  EssaVariable asVar() { none() }

  /** Gets the control-flow node corresponding to this node, if any. */
  ControlFlowNode asCfgNode() { none() }

  /** Gets the expression corresponding to this node, if any. */
  Expr asExpr() { none() }

  /**
   * Gets a local source node from which data may flow to this node in zero or more
   * local data-flow steps.
   */
  LocalSourceNode getALocalSource() { result.flowsTo(this) }
}
|
||||
|
||||
/** A data-flow node that wraps an SSA variable. */
class EssaNode extends Node, TEssaNode {
  EssaVariable var;

  EssaNode() { TEssaNode(var) = this }

  /** Gets the `EssaVariable` represented by this data-flow node. */
  EssaVariable getVar() { var = result }

  override EssaVariable asVar() { var = result }

  /** Gets a textual representation of this element, taken from the variable. */
  override string toString() { var.toString() = result }

  /** Gets the scope of the underlying variable. */
  override Scope getScope() { var.getScope() = result }

  /** Gets the location of the underlying variable. */
  override Location getLocation() { var.getLocation() = result }
}
|
||||
|
||||
/** A data-flow node that wraps a control-flow node. */
class CfgNode extends Node, TCfgNode {
  ControlFlowNode node;

  CfgNode() { TCfgNode(node) = this }

  /** Gets the `ControlFlowNode` represented by this data-flow node. */
  ControlFlowNode getNode() { node = result }

  override ControlFlowNode asCfgNode() { node = result }

  /** Gets a textual representation of this element, taken from the control-flow node. */
  override string toString() { node.toString() = result }

  /** Gets the scope of the underlying control-flow node. */
  override Scope getScope() { node.getScope() = result }

  /** Gets the location of the underlying control-flow node. */
  override Location getLocation() { node.getLocation() = result }
}
|
||||
|
||||
/** A data-flow node whose control-flow node is a `CallNode`. */
class CallCfgNode extends CfgNode, LocalSourceNode {
  override CallNode node;

  /**
   * Gets the data-flow node for the function component of the call corresponding to
   * this data-flow node.
   */
  Node getFunction() { node.getFunction() = result.asCfgNode() }

  /** Gets the data-flow node for the `i`th argument of the call corresponding to this data-flow node. */
  Node getArg(int i) { node.getArg(i) = result.asCfgNode() }

  /** Gets the data-flow node for the argument named `name` of the call corresponding to this data-flow node. */
  Node getArgByName(string name) { node.getArgByName(name) = result.asCfgNode() }
}
|
||||
|
||||
/**
 * A data-flow node corresponding to a method call, that is `foo.bar(...)`.
 *
 * Also covers the case where the method lookup is done separately from the call itself,
 * as in `temp = foo.bar; temp(...)`. Note that this is only tracked through local scope.
 */
class MethodCallNode extends CallCfgNode {
  AttrRead method_lookup;

  MethodCallNode() { this.getFunction().getALocalSource() = method_lookup }

  /**
   * Gets the name of the method being invoked (the `bar` in `foo.bar(...)`) if it can
   * be determined.
   *
   * Note that this method may have multiple results if a single call node represents
   * calls to multiple different objects and methods. If you want to link up objects and
   * method names accurately, use the `calls` method instead.
   */
  string getMethodName() { method_lookup.getAttributeName() = result }

  /**
   * Gets the data-flow node corresponding to the object receiving this call. That is,
   * the `foo` in `foo.bar(...)`.
   *
   * Note that this method may have multiple results if a single call node represents
   * calls to multiple different objects and methods. If you want to link up objects and
   * method names accurately, use the `calls` method instead.
   */
  Node getObject() { method_lookup.getObject() = result }

  /** Holds if this data-flow node calls method `methodName` on the object node `object`. */
  predicate calls(Node object, string methodName) {
    // `getObject` and `getMethodName` may each have several results, so both values are
    // looked up directly on `method_lookup` to keep them paired.
    methodName = method_lookup.getAttributeName() and
    object = method_lookup.getObject()
  }
}
|
||||
|
||||
/**
 * An expression, viewed as a node in a data flow graph.
 *
 * Note that because of control-flow splitting, one `Expr` may correspond
 * to multiple `ExprNode`s, just like it may correspond to multiple
 * `ControlFlow::Node`s.
 */
class ExprNode extends CfgNode {
  ExprNode() { isExpressionNode(node) }

  /** Gets the expression of the underlying control-flow node. */
  override Expr asExpr() { node.getNode() = result }
}
|
||||
|
||||
/** Gets an `ExprNode` whose control-flow node belongs to expression `e`. */
ExprNode exprNode(DataFlowExpr e) { e = result.getNode().getNode() }
|
||||
|
||||
/**
 * The value of a parameter at function entry, viewed as a node in a data
 * flow graph.
 */
class ParameterNode extends CfgNode, LocalSourceNode {
  ParameterDefinition def;

  ParameterNode() {
    def.getDefiningNode() = node and
    // Disregard parameters that we cannot resolve
    // TODO: Make this unnecessary
    node = any(DataFlowCallable c).getParameter(_)
  }

  /**
   * Holds if this node is the parameter of callable `c` at the
   * (zero-based) index `i`.
   */
  predicate isParameterOf(DataFlowCallable c, int i) { c.getParameter(i) = node }

  /** Gets the callable that this node is a parameter of. */
  override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }

  /** Gets the `Parameter` this `ParameterNode` represents. */
  Parameter getParameter() { def.getParameter() = result }
}
|
||||
|
||||
/** Gets a `ParameterNode` that represents parameter `p`. */
ParameterNode parameterNode(Parameter p) { p = result.getParameter() }
|
||||
|
||||
/** A data flow node that is an argument of some `DataFlowCall`. */
class ArgumentNode extends Node {
  ArgumentNode() { exists(DataFlowCall c | c.getArg(_) = this) }

  /** Holds if this argument occurs at the given position in the given call. */
  predicate argumentOf(DataFlowCall call, int pos) { call.getArg(pos) = this }

  /** Gets the call in which this node is an argument. */
  final DataFlowCall getCall() { this.argumentOf(result, _) }
}
|
||||
|
||||
/**
 * A node associated with an object after an operation that might have
 * changed its state.
 *
 * This can be either the argument to a callable after the callable returns
 * (which might have mutated the argument), or the qualifier of a field after
 * an update to the field.
 *
 * Nodes corresponding to AST elements, for example `ExprNode`s, usually refer
 * to the value before the update, with the exception of `ObjectCreationNode`s,
 * which represent the value _after_ the constructor has run.
 */
abstract class PostUpdateNode extends Node {
  /** Gets the node before the state update. */
  abstract Node getPreUpdateNode();
}
|
||||
|
||||
/**
 * A data flow node corresponding to a module-level (global) variable that is accessed
 * outside of the module scope.
 *
 * Global variables may appear twice in the data flow graph, as both `EssaNode`s and
 * `ModuleVariableNode`s. The former is used to represent data flow between global
 * variables as it occurs during module initialization, and the latter is used to
 * represent data flow via global variable reads and writes during run-time.
 *
 * It is possible for data to flow from assignments made at module initialization time
 * to reads made at run-time, but not vice versa. For example, there will be flow from
 * `SOURCE` to `SINK` in the following snippet:
 *
 * ```python
 * g = SOURCE
 *
 * def foo():
 *     SINK(g)
 * ```
 * but not the other way round:
 *
 * ```python
 * SINK(g)
 *
 * def bar():
 *     global g
 *     g = SOURCE
 * ```
 *
 * Data flow through `ModuleVariableNode`s is represented as `jumpStep`s, and so any
 * write of a global variable can flow to any read of the same variable.
 */
class ModuleVariableNode extends Node, TModuleVariableNode {
  Module mod;
  GlobalVariable var;

  ModuleVariableNode() { TModuleVariableNode(mod, var) = this }

  /** Gets the module in which this variable appears. */
  Module getModule() { mod = result }

  /** Gets the global variable corresponding to this node. */
  GlobalVariable getVariable() { var = result }

  /** Gets the scope of this node, which is the module itself. */
  override Scope getScope() { mod = result }

  override string toString() {
    result = "ModuleVariableNode for " + var.toString() + " in " + mod.toString()
  }

  /** Gets a node that reads this variable. */
  Node getARead() {
    // Ignore reads that happen when the module is imported. These are only executed once.
    not mod = result.getScope() and
    var.getALoad().getAFlowNode() = result.asCfgNode()
  }

  /** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
  EssaNode getAWrite() {
    exists(EssaNodeDefinition essaDef | essaDef = result.asVar().getDefinition() |
      essaDef.definedBy(var, any(DefinitionNode defn))
    )
  }

  /** Gets the module-scope callable whose scope is this node's module. */
  override DataFlowCallable getEnclosingCallable() { mod = result.(DataFlowModuleScope).getScope() }

  /** Gets the location of the module. */
  override Location getLocation() { mod.getLocation() = result }
}
|
||||
|
||||
/**
 * The node holding the extra positional arguments to a call. This node is passed as a
 * tuple to the starred parameter of the callable.
 */
class PosOverflowNode extends Node, TPosOverflowNode {
  CallNode call;

  PosOverflowNode() { TPosOverflowNode(call, _) = this }

  override string toString() { result = "PosOverflowNode for " + call.getNode().toString() }

  /** Gets the enclosing callable of the data-flow node for `call`. */
  override DataFlowCallable getEnclosingCallable() {
    result = any(Node callNode | callNode = TCfgNode(call)).getEnclosingCallable()
  }

  /** Gets the location of the call. */
  override Location getLocation() { call.getLocation() = result }
}
|
||||
|
||||
/**
 * The node holding the extra keyword arguments to a call. This node is passed as a
 * dictionary to the doubly starred parameter of the callable.
 */
class KwOverflowNode extends Node, TKwOverflowNode {
  CallNode call;

  KwOverflowNode() { TKwOverflowNode(call, _) = this }

  override string toString() { result = "KwOverflowNode for " + call.getNode().toString() }

  /** Gets the enclosing callable of the data-flow node for `call`. */
  override DataFlowCallable getEnclosingCallable() {
    result = any(Node callNode | callNode = TCfgNode(call)).getEnclosingCallable()
  }

  /** Gets the location of the call. */
  override Location getLocation() { call.getLocation() = result }
}
|
||||
|
||||
/**
 * The node representing the synthetic argument of a call that is unpacked from a
 * dictionary argument.
 */
class KwUnpackedNode extends Node, TKwUnpackedNode {
  CallNode call;
  string name;

  KwUnpackedNode() { TKwUnpackedNode(call, _, name) = this }

  override string toString() { result = "KwUnpacked " + name }

  /** Gets the enclosing callable of the data-flow node for `call`. */
  override DataFlowCallable getEnclosingCallable() {
    result = any(Node callNode | callNode = TCfgNode(call)).getEnclosingCallable()
  }

  /** Gets the location of the call. */
  override Location getLocation() { call.getLocation() = result }
}
|
||||
|
||||
/**
 * A synthetic node representing an iterable sequence. Used for changing content type,
 * for instance from a `ListElement` to a `TupleElement`, especially if the content is
 * transferred via a read step which cannot be broken up into a read and a store. The
 * read step then targets the iterable sequence node, and the conversion can happen via
 * a read step to the iterable element node followed by a store step to the target.
 */
class IterableSequenceNode extends Node, TIterableSequenceNode {
  CfgNode consumer;

  IterableSequenceNode() { TIterableSequenceNode(consumer.getNode()) = this }

  override string toString() { result = "IterableSequence" }

  /** Gets the enclosing callable of the consumer. */
  override DataFlowCallable getEnclosingCallable() { consumer.getEnclosingCallable() = result }

  /** Gets the location of the consumer. */
  override Location getLocation() { consumer.getLocation() = result }
}
|
||||
|
||||
/**
 * A synthetic node representing an iterable element. Used for changing content type,
 * for instance from a `ListElement` to a `TupleElement`. This would happen via a read
 * step from the list to the iterable element node, followed by a store step to the tuple.
 */
class IterableElementNode extends Node, TIterableElementNode {
  CfgNode consumer;

  IterableElementNode() { TIterableElementNode(consumer.getNode()) = this }

  override string toString() { result = "IterableElement" }

  /** Gets the enclosing callable of the consumer. */
  override DataFlowCallable getEnclosingCallable() { consumer.getEnclosingCallable() = result }

  /** Gets the location of the consumer. */
  override Location getLocation() { consumer.getLocation() = result }
}
|
||||
|
||||
/**
 * A node that controls whether other nodes are evaluated. It is the last node of a
 * `ConditionBlock`.
 */
class GuardNode extends ControlFlowNode {
  ConditionBlock conditionBlock;

  GuardNode() { conditionBlock.getLastNode() = this }

  /** Holds if this guard controls block `b` upon evaluating to `branch`. */
  predicate controlsBlock(BasicBlock b, boolean branch) { conditionBlock.controls(b, branch) }
}
|
||||
|
||||
/**
 * A guard that validates some expression.
 *
 * To use this in a configuration, extend the class and provide a
 * characteristic predicate precisely specifying the guard, and override
 * `checks` to specify what is being validated and in which branch.
 *
 * It is important that all extending classes in scope are disjoint.
 */
class BarrierGuard extends GuardNode {
  /** Holds if this guard validates `node` upon evaluating to `branch`. */
  abstract predicate checks(ControlFlowNode node, boolean branch);

  /**
   * Gets a node guarded by this guard: a use of the same ESSA definition as a checked
   * node, located in a basic block this guard controls on the validated branch.
   */
  final ExprNode getAGuardedNode() {
    exists(EssaDefinition sharedDef, ControlFlowNode checked, boolean validBranch |
      this.checks(checked, validBranch) and
      AdjacentUses::useOfDef(sharedDef, checked) and
      AdjacentUses::useOfDef(sharedDef, result.asCfgNode()) and
      this.controlsBlock(result.asCfgNode().getBasicBlock(), validBranch)
    )
  }
}
|
||||
|
||||
/**
 * Algebraic datatype for tracking data content associated with values.
 * Content can be collection elements or object attributes.
 */
newtype TContent =
  /** An element of a list. */
  TListElementContent() or
  /** An element of a set. */
  TSetElementContent() or
  /** An element of a tuple at a specific index. */
  TTupleElementContent(int index) {
    // Arguments can overflow and end up in the starred parameter tuple.
    exists(any(CallNode cn).getArg(index))
    or
    exists(any(TupleNode tn).getElement(index))
  } or
  /** An element of a dictionary under a specific key. */
  TDictionaryElementContent(string key) {
    // Keys passed as keyword arguments ...
    key = any(Keyword kw).getArg()
    or
    // ... and string constant keys of key-value pairs.
    key = any(KeyValuePair kvp).getKey().(StrConst).getS()
  } or
  /** An element of a dictionary under any key. */
  TDictionaryElementAnyContent() or
  /** An object attribute. */
  TAttributeContent(string attr) { any(Attribute a).getName() = attr }
|
||||
|
||||
/**
 * Content that can be associated with a data-flow value.
 * If the value is a collection, it can have elements;
 * if it is an object, it can have attribute values.
 */
class Content extends TContent {
  /** Gets a textual representation of this element. */
  string toString() { result = "Content" }
}
|
||||
|
||||
/** Content representing an element of a list. */
class ListElementContent extends TListElementContent, Content {
  override string toString() { result = "List element" }
}
|
||||
|
||||
/** Content representing an element of a set. */
class SetElementContent extends TSetElementContent, Content {
  override string toString() { result = "Set element" }
}
|
||||
|
||||
/** Content representing an element of a tuple at a specific index. */
class TupleElementContent extends TTupleElementContent, Content {
  int index;

  TupleElementContent() { TTupleElementContent(index) = this }

  /** Gets the index for this tuple element. */
  int getIndex() { index = result }

  override string toString() { result = "Tuple element at index " + index.toString() }
}
|
||||
|
||||
/** Content representing an element of a dictionary under a specific key. */
class DictionaryElementContent extends TDictionaryElementContent, Content {
  string key;

  DictionaryElementContent() { TDictionaryElementContent(key) = this }

  /** Gets the key for this dictionary element. */
  string getKey() { key = result }

  override string toString() { result = "Dictionary element at key " + key }
}
|
||||
|
||||
/** Content representing an element of a dictionary under any key. */
class DictionaryElementAnyContent extends TDictionaryElementAnyContent, Content {
  override string toString() { result = "Any dictionary element" }
}
|
||||
|
||||
/** Content representing the value stored in a named attribute of an object. */
class AttributeContent extends Content, TAttributeContent {
  private string attr;

  AttributeContent() { this = TAttributeContent(attr) }

  /** Gets the name of the attribute that holds this content. */
  string getAttribute() { result = attr }

  override string toString() { result = "Attribute " + this.getAttribute() }
}
|
||||
@@ -1,78 +0,0 @@
|
||||
/**
|
||||
* Contains utility functions for writing data flow queries
|
||||
*/
|
||||
|
||||
private import DataFlowPrivate
|
||||
import DataFlowPublic
|
||||
|
||||
/**
 * Holds if there is a single local (intra-procedural) data-flow step
 * from `nodeFrom` to `nodeTo`.
 */
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
  // Thin public wrapper around the private step relation.
  simpleLocalFlowStep(nodeFrom, nodeTo)
}
|
||||
|
||||
/**
 * Holds if `sink` is reachable from `source` through zero or more local
 * (intra-procedural) data-flow steps.
 */
predicate localFlow(Node source, Node sink) {
  // Reflexive, transitive closure of the single-step relation.
  localFlowStep*(source, sink)
}
|
||||
|
||||
/**
 * Gets a `Node` that refers to the module referenced by `name`.
 *
 * Keep in mind that the statement `import pkg.mod` introduces the variable `pkg`,
 * which is a reference to the module `pkg`.
 *
 * The following forms are handled (each optionally followed by `... as <new-name>`):
 * 1. `import <name>`
 * 2. `from <package> import <module>` when `<name> = <package> + "." + <module>`
 * 3. `from <module> import <member>` when `<name> = <module> + "." + <member>`
 *
 * In addition, for `from <module> import <member>` the `ImportExpr` corresponding to
 * `<module>` is itself considered a reference to that module.
 *
 * Note:
 * It is technically possible for `import mypkg.foo` and `from mypkg import foo` to give
 * different values, but this is highly unlikely to be a problem in production level code.
 * Example: if `mypkg/__init__.py` contains `foo = 42`, then `from mypkg import foo` does not
 * import the module `mypkg/foo.py` but the variable `foo` containing `42` -- whereas
 * `import mypkg.foo` always makes `mypkg.foo` refer to the module.
 */
Node importNode(string name) {
  // The value bound by an import alias, named either as a member import or as a
  // plain module import.
  exists(Import importStmt, Alias importedAlias, Variable boundVar |
    importedAlias = importStmt.getAName() and
    importedAlias.getAsname() = boundVar.getAStore()
  |
    result.asExpr() = importedAlias.getValue() and
    (
      name = importedAlias.getValue().(ImportExpr).getImportedModuleName()
      or
      name = importedAlias.getValue().(ImportMember).getImportedModuleName()
    )
  )
  or
  // It may seem superfluous to treat the `foo` part of `from foo import bar as baz` as a
  // reference to a module, since that reference only makes sense locally within the `import`
  // statement. It matters for type trackers, though: with this local reference also referring
  // to the `foo` module, references to the `bar` attribute can be tracked by simply writing
  //
  // ```ql
  // DataFlow::Node bar_attr_tracker(TypeTracker t) {
  //   t.startInAttr("bar") and
  //   result = foo_module_tracker()
  //   or
  //   exists(TypeTracker t2 | result = bar_attr_tracker(t2).track(t2, t))
  // }
  // ```
  //
  // where `foo_module_tracker` is a type tracker that tracks references to the `foo` module.
  // Because named imports are modelled as `AttrRead`s, the statement
  // `from foo import bar as baz` is interpreted as if it was the assignment `baz = foo.bar`,
  // so `baz` gets tracked as a reference to `foo.bar`, as desired.
  exists(ImportExpr moduleRef |
    name = moduleRef.getName() and
    moduleRef = result.asCfgNode().getNode() and
    // For `import foo.bar` there must be NO result for `importNode("foo.bar")`, only for
    // `importNode("foo")`; import expressions that are the value of an alias are excluded.
    not moduleRef = any(Import importStmt).getAName().getValue()
  )
}
|
||||
@@ -1,225 +0,0 @@
|
||||
/**
|
||||
* Provides support for intra-procedural tracking of a customizable
|
||||
* set of data flow nodes.
|
||||
*
|
||||
* Note that unlike `TypeTracker.qll`, this library only performs
|
||||
* local tracking within a function.
|
||||
*/
|
||||
|
||||
import python
|
||||
import DataFlowPublic
|
||||
private import DataFlowPrivate
|
||||
|
||||
/**
 * A data flow node that acts as a source of local flow. Examples include
 * - Expressions
 * - Function parameters
 *
 * Local source nodes and the `flowsTo` relation follow the reference semantics of the
 * underlying object. Consider the snippet
 *
 * ```python
 * x = []
 * x.append(1)
 * x.append(2)
 * ```
 *
 * Here the local source node for every occurrence of `x` is the empty list originally
 * assigned to `x`. The two `append` calls modify the value of `x`, but `x` still points to the
 * same object. If the code continues with `x = x + [3]`, the expression `x + [3]` becomes the
 * new local source of what `x` points to from then on.
 */
class LocalSourceNode extends Node {
  cached
  LocalSourceNode() {
    // Every module variable node is included, since these are the stepping stones between
    // writes and reads of global variables. Without them, type tracking based on
    // `LocalSourceNode`s could not track across global variables.
    //
    // Once the `track` and `backtrack` methods have been fully deprecated, this disjunct can be
    // removed and the whole class can extend `ExprNode`. `TypeTrackingNode` should then be used
    // for type tracking instead of `LocalSourceNode`.
    this instanceof ModuleVariableNode
    or
    // Reads of global variables are included explicitly, as some of them may have local flow
    // going into them.
    exists(ModuleVariableNode globalVar | this = globalVar.getARead())
    or
    // Expression nodes that are not the target of any local flow step.
    this instanceof ExprNode and
    not simpleLocalFlowStep(_, this)
  }

  /** Holds if this `LocalSourceNode` can flow to `nodeTo` in zero or more local flow steps. */
  pragma[inline]
  predicate flowsTo(Node nodeTo) { Cached::hasLocalSource(nodeTo, this) }

  /** Gets a reference (read or write) of the attribute named `attrName` on this node. */
  AttrRef getAnAttributeReference(string attrName) { Cached::namedAttrRef(this, attrName, result) }

  /** Gets a read of the attribute named `attrName` on this node. */
  AttrRead getAnAttributeRead(string attrName) { result = this.getAnAttributeReference(attrName) }

  /** Gets a write of the attribute named `attrName` on this node. */
  AttrWrite getAnAttributeWrite(string attrName) {
    result = this.getAnAttributeReference(attrName)
  }

  /**
   * Gets a reference (read or write) of any attribute on this node, whether or not
   * the attribute name is known.
   */
  AttrRef getAnAttributeReference() {
    Cached::dynamicAttrRef(this, result)
    or
    Cached::namedAttrRef(this, _, result)
  }

  /** Gets a read of any attribute on this node. */
  AttrRead getAnAttributeRead() { result = this.getAnAttributeReference() }

  /** Gets a write of any attribute on this node. */
  AttrWrite getAnAttributeWrite() { result = this.getAnAttributeReference() }

  /** Gets a call to this node. */
  CallCfgNode getACall() { Cached::call(this, result) }

  /**
   * Gets a call to the method `methodName` on this node.
   *
   * This covers calls with the syntactic shape of a method call (as in `obj.m(...)`) as well
   * as calls where the callee undergoes additional local data flow (as in
   * `tmp = obj.m; m(...)`).
   */
  MethodCallNode getAMethodCall(string methodName) {
    result = this.getAnAttributeRead(methodName).getACall()
  }

  /**
   * Gets a node that this node may flow to using one heap and/or interprocedural step.
   *
   * See `TypeTracker` for more details about how to use this.
   */
  pragma[inline]
  LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }

  /**
   * Gets a node that may flow into this one using one heap and/or interprocedural step.
   *
   * See `TypeBackTracker` for more details about how to use this.
   */
  pragma[inline]
  LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
}
|
||||
|
||||
/**
 * A node that can be used for type tracking or type back-tracking.
 *
 * All steps made during type tracking should be between instances of this class.
 *
 * This is declared as an alias of `LocalSourceNode`.
 */
|
||||
class TypeTrackingNode = LocalSourceNode;
|
||||
|
||||
/** Temporary holding ground for the `TypeTrackingNode` class. */
private module FutureWork {
  /** A node that is either a `LocalSourceNode` or a `ModuleVariableNode`. */
  class FutureTypeTrackingNode extends Node {
    FutureTypeTrackingNode() {
      this instanceof ModuleVariableNode
      or
      this instanceof LocalSourceNode
    }

    /**
     * Holds if this node can flow to `node` through local flow.
     *
     * For `ModuleVariableNode`s, the only "local" step is to the node itself.
     * For `LocalSourceNode`s, this is the usual notion of local flow.
     */
    pragma[inline]
    predicate flowsTo(Node node) {
      this.(LocalSourceNode).flowsTo(node)
      or
      node = this and this instanceof ModuleVariableNode
    }

    /**
     * Gets a node that this node may flow to using one heap and/or interprocedural step.
     *
     * See `TypeTracker` for more details about how to use this.
     */
    pragma[inline]
    TypeTrackingNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }

    /**
     * Gets a node that may flow into this one using one heap and/or interprocedural step.
     *
     * See `TypeBackTracker` for more details about how to use this.
     */
    pragma[inline]
    TypeTrackingNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
  }
}
|
||||
|
||||
cached
private module Cached {
  /**
   * Holds if `source` is a `LocalSourceNode` that can reach `sink` via local flow steps,
   * including the case where `source` and `sink` are the same node.
   *
   * The slightly backwards parameter ordering is to force correct indexing.
   */
  cached
  predicate hasLocalSource(Node sink, LocalSourceNode source) {
    sink = source
    or
    // The first step is taken from `source` explicitly, followed by any number of further steps.
    exists(Node firstHop |
      localSourceFlowStep(source, firstHop) and
      localSourceFlowStep*(firstHop, sink)
    )
  }

  /**
   * Helper predicate for `hasLocalSource`. Removes any steps that go to module variable
   * reads, as these are already local source nodes in their own right.
   */
  cached
  private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
    simpleLocalFlowStep(nodeFrom, nodeTo) and
    not exists(ModuleVariableNode globalVar | nodeTo = globalVar.getARead())
  }

  /**
   * Holds if `base` flows to the base of `ref` and `ref` has attribute name `attr`.
   */
  cached
  predicate namedAttrRef(LocalSourceNode base, string attr, AttrRef ref) {
    attr = ref.getAttributeName() and
    base.flowsTo(ref.getObject())
  }

  /**
   * Holds if `base` flows to the base of `ref` and `ref` has no known attribute name.
   */
  cached
  predicate dynamicAttrRef(LocalSourceNode base, AttrRef ref) {
    not exists(ref.getAttributeName()) and
    base.flowsTo(ref.getObject())
  }

  /**
   * Holds if `func` flows to the callee of `call`.
   */
  cached
  predicate call(LocalSourceNode func, CallCfgNode call) {
    exists(CfgNode callee |
      callee = call.getFunction() and
      func.flowsTo(callee)
    )
  }
}
|
||||
@@ -1,226 +0,0 @@
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
|
||||
private import semmle.python.dataflow.new.internal.TaintTrackingPublic
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
 * Holds if `node` should act as a sanitizer in every global taint flow
 * configuration, while not affecting local taint.
 *
 * No such node is currently defined.
 */
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
|
||||
|
||||
/**
 * Holds if default `TaintTracking::Configuration`s should allow implicit reads
 * of `c` at sinks and at inputs to additional taint steps.
 *
 * No implicit read is currently allowed.
 */
bindingset[node]
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
|
||||
|
||||
private module Cached {
  /**
   * Holds if the additional step from `nodeFrom` to `nodeTo` should be part of every
   * global taint flow configuration.
   */
  cached
  predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // Steps contributed through the `AdditionalTaintStep` extension point.
    any(AdditionalTaintStep extension).step(nodeFrom, nodeTo)
    or
    localAdditionalTaintStep(nodeFrom, nodeTo)
  }

  /**
   * Holds if taint can flow in one local step from `nodeFrom` to `nodeTo`, not counting
   * local data flow steps. In other words, `nodeFrom` and `nodeTo` are likely to
   * represent different objects.
   */
  cached
  predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    unpackingAssignmentStep(nodeFrom, nodeTo)
    or
    forStep(nodeFrom, nodeTo)
    or
    copyStep(nodeFrom, nodeTo)
    or
    containerStep(nodeFrom, nodeTo)
    or
    stringManipulation(nodeFrom, nodeTo)
    or
    subscriptStep(nodeFrom, nodeTo)
    or
    concatStep(nodeFrom, nodeTo)
  }
}
|
||||
|
||||
import Cached
|
||||
|
||||
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` through a concatenation.
 *
 * Interesting operand types (such as string, list and tuple) cannot easily be told apart,
 * so every `+` operation is considered to propagate taint. This is what is done in the
 * JS libraries, and isn't a big problem in practice.
 */
predicate concatStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
  exists(BinaryExprNode addition | nodeTo.getNode() = addition |
    nodeFrom.getNode() = addition.getAnOperand() and
    addition.getOp() instanceof Add
  )
}
|
||||
|
||||
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` through subscripting, that is,
 * `nodeTo` is a subscript whose object is `nodeFrom`.
 */
predicate subscriptStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
  exists(SubscriptNode subscript |
    subscript = nodeTo.getNode() and
    nodeFrom.getNode() = subscript.getObject()
  )
}
|
||||
|
||||
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` through a string manipulation.
 *
 * It is not easy to tell whether something is a string, so this can also make taint
 * flow through `<non string>.replace(foo, bar)`.
 */
predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
  // Converting something tainted into a string makes the resulting string tainted.
  exists(DataFlow::CallCfgNode conversion | nodeTo = conversion |
    nodeFrom in [conversion.getArg(0), conversion.getArgByName("object")] and
    (
      conversion.getFunction().asCfgNode().(NameNode).getId() in ["str", "bytes", "unicode"]
      or
      conversion = API::builtin(["str", "bytes", "unicode"]).getACall()
    )
  )
  or
  // String methods. Note that this doesn't recognize `meth = "foo".upper; meth()`
  exists(CallNode methodCall, string methodName, ControlFlowNode receiver |
    receiver = methodCall.getFunction().(AttrNode).getObject(methodName) and
    methodCall = nodeTo.getNode()
  |
    // The receiver taints the result of these methods.
    nodeFrom.getNode() = receiver and
    methodName in [
        "capitalize", "casefold", "center", "expandtabs", "format", "format_map", "join", "ljust",
        "lstrip", "lower", "replace", "rjust", "rstrip", "strip", "swapcase", "title", "upper",
        "zfill", "encode", "decode"
      ]
    or
    // str -> List[str]
    // TODO: check if these should be handled differently in regards to content
    nodeFrom.getNode() = receiver and
    methodName in ["partition", "rpartition", "rsplit", "split", "splitlines"]
    or
    // The second positional argument of `replace` taints the result.
    nodeFrom.getNode() = methodCall.getArg(1) and
    methodName = "replace"
    or
    // Any argument of `format` taints the result.
    nodeFrom.getNode() = methodCall.getAnArg() and
    methodName = "format"
    or
    // Iterable[str] -> str (`join`) and Mapping[str, Any] -> str (`format_map`)
    // TODO: check if `join` should be handled differently in regards to content
    nodeFrom.getNode() = methodCall.getArg(0) and
    methodName in ["join", "format_map"]
  )
  or
  // % formatting
  exists(BinaryExprNode percentFormat | nodeTo.getNode() = percentFormat |
    nodeFrom.getNode() in [percentFormat.getLeft(), percentFormat.getRight()] and
    percentFormat.getOp() instanceof Mod
  )
  or
  // string multiplication -- `"foo" * 10`
  exists(BinaryExprNode repetition | nodeTo.getNode() = repetition |
    nodeFrom.getNode() = repetition.getLeft() and
    repetition.getOp() instanceof Mult
  )
  or
  // f-strings
  nodeFrom.asExpr() = nodeTo.asExpr().(Fstring).getAValue()
  // TODO: Handle encode/decode from base64/quopri
  // TODO: Handle functions in https://docs.python.org/3/library/binascii.html
}
|
||||
|
||||
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` through a step related to containers
 * (lists/sets/dictionaries): literals, constructor invocations and methods.
 *
 * This is currently very imprecise. For example, because `dict.get` is modelled, the result
 * of any `<tainted object>.get(<arg>)` is treated as tainted, whether that is true or not.
 */
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
  // construction by literal
  // TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
  DataFlowPrivate::storeStep(nodeFrom, _, nodeTo)
  or
  // constructor call
  exists(DataFlow::CallCfgNode construction | nodeTo = construction |
    nodeFrom = construction.getArg(0) and
    construction = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall()
    // TODO: Properly handle defaultdict/namedtuple
  )
  or
  // functions operating on collections
  exists(DataFlow::CallCfgNode collectionCall | nodeTo = collectionCall |
    nodeFrom = collectionCall.getArg(0) and
    collectionCall = API::builtin(["sorted", "reversed", "iter", "next"]).getACall()
  )
  or
  // methods
  exists(DataFlow::MethodCallNode methodCall, string methodName | nodeTo = methodCall |
    methodCall.calls(nodeFrom, methodName) and
    methodName in [
        // general
        "copy", "pop",
        // dict
        "values", "items", "get", "popitem"
      ]
  )
  or
  // list.append, set.add: the argument taints the container after the call
  exists(DataFlow::MethodCallNode insertion, DataFlow::Node container |
    nodeFrom = insertion.getArg(0) and
    insertion.calls(container, ["append", "add"]) and
    container = nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode()
  )
}
|
||||
|
||||
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` through a copy made with
 * `copy.copy` or `copy.deepcopy`.
 */
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
  exists(DataFlow::CallCfgNode copyCall | nodeTo = copyCall |
    nodeFrom = copyCall.getArg(0) and
    copyCall = API::moduleImport("copy").getMember(["copy", "deepcopy"]).getACall()
  )
}
|
||||
|
||||
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` through `for`-iteration,
 * for example `for x in xs`, or `for x,y in points`.
 */
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
  exists(For loop, EssaNodeDefinition targetDefn |
    // The iterated expression taints every variable defined anywhere in the loop target.
    nodeFrom.asExpr() = loop.getIter() and
    targetDefn.getDefiningNode().getNode() = loop.getTarget().getAChildNode*() and
    targetDefn = nodeTo.getVar()
  )
}
|
||||
|
||||
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` through iterable unpacking.
 * Only normal assignment (`x,y = calc_point()`) is covered here, since
 * `for x,y in points` is handled by `forStep`.
 */
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
  // `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
  exists(Assign assignment, MultiAssignmentDefinition unpackedDefn |
    nodeFrom.asExpr() = assignment.getValue() and
    assignment.getATarget().contains(unpackedDefn.getDefiningNode().getNode()) and
    unpackedDefn = nodeTo.getVar()
  )
}
|
||||
@@ -1,50 +0,0 @@
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) taint-tracking analyses.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import TaintTrackingPrivate
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
// Need to import since frameworks can extend `AdditionalTaintStep`
|
||||
private import semmle.python.Frameworks
|
||||
|
||||
// Local taint flow and helpers
|
||||
/**
 * Holds if taint can propagate from `source` to `sink` through zero or more
 * local (intra-procedural) steps.
 */
predicate localTaint(DataFlow::Node source, DataFlow::Node sink) {
  // Reflexive, transitive closure of the single-step relation.
  localTaintStep*(source, sink)
}
|
||||
|
||||
/**
 * Holds if taint can flow from the expression `e1` to the expression `e2` through
 * zero or more local (intra-procedural) steps.
 */
predicate localExprTaint(Expr e1, Expr e2) {
  exists(DataFlow::Node source, DataFlow::Node sink |
    source = DataFlow::exprNode(e1) and
    sink = DataFlow::exprNode(e2)
  |
    localTaint(source, sink)
  )
}
|
||||
|
||||
/**
 * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
 * (intra-procedural) step.
 */
predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
  // Taint-specific local steps
  localAdditionalTaintStep(nodeFrom, nodeTo)
  or
  // Ordinary data flow
  DataFlow::localFlowStep(nodeFrom, nodeTo)
}
|
||||
|
||||
/**
 * A unit class that serves as the extension point for additional taint steps.
 *
 * Extend this class to contribute taint steps that should apply to all
 * taint configurations.
 */
class AdditionalTaintStep extends Unit {
  /**
   * Holds if the step from `nodeFrom` to `nodeTo` should be treated as a taint
   * step by every configuration.
   */
  abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);
}
|
||||
@@ -1,470 +0,0 @@
|
||||
/** Step Summaries and Type Tracking */
|
||||
|
||||
private import TypeTrackerSpecific
|
||||
|
||||
/**
 * A string that may appear as the name of a piece of content. This usually includes things like:
 * - Attribute names (in Python)
 * - Property names (in JavaScript)
 *
 * More generally, content names can also model things like stores to specific list indices.
 * For correctness, it is important that
 *
 * - different types of content do not have overlapping names, and
 * - the empty string `""` is not a valid piece of content, because it is used to indicate the
 *   absence of content instead.
 */
class ContentName extends string {
  ContentName() { getPossibleContentName() = this }
}
|
||||
|
||||
/** A string that is either a content name or the empty string (which stands for "no content"). */
class OptionalContentName extends string {
  OptionalContentName() {
    this = ""
    or
    this instanceof ContentName
  }
}
|
||||
|
||||
cached
private module Cached {
  /**
   * A description of a step on an inter-procedural data flow path.
   */
  cached
  newtype TStepSummary =
    LevelStep() or
    CallStep() or
    ReturnStep() or
    StoreStep(ContentName content) or
    LoadStep(ContentName content)

  /** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */
  cached
  TypeTracker append(TypeTracker tt, StepSummary step) {
    exists(Boolean viaCall, OptionalContentName storedIn | tt = MkTypeTracker(viaCall, storedIn) |
      // A level step leaves the summary untouched.
      result = tt and step = LevelStep()
      or
      // A call step records that a call has been entered.
      result = MkTypeTracker(true, storedIn) and step = CallStep()
      or
      // A return step is only possible when no call step has been taken.
      result = tt and viaCall = false and step = ReturnStep()
      or
      // Loading the tracked content leaves the tracker with no content.
      result = MkTypeTracker(viaCall, "") and step = LoadStep(storedIn)
      or
      // A store is only possible when no content is currently tracked.
      exists(string written |
        storedIn = "" and
        step = StoreStep(written) and
        result = MkTypeTracker(viaCall, written)
      )
    )
  }

  /**
   * Holds if `summary` corresponds to having taken a forwards
   * heap and/or intra-procedural step from `nodeFrom` to `nodeTo`.
   *
   * Steps contained in this predicate should _not_ depend on the call graph.
   */
  cached
  predicate stepNoCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
    exists(Node mid | nodeFrom.flowsTo(mid) | smallstepNoCall(mid, nodeTo, summary))
  }

  /**
   * Holds if `summary` corresponds to having taken a forwards
   * inter-procedural step from `nodeFrom` to `nodeTo`.
   */
  cached
  predicate stepCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
    exists(Node mid | nodeFrom.flowsTo(mid) | smallstepCall(mid, nodeTo, summary))
  }
}
|
||||
|
||||
private import Cached
|
||||
|
||||
/**
 * INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
 *
 * A description of a step on an inter-procedural data flow path.
 */
class StepSummary extends TStepSummary {
  /** Gets a textual representation of this step summary. */
  string toString() {
    this = LevelStep() and result = "level"
    or
    this = CallStep() and result = "call"
    or
    this = ReturnStep() and result = "return"
    or
    exists(string stored | this = StoreStep(stored) and result = "store " + stored)
    or
    exists(string loaded | this = LoadStep(loaded) and result = "load " + loaded)
  }
}
|
||||
|
||||
/**
 * Holds if `summary` describes a single step from `nodeFrom` to `nodeTo` that is a
 * jump step (a level step), a store step or a load step.
 */
pragma[noinline]
private predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
  exists(string contentName |
    summary = LoadStep(contentName) and
    basicLoadStep(nodeFrom, nodeTo, contentName)
    or
    summary = StoreStep(contentName) and
    StepSummary::localSourceStoreStep(nodeFrom, nodeTo, contentName)
  )
  or
  summary = LevelStep() and
  jumpStep(nodeFrom, nodeTo)
}
|
||||
|
||||
/**
 * Holds if `summary` describes a single call step or return step
 * from `nodeFrom` to `nodeTo`.
 */
pragma[noinline]
private predicate smallstepCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
  summary = ReturnStep() and
  returnStep(nodeFrom, nodeTo)
  or
  summary = CallStep() and
  callStep(nodeFrom, nodeTo)
}
|
||||
|
||||
/** Provides predicates for updating step summaries (`StepSummary`s). */
module StepSummary {
  /**
   * Holds if `summary` corresponds to having taken a forwards
   * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
   *
   * This predicate is inlined, which enables better join-orders when
   * the call graph construction and type tracking are mutually recursive.
   * In such cases, non-linear recursion involving `step` will be limited
   * to non-linear recursion for the parts of `step` that involve the
   * call graph.
   */
  pragma[inline]
  predicate step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
    stepCall(nodeFrom, nodeTo, summary)
    or
    stepNoCall(nodeFrom, nodeTo, summary)
  }

  /**
   * Holds if `summary` corresponds to having taken a forwards
   * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
   *
   * Unlike `StepSummary::step`, this predicate does not compress
   * type-preserving steps.
   */
  pragma[inline]
  predicate smallstep(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
    smallstepCall(nodeFrom, nodeTo, summary)
    or
    smallstepNoCall(nodeFrom, nodeTo, summary)
  }

  /**
   * Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
   *
   * `nodeTo` is always a local source node that flows to the place where the content is
   * written in `basicStoreStep`. As a consequence, information may flow "back in time"
   * from the point of view of the execution of the program.
   *
   * For instance, suppose attribute writes in Python are interpreted as writes to content with
   * the same name as the attribute, and consider the following snippet
   *
   * ```python
   * def foo(y):
   *     x = Foo()
   *     bar(x)
   *     x.attr = y
   *     baz(x)
   *
   * def bar(x):
   *     z = x.attr
   * ```
   * For the attribute write `x.attr = y`, `content` is the literal string `"attr"`,
   * `nodeFrom` is `y`, and `nodeTo` is the object `Foo()` created on the first line of the
   * function. The fact that `x.attr` can have the type of `y` is therefore tracked into the
   * assignment to `z` inside `bar`, even though the attribute write happens _after_ `bar` is
   * called.
   */
  predicate localSourceStoreStep(Node nodeFrom, TypeTrackingNode nodeTo, string content) {
    exists(Node writtenObject |
      basicStoreStep(nodeFrom, writtenObject, content) and
      nodeTo.flowsTo(writtenObject)
    )
  }
}
|
||||
|
||||
/**
 * The algebraic type underlying `TypeTracker`: a `Boolean` call flag paired with
 * an optional content name (the empty string when no content is tracked).
 */
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content)
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to track a value to a given dataflow node.
|
||||
*
|
||||
* This can be used to track objects that implement a certain API in order to
|
||||
* recognize calls to that API. Note that type-tracking does not by itself provide a
|
||||
* source/sink relation, that is, it may determine that a node has a given type,
|
||||
* but it won't determine where that type came from.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for tracking some type `myType`:
|
||||
* ```ql
|
||||
* DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* result = myType(t2).track(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
|
||||
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t = t2.smallstep(myType(t2), result)`.
|
||||
*/
|
||||
class TypeTracker extends TTypeTracker {
  // Whether the tracked value has been passed into a call (exposed through `hasCall()`).
  Boolean hasCall;
  // Name of the content associated with this tracker; `""` means it is not inside any content.
  OptionalContentName content;

  TypeTracker() { this = MkTypeTracker(hasCall, content) }

  /** Gets the type tracker obtained by extending this summary with `step`. */
  TypeTracker append(StepSummary step) { result = append(this, step) }

  /** Gets a human-readable description of this summary. */
  string toString() {
    exists(string callPart, string contentPart |
      (
        hasCall = true and callPart = "with"
        or
        hasCall = false and callPart = "without"
      ) and
      (
        content != "" and contentPart = " with content " + content
        or
        content = "" and contentPart = ""
      ) and
      result = "type tracker " + callPart + " call steps" + contentPart
    )
  }

  /**
   * Holds if this tracker is the starting point of type tracking:
   * no call step has been taken and there is no associated content.
   */
  predicate start() { content = "" and hasCall = false }

  /**
   * Holds if this tracker is the starting point of type tracking for a value that
   * starts out inside the content named `contentName`.
   * Such tracking only reaches an end point once the content has been loaded.
   */
  predicate startInContent(ContentName contentName) {
    content = contentName and hasCall = false
  }

  /**
   * Holds if this tracker is the starting point of type tracking
   * when a parameter is tracked into a call, but not out of it.
   */
  predicate call() { content = "" and hasCall = true }

  /**
   * Holds if this tracker is an end point of type tracking, that is,
   * it has no associated content.
   */
  predicate end() { content = "" }

  /**
   * INTERNAL. DO NOT USE.
   *
   * Gets the boolean telling whether this type has been tracked into a call.
   */
  boolean hasCall() { hasCall = result }

  /**
   * INTERNAL. DO NOT USE.
   *
   * Gets the content name associated with this type tracker.
   */
  string getContent() { content = result }

  /**
   * Gets a type tracker that picks up where this one left off, so that
   * tracking can be continued.
   *
   * This predicate has a result only if the type is not associated to a piece of content.
   */
  TypeTracker continue() { result = this and content = "" }

  /**
   * Gets the summary that results from taking a forwards
   * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
   */
  pragma[inline]
  TypeTracker step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) {
    exists(StepSummary taken |
      StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](taken)) and
      result = this.append(pragma[only_bind_into](taken))
    )
  }

  /**
   * Gets the summary that results from taking a forwards
   * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
   *
   * In contrast to `TypeTracker::step`, this predicate exposes every edge
   * in the flow graph, not only the edges between `Node`s.
   * It may therefore be less performant.
   *
   * Type tracking predicates built on small steps typically look like this:
   * ```ql
   * DataFlow::Node myType(DataFlow::TypeTracker t) {
   *   t.start() and
   *   result = < source of myType >
   *   or
   *   exists (DataFlow::TypeTracker t2 |
   *     t = t2.smallstep(myType(t2), result)
   *   )
   * }
   *
   * DataFlow::Node myType() {
   *   result = myType(DataFlow::TypeTracker::end())
   * }
   * ```
   */
  pragma[inline]
  TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
    // A simple local flow step leaves the summary unchanged.
    result = this and
    simpleLocalFlowStep(nodeFrom, nodeTo)
    or
    exists(StepSummary taken |
      result = this.append(taken) and
      StepSummary::smallstep(nodeFrom, nodeTo, taken)
    )
  }
}
|
||||
|
||||
/** Provides helper predicates for writing custom `TypeTracker`-based predicates. */
module TypeTracker {
  /**
   * Gets a type tracker that is a valid end point of type tracking.
   */
  TypeTracker end() { result = any(TypeTracker t | t.end()) }
}
|
||||
|
||||
/** The underlying representation of a `TypeBackTracker`: a `hasReturn` flag paired with a `content` name. */
private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalContentName content)
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to back-track a use of a value to a given dataflow node.
|
||||
*
|
||||
* This can for example be used to track callbacks that are passed to a certain API,
|
||||
* so we can model specific parameters of that callback as having a certain type.
|
||||
*
|
||||
* Note that type back-tracking does not provide a source/sink relation, that is,
|
||||
* it may determine that a node will be used in an API call somewhere, but it won't
|
||||
* determine exactly where that use was, or the path that led to the use.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for back-tracking some callback type `myCallback`:
|
||||
*
|
||||
* ```ql
|
||||
* DataFlow::TypeTrackingNode myCallback(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = (< some API call >).getArgument(< n >).getALocalSource()
|
||||
* or
|
||||
* exists (DataFlow::TypeBackTracker t2 |
|
||||
* result = myCallback(t2).backtrack(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::TypeTrackingNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
|
||||
* `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
|
||||
*/
|
||||
class TypeBackTracker extends TTypeBackTracker {
  // Whether a return step has been taken while back-tracking (exposed through `hasReturn()`).
  Boolean hasReturn;
  // Name of the content associated with this back-tracker; `""` means no content.
  // Declared with the same type as the second component of `MkTypeBackTracker`.
  OptionalContentName content;

  TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) }

  /** Gets the summary resulting from prepending `step` to this type back-tracking summary. */
  TypeBackTracker prepend(StepSummary step) {
    // A level step does not change the summary.
    step = LevelStep() and result = this
    or
    // A call step may only be prepended when no return step has been taken.
    step = CallStep() and hasReturn = false and result = this
    or
    // A return step is recorded in the `hasReturn` component; the content is kept.
    step = ReturnStep() and result = MkTypeBackTracker(true, content)
    or
    // A load of `p` may only be prepended when there is no content yet;
    // the result is associated with content `p`.
    exists(string p |
      step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p)
    )
    or
    // A store into the current content yields a summary without content.
    step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
  }

  /** Gets a textual representation of this summary. */
  string toString() {
    exists(string withReturn, string withContent |
      (if hasReturn = true then withReturn = "with" else withReturn = "without") and
      (if content != "" then withContent = " with content " + content else withContent = "") and
      result = "type back-tracker " + withReturn + " return steps" + withContent
    )
  }

  /**
   * Holds if this is the starting point of type back-tracking:
   * no return step has been taken and there is no associated content.
   */
  predicate start() { hasReturn = false and content = "" }

  /**
   * Holds if this is the end point of type back-tracking, that is,
   * there is no associated content.
   */
  predicate end() { content = "" }

  /**
   * INTERNAL. DO NOT USE.
   *
   * Gets the boolean telling whether this type has been back-tracked into a call
   * through a return edge.
   */
  boolean hasReturn() { result = hasReturn }

  /**
   * Gets a type back-tracker that starts where this one has left off to allow continued
   * tracking.
   *
   * This predicate is only defined if the type has not been tracked into a piece of content.
   */
  TypeBackTracker continue() { content = "" and result = this }

  /**
   * Gets the summary that corresponds to having taken a backwards
   * heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
   *
   * The receiver is the summary obtained by prepending the step to the result.
   */
  pragma[inline]
  TypeBackTracker step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) {
    exists(StepSummary summary |
      StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and
      this = result.prepend(pragma[only_bind_into](summary))
    )
  }

  /**
   * Gets the summary that corresponds to having taken a backwards
   * local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
   *
   * Unlike `TypeBackTracker::step`, this predicate exposes all edges
   * in the flowgraph, and not just the edges between
   * `TypeTrackingNode`s. It may therefore be less performant.
   *
   * Type tracking predicates using small steps typically take the following form:
   * ```ql
   * DataFlow::Node myType(DataFlow::TypeBackTracker t) {
   *   t.start() and
   *   result = < some API call >.getArgument(< n >)
   *   or
   *   exists (DataFlow::TypeBackTracker t2 |
   *     t2 = t.smallstep(result, myType(t2))
   *   )
   * }
   *
   * DataFlow::Node myType() {
   *   result = myType(DataFlow::TypeBackTracker::end())
   * }
   * ```
   */
  pragma[inline]
  TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
    exists(StepSummary summary |
      StepSummary::smallstep(nodeFrom, nodeTo, summary) and
      this = result.prepend(summary)
    )
    or
    // A simple local flow step leaves the summary unchanged.
    simpleLocalFlowStep(nodeFrom, nodeTo) and
    this = result
  }
}
|
||||
|
||||
/** Provides helper predicates for writing custom `TypeBackTracker`-based predicates. */
module TypeBackTracker {
  /**
   * Gets a type back-tracker that is a valid end point of type back-tracking.
   */
  TypeBackTracker end() { result = any(TypeBackTracker t | t.end()) }
}
|
||||
@@ -1,82 +0,0 @@
|
||||
/**
|
||||
* Provides Python-specific definitions for use in the type tracker library.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
|
||||
|
||||
/** A data-flow node; alias for `DataFlowPublic::Node`. */
class Node = DataFlowPublic::Node;
|
||||
|
||||
/** A node used as an end point of type-tracking steps; alias for `DataFlowPublic::TypeTrackingNode`. */
class TypeTrackingNode = DataFlowPublic::TypeTrackingNode;
|
||||
|
||||
/** Alias for the two-argument predicate `DataFlowPrivate::simpleLocalFlowStep`. */
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2;
|
||||
|
||||
/** Alias for the two-argument predicate `DataFlowPrivate::jumpStep`. */
predicate jumpStep = DataFlowPrivate::jumpStep/2;
|
||||
|
||||
/**
 * Gets the name of a piece of content that may occur. For Python, content is currently
 * limited to attributes, and the content name is the name of the attribute, so every
 * attribute name of some `AttrRef` is a possible content name.
 */
string getPossibleContentName() {
  exists(DataFlowPublic::AttrRef ref | result = ref.getAttributeName())
}
|
||||
|
||||
/**
 * Gets the callable of a call in which `nodeFrom` is the `i`'th argument.
 *
 * This helper exists to avoid a bad join order seen in `callStep`, where
 * `isParameterOf` ended up being joined _before_ `getCallable`.
 */
pragma[nomagic]
private DataFlowPrivate::DataFlowCallable getCallableForArgument(
  DataFlowPublic::ArgumentNode nodeFrom, int i
) {
  exists(DataFlowPrivate::DataFlowCall invocation |
    result = invocation.getCallable() and
    nodeFrom.argumentOf(invocation, i)
  )
}
|
||||
|
||||
/**
 * Holds if `nodeFrom` is an argument of a call and `nodeTo` is the parameter
 * at the same position of the callable of that call.
 */
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
  // TODO: Support special methods?
  exists(int position |
    nodeTo.isParameterOf(getCallableForArgument(nodeFrom, position), position)
  )
}
|
||||
|
||||
/**
 * Holds if `nodeFrom` is a return node whose enclosing callable is the callable of
 * some call, and `nodeTo` is the node for that call.
 */
predicate returnStep(DataFlowPrivate::ReturnNode nodeFrom, Node nodeTo) {
  exists(DataFlowPrivate::DataFlowCall invocation |
    nodeTo.asCfgNode() = invocation.getNode() and
    nodeFrom.getEnclosingCallable() = invocation.getCallable()
  )
}
|
||||
|
||||
/**
 * Holds if some attribute write that may have attribute name `content` writes the value
 * `nodeFrom` to the object `nodeTo`.
 */
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) {
  exists(DataFlowPublic::AttrWrite write |
    nodeTo = write.getObject() and
    nodeFrom = write.getValue() and
    write.mayHaveAttributeName(content)
  )
}
|
||||
|
||||
/**
 * Holds if `nodeTo` is an attribute read that may have attribute name `content`
 * and whose object is `nodeFrom`.
 */
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
  exists(DataFlowPublic::AttrRead read |
    read = nodeTo and
    read.getObject() = nodeFrom and
    read.mayHaveAttributeName(content)
  )
}
|
||||
|
||||
/**
 * A utility class with the same values as `boolean` (`true` and `false`) that can be
 * used without requiring type joining.
 */
class Boolean extends boolean {
  Boolean() { this = false or this = true }
}
|
||||
@@ -1,138 +0,0 @@
|
||||
# Using the shared dataflow library
|
||||
|
||||
## File organisation
|
||||
|
||||
The files currently live in `experimental` (whereas the existing implementation lives in `semmle\python\dataflow`).
|
||||
|
||||
In there is found `DataFlow.qll`, `DataFlow2.qll` etc. which refer to `internal\DataFlowImpl`, `internal\DataFlowImpl2` etc. respectively. The `DataFlowImplN`-files are all identical copies to avoid mutual recursion. They start off by including two files `internal\DataFlowImplCommon` and `internal\DataFlowImplSpecific`. The former contains all the language-agnostic definitions, while the latter is where we describe our favorite language. `DataFlowImplSpecific` simply forwards to two other files `internal\DataFlowPrivate.qll` and `internal\DataFlowPublic.qll`. Definitions in the former will be hidden behind a `private` modifier, while those in the latter can be referred to in data flow queries. For instance, the definition of `DataFlow::Node` should likely be in `DataFlowPublic.qll`.
|
||||
|
||||
## Define the dataflow graph
|
||||
|
||||
In order to use the dataflow library, we need to define the dataflow graph,
|
||||
that is, define the nodes and the edges.
|
||||
|
||||
### Define the nodes
|
||||
|
||||
The nodes are defined in the type `DataFlow::Node` (found in `DataFlowPublic.qll`).
|
||||
This should likely be an IPA type, so we can extend it as needed.
|
||||
|
||||
Typical cases needed to construct the call graph include
|
||||
- argument node
|
||||
- parameter node
|
||||
- return node
|
||||
|
||||
Typical extensions include
|
||||
- postupdate nodes
|
||||
- implicit `this`-nodes
|
||||
|
||||
### Define the edges
|
||||
|
||||
The edges split into local flow (within a function) and global flow (the call graph, between functions/procedures).
|
||||
|
||||
Extra flow, such as reading from and writing to global variables, can be captured in `jumpStep`.
|
||||
The local flow should be obtainable from an SSA computation.
|
||||
Local flow nodes are generally either control flow nodes or SSA variables.
|
||||
Flow from control flow nodes to SSA variables comes from SSA variable definitions, while flow from SSA variables to control flow nodes comes from def-use pairs.
|
||||
|
||||
The global flow should be obtainable from a `PointsTo` analysis. It is specified via `viableCallable` and
|
||||
`getAnOutNode`. Consider making `ReturnKind` a singleton IPA type as in java.
|
||||
|
||||
Global flow includes local flow within a consistent call context. Thus, for local flow to count as global flow, all relevant nodes should implement `getEnclosingCallable`.
|
||||
|
||||
If complicated dispatch needs to be modelled, try using the `[reduced|pruned]viable*` predicates.
|
||||
|
||||
## Field flow
|
||||
|
||||
To track flow through fields we need to provide a model of fields, that is the `Content` class.
|
||||
|
||||
Field access is specified via `read_step` and `store_step`.
|
||||
|
||||
Work is being done to make field flow handle lists and dictionaries and the like.
|
||||
|
||||
`PostUpdateNode`s become important when field flow is used, as they track modifications to fields resulting from function calls.
|
||||
|
||||
## Type pruning
|
||||
|
||||
If type information is available, flows can be discarded on the grounds of type mismatch.
|
||||
|
||||
Tracked types are given by the class `DataFlowType` and the predicate `getTypeBound`, and compatibility is recorded in the predicate `compatibleTypes`.
|
||||
If type pruning is not used, `compatibleTypes` should be implemented as `any`; if it is implemented, say, as `none`, all flows will be pruned.
|
||||
|
||||
Further, possible casts are given by the class `CastNode`.
|
||||
|
||||
---
|
||||
|
||||
# Plan
|
||||
|
||||
## Stage I, data flow
|
||||
|
||||
### Phase 0, setup
|
||||
Define minimal IPA type for `DataFlow::Node`
|
||||
Define all required predicates empty (via `none()`),
|
||||
except `compatibleTypes` which should be `any()`.
|
||||
Define `ReturnKind`, `DataFlowType`, and `Content` as singleton IPA types.
|
||||
|
||||
|
||||
### Phase 1, local flow
|
||||
Implement `simpleLocalFlowStep` based on the existing SSA computation
|
||||
|
||||
### Phase 2, global flow
|
||||
Implement `viableCallable` and `getAnOutNode` based on the existing predicate `PointsTo`.
|
||||
|
||||
### Phase 3, field flow
|
||||
Redefine `Content` and implement `read_step` and `store_step`.
|
||||
|
||||
Review use of post-update nodes.
|
||||
|
||||
### Phase 4, type pruning
|
||||
Use type trackers to obtain relevant type information and redefine `DataFlowType` to contain appropriate cases. Record the type information in `getTypeBound`.
|
||||
|
||||
Implement `compatibleTypes` (perhaps simply as the identity).
|
||||
|
||||
If necessary, re-implement `getErasedRepr` and `ppReprType`.
|
||||
|
||||
If necessary, redefine `CastNode`.
|
||||
|
||||
### Phase 5, bonus
|
||||
Review possible use of `[reduced|pruned]viable*` predicates.
|
||||
|
||||
Review need for more elaborate `ReturnKind`.
|
||||
|
||||
Review need for non-empty `jumpStep`.
|
||||
|
||||
Review need for non-empty `isUnreachableInCall`.
|
||||
|
||||
## Stage II, taint tracking
|
||||
|
||||
### Phase 0, setup
|
||||
Implement all predicates empty.
|
||||
|
||||
### Phase 1, experiments
|
||||
Try recovering an existing taint tracking query by implementing sources, sinks, sanitizers, and barriers.
|
||||
|
||||
---
|
||||
|
||||
# Status
|
||||
|
||||
## Achieved
|
||||
|
||||
- Copy of shared library; implemented enough predicates to make it compile.
|
||||
- Simple flow into, out of, and through functions.
|
||||
- Some tests, in particular a skeleton for something comprehensive.
|
||||
|
||||
## TODO
|
||||
|
||||
- Implementation has largely been done by finding a plausibly-sounding predicate in the python library to refer to. We should review that we actually have the intended semantics in all places.
|
||||
- Comprehensive testing.
|
||||
- The regression tests track the value of guards in order to eliminate impossible data flow. We currently have regressions because of this. We cannot readily replicate the existing method, as it uses the interdefinedness of data flow and taint tracking (there is a boolean taint kind). C++ [does something similar](https://github.com/github/codeql/blob/master/cpp/ql/src/semmle/code/cpp/controlflow/internal/ConstantExprs.qll#L27-L36) for eliminating impossible control flow, which we might be able to replicate (they infer values of "interesting" control flow nodes, which are those needed to determine values of guards).
|
||||
- Flow for some syntactic constructs is done via extra taint steps in the existing implementation; we should find a way to get data flow for them. Some of this should be covered by field flow.
|
||||
- A document is being written about proper use of the shared data flow library, this should be adhered to. In particular, we should consider replacing def-use with def-to-first-use and use-to-next-use in local flow.
|
||||
- We seem to get duplicated results for global flow, as well as flow with and without type (so four times the "unique" results).
|
||||
- We currently consider control flow nodes like exit nodes for functions, we should probably filter down which ones are of interest.
|
||||
- We should probably override ToString for a number of data flow nodes.
|
||||
- Test flow through classes, constructors and methods.
|
||||
- What happens with named arguments? What does C# do?
|
||||
- What should the enclosing callable for global variables be? C++ [makes it the variable itself](https://github.com/github/codeql/blob/master/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll#L417), C# seems to not have nodes for these but only for their reads and writes.
|
||||
- Is `yield` another return type? If not, how is it handled?
|
||||
- Should `OutNode` include magic function calls?
|
||||
- Consider creating an internal abstract class for nodes as C# does. Among other things, this can help the optimizer by stating that `getEnclosingCallable` [is functional](https://github.com/github/codeql/blob/master/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll#L62).
|
||||
@@ -1,120 +0,0 @@
|
||||
/**
|
||||
* Provides an implementation of global (interprocedural) taint tracking.
|
||||
* This file re-exports the local (intraprocedural) taint-tracking analysis
|
||||
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
|
||||
* exposed through the `Configuration` class. For some languages, this file
|
||||
* exists in several identical copies, allowing queries to use multiple
|
||||
* `Configuration` classes that depend on each other without introducing
|
||||
* mutual recursion among those configurations.
|
||||
*/
|
||||
|
||||
import TaintTrackingParameter::Public
|
||||
private import TaintTrackingParameter::Private
|
||||
|
||||
/**
|
||||
* A configuration of interprocedural taint tracking analysis. This defines
|
||||
* sources, sinks, and any other configurable aspect of the analysis. Each
|
||||
* use of the taint tracking library must define its own unique extension of
|
||||
* this abstract class.
|
||||
*
|
||||
* A taint-tracking configuration is a special data flow configuration
|
||||
* (`DataFlow::Configuration`) that allows for flow through nodes that do not
|
||||
* necessarily preserve values but are still relevant from a taint tracking
|
||||
* perspective. (For example, string concatenation, where one of the operands
|
||||
* is tainted.)
|
||||
*
|
||||
* To create a configuration, extend this class with a subclass whose
|
||||
* characteristic predicate is a unique singleton string. For example, write
|
||||
*
|
||||
* ```ql
|
||||
* class MyAnalysisConfiguration extends TaintTracking::Configuration {
|
||||
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
|
||||
* // Override `isSource` and `isSink`.
|
||||
* // Optionally override `isSanitizer`.
|
||||
* // Optionally override `isSanitizerIn`.
|
||||
* // Optionally override `isSanitizerOut`.
|
||||
* // Optionally override `isSanitizerGuard`.
|
||||
* // Optionally override `isAdditionalTaintStep`.
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* Then, to query whether there is flow between some `source` and `sink`,
|
||||
* write
|
||||
*
|
||||
* ```ql
|
||||
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
|
||||
* ```
|
||||
*
|
||||
* Multiple configurations can coexist, but it is unsupported to depend on
|
||||
* another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
|
||||
* overridden predicates that define sources, sinks, or additional steps.
|
||||
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
|
||||
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
|
||||
*/
|
||||
abstract class Configuration extends DataFlow::Configuration {
  bindingset[this]
  Configuration() { any() }

  /**
   * Holds if `source` is a taint source relevant to this configuration.
   *
   * `hasFlow()` converges faster the smaller this predicate is.
   */
  // overridden to provide taint-tracking specific qldoc
  abstract override predicate isSource(DataFlow::Node source);

  /**
   * Holds if `sink` is a taint sink relevant to this configuration.
   *
   * `hasFlow()` converges faster the smaller this predicate is.
   */
  // overridden to provide taint-tracking specific qldoc
  abstract override predicate isSink(DataFlow::Node sink);

  /** Holds if `node` is a taint sanitizer. Holds for no node unless overridden. */
  predicate isSanitizer(DataFlow::Node node) { none() }

  /** Holds if `node` is either a sanitizer of this configuration or a default taint sanitizer. */
  final override predicate isBarrier(DataFlow::Node node) {
    defaultTaintSanitizer(node)
    or
    this.isSanitizer(node)
  }

  /** Holds if taint must not propagate into `node`. Holds for no node unless overridden. */
  predicate isSanitizerIn(DataFlow::Node node) { none() }

  /** Holds if `isSanitizerIn` holds for `node`. */
  final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }

  /** Holds if taint must not propagate out of `node`. Holds for no node unless overridden. */
  predicate isSanitizerOut(DataFlow::Node node) { none() }

  /** Holds if `isSanitizerOut` holds for `node`. */
  final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }

  /**
   * Holds if taint must not propagate through nodes guarded by `guard`.
   * Holds for no guard unless overridden.
   */
  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }

  /** Holds if `isSanitizerGuard` holds for `guard`. */
  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
    this.isSanitizerGuard(guard)
  }

  /**
   * Holds if the analysis must take into account an additional taint propagation
   * step from `node1` to `node2`. Holds for no pair of nodes unless overridden.
   */
  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }

  /**
   * Holds if there is an additional taint step of this configuration, or a default
   * additional taint step, from `node1` to `node2`.
   */
  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
    defaultAdditionalTaintStep(node1, node2)
    or
    this.isAdditionalTaintStep(node1, node2)
  }

  /**
   * Holds if `defaultImplicitTaintRead` holds for `node` and `c`, and `node` is either
   * a sink or the first node of an additional taint step of this configuration.
   */
  override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
    defaultImplicitTaintRead(node, c) and
    (
      this.isAdditionalTaintStep(node, _)
      or
      this.isSink(node)
    )
  }

  /**
   * Holds if, for this configuration, taint may flow from `source` to `sink`.
   */
  // overridden to provide taint-tracking specific qldoc
  override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
    super.hasFlow(source, sink)
  }
}
|
||||
@@ -1,6 +0,0 @@
|
||||
import semmle.python.dataflow.new.internal.TaintTrackingPublic as Public
|
||||
|
||||
module Private {
|
||||
import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow
|
||||
import semmle.python.dataflow.new.internal.TaintTrackingPrivate
|
||||
}
|
||||
@@ -1,120 +0,0 @@
|
||||
/**
|
||||
* Provides an implementation of global (interprocedural) taint tracking.
|
||||
* This file re-exports the local (intraprocedural) taint-tracking analysis
|
||||
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
|
||||
* exposed through the `Configuration` class. For some languages, this file
|
||||
* exists in several identical copies, allowing queries to use multiple
|
||||
* `Configuration` classes that depend on each other without introducing
|
||||
* mutual recursion among those configurations.
|
||||
*/
|
||||
|
||||
import TaintTrackingParameter::Public
|
||||
private import TaintTrackingParameter::Private
|
||||
|
||||
/**
|
||||
* A configuration of interprocedural taint tracking analysis. This defines
|
||||
* sources, sinks, and any other configurable aspect of the analysis. Each
|
||||
* use of the taint tracking library must define its own unique extension of
|
||||
* this abstract class.
|
||||
*
|
||||
* A taint-tracking configuration is a special data flow configuration
|
||||
* (`DataFlow::Configuration`) that allows for flow through nodes that do not
|
||||
* necessarily preserve values but are still relevant from a taint tracking
|
||||
* perspective. (For example, string concatenation, where one of the operands
|
||||
* is tainted.)
|
||||
*
|
||||
* To create a configuration, extend this class with a subclass whose
|
||||
* characteristic predicate is a unique singleton string. For example, write
|
||||
*
|
||||
* ```ql
|
||||
* class MyAnalysisConfiguration extends TaintTracking::Configuration {
|
||||
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
|
||||
* // Override `isSource` and `isSink`.
|
||||
* // Optionally override `isSanitizer`.
|
||||
* // Optionally override `isSanitizerIn`.
|
||||
* // Optionally override `isSanitizerOut`.
|
||||
* // Optionally override `isSanitizerGuard`.
|
||||
* // Optionally override `isAdditionalTaintStep`.
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* Then, to query whether there is flow between some `source` and `sink`,
|
||||
* write
|
||||
*
|
||||
* ```ql
|
||||
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
|
||||
* ```
|
||||
*
|
||||
* Multiple configurations can coexist, but it is unsupported to depend on
|
||||
* another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
|
||||
* overridden predicates that define sources, sinks, or additional steps.
|
||||
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
|
||||
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
|
||||
*/
|
||||
abstract class Configuration extends DataFlow::Configuration {
  bindingset[this]
  Configuration() { any() }

  /**
   * Holds if `source` is a taint source relevant to this configuration.
   *
   * `hasFlow()` converges faster the smaller this predicate is.
   */
  // overridden to provide taint-tracking specific qldoc
  abstract override predicate isSource(DataFlow::Node source);

  /**
   * Holds if `sink` is a taint sink relevant to this configuration.
   *
   * `hasFlow()` converges faster the smaller this predicate is.
   */
  // overridden to provide taint-tracking specific qldoc
  abstract override predicate isSink(DataFlow::Node sink);

  /** Holds if `node` is a taint sanitizer. Holds for no node unless overridden. */
  predicate isSanitizer(DataFlow::Node node) { none() }

  /** Holds if `node` is either a sanitizer of this configuration or a default taint sanitizer. */
  final override predicate isBarrier(DataFlow::Node node) {
    defaultTaintSanitizer(node)
    or
    this.isSanitizer(node)
  }

  /** Holds if taint must not propagate into `node`. Holds for no node unless overridden. */
  predicate isSanitizerIn(DataFlow::Node node) { none() }

  /** Holds if `isSanitizerIn` holds for `node`. */
  final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }

  /** Holds if taint must not propagate out of `node`. Holds for no node unless overridden. */
  predicate isSanitizerOut(DataFlow::Node node) { none() }

  /** Holds if `isSanitizerOut` holds for `node`. */
  final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }

  /**
   * Holds if taint must not propagate through nodes guarded by `guard`.
   * Holds for no guard unless overridden.
   */
  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }

  /** Holds if `isSanitizerGuard` holds for `guard`. */
  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
    this.isSanitizerGuard(guard)
  }

  /**
   * Holds if the analysis must take into account an additional taint propagation
   * step from `node1` to `node2`. Holds for no pair of nodes unless overridden.
   */
  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }

  /**
   * Holds if there is an additional taint step of this configuration, or a default
   * additional taint step, from `node1` to `node2`.
   */
  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
    defaultAdditionalTaintStep(node1, node2)
    or
    this.isAdditionalTaintStep(node1, node2)
  }

  /**
   * Holds if `defaultImplicitTaintRead` holds for `node` and `c`, and `node` is either
   * a sink or the first node of an additional taint step of this configuration.
   */
  override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
    defaultImplicitTaintRead(node, c) and
    (
      this.isAdditionalTaintStep(node, _)
      or
      this.isSink(node)
    )
  }

  /**
   * Holds if, for this configuration, taint may flow from `source` to `sink`.
   */
  // overridden to provide taint-tracking specific qldoc
  override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
    super.hasFlow(source, sink)
  }
}
|
||||
@@ -1,6 +0,0 @@
|
||||
import semmle.python.dataflow.new.internal.TaintTrackingPublic as Public
|
||||
|
||||
module Private {
|
||||
import semmle.python.dataflow.new.DataFlow2::DataFlow2 as DataFlow
|
||||
import semmle.python.dataflow.new.internal.TaintTrackingPrivate
|
||||
}
|
||||
@@ -1,120 +0,0 @@
|
||||
/**
|
||||
* Provides an implementation of global (interprocedural) taint tracking.
|
||||
* This file re-exports the local (intraprocedural) taint-tracking analysis
|
||||
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
|
||||
* exposed through the `Configuration` class. For some languages, this file
|
||||
* exists in several identical copies, allowing queries to use multiple
|
||||
* `Configuration` classes that depend on each other without introducing
|
||||
* mutual recursion among those configurations.
|
||||
*/
|
||||
|
||||
import TaintTrackingParameter::Public
|
||||
private import TaintTrackingParameter::Private
|
||||
|
||||
/**
|
||||
* A configuration of interprocedural taint tracking analysis. This defines
|
||||
* sources, sinks, and any other configurable aspect of the analysis. Each
|
||||
* use of the taint tracking library must define its own unique extension of
|
||||
* this abstract class.
|
||||
*
|
||||
* A taint-tracking configuration is a special data flow configuration
|
||||
* (`DataFlow::Configuration`) that allows for flow through nodes that do not
|
||||
* necessarily preserve values but are still relevant from a taint tracking
|
||||
* perspective. (For example, string concatenation, where one of the operands
|
||||
* is tainted.)
|
||||
*
|
||||
* To create a configuration, extend this class with a subclass whose
|
||||
* characteristic predicate is a unique singleton string. For example, write
|
||||
*
|
||||
* ```ql
|
||||
* class MyAnalysisConfiguration extends TaintTracking::Configuration {
|
||||
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
|
||||
* // Override `isSource` and `isSink`.
|
||||
* // Optionally override `isSanitizer`.
|
||||
* // Optionally override `isSanitizerIn`.
|
||||
* // Optionally override `isSanitizerOut`.
|
||||
* // Optionally override `isSanitizerGuard`.
|
||||
* // Optionally override `isAdditionalTaintStep`.
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* Then, to query whether there is flow between some `source` and `sink`,
|
||||
* write
|
||||
*
|
||||
* ```ql
|
||||
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
|
||||
* ```
|
||||
*
|
||||
* Multiple configurations can coexist, but it is unsupported to depend on
|
||||
* another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
|
||||
* overridden predicates that define sources, sinks, or additional steps.
|
||||
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
|
||||
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
|
||||
*/
|
||||
abstract class Configuration extends DataFlow::Configuration {
|
||||
bindingset[this]
|
||||
Configuration() { any() }
|
||||
|
||||
/**
|
||||
* Holds if `source` is a relevant taint source.
|
||||
*
|
||||
* The smaller this predicate is, the faster `hasFlow()` will converge.
|
||||
*/
|
||||
// overridden to provide taint-tracking specific qldoc
|
||||
abstract override predicate isSource(DataFlow::Node source);
|
||||
|
||||
/**
|
||||
* Holds if `sink` is a relevant taint sink.
|
||||
*
|
||||
* The smaller this predicate is, the faster `hasFlow()` will converge.
|
||||
*/
|
||||
// overridden to provide taint-tracking specific qldoc
|
||||
abstract override predicate isSink(DataFlow::Node sink);
|
||||
|
||||
/** Holds if the node `node` is a taint sanitizer. */
|
||||
predicate isSanitizer(DataFlow::Node node) { none() }
|
||||
|
||||
final override predicate isBarrier(DataFlow::Node node) {
|
||||
isSanitizer(node) or
|
||||
defaultTaintSanitizer(node)
|
||||
}
|
||||
|
||||
/** Holds if taint propagation into `node` is prohibited. */
|
||||
predicate isSanitizerIn(DataFlow::Node node) { none() }
|
||||
|
||||
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
|
||||
|
||||
/** Holds if taint propagation out of `node` is prohibited. */
|
||||
predicate isSanitizerOut(DataFlow::Node node) { none() }
|
||||
|
||||
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
|
||||
|
||||
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
|
||||
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
|
||||
|
||||
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
|
||||
|
||||
/**
|
||||
* Holds if the additional taint propagation step from `node1` to `node2`
|
||||
* must be taken into account in the analysis.
|
||||
*/
|
||||
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
|
||||
|
||||
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
|
||||
isAdditionalTaintStep(node1, node2) or
|
||||
defaultAdditionalTaintStep(node1, node2)
|
||||
}
|
||||
|
||||
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
|
||||
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
|
||||
defaultImplicitTaintRead(node, c)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint may flow from `source` to `sink` for this configuration.
|
||||
*/
|
||||
// overridden to provide taint-tracking specific qldoc
|
||||
override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
|
||||
super.hasFlow(source, sink)
|
||||
}
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
import semmle.python.dataflow.new.internal.TaintTrackingPublic as Public
|
||||
|
||||
module Private {
|
||||
import semmle.python.dataflow.new.DataFlow3::DataFlow3 as DataFlow
|
||||
import semmle.python.dataflow.new.internal.TaintTrackingPrivate
|
||||
}
|
||||
@@ -1,120 +0,0 @@
|
||||
/**
|
||||
* Provides an implementation of global (interprocedural) taint tracking.
|
||||
* This file re-exports the local (intraprocedural) taint-tracking analysis
|
||||
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
|
||||
* exposed through the `Configuration` class. For some languages, this file
|
||||
* exists in several identical copies, allowing queries to use multiple
|
||||
* `Configuration` classes that depend on each other without introducing
|
||||
* mutual recursion among those configurations.
|
||||
*/
|
||||
|
||||
import TaintTrackingParameter::Public
|
||||
private import TaintTrackingParameter::Private
|
||||
|
||||
/**
|
||||
* A configuration of interprocedural taint tracking analysis. This defines
|
||||
* sources, sinks, and any other configurable aspect of the analysis. Each
|
||||
* use of the taint tracking library must define its own unique extension of
|
||||
* this abstract class.
|
||||
*
|
||||
* A taint-tracking configuration is a special data flow configuration
|
||||
* (`DataFlow::Configuration`) that allows for flow through nodes that do not
|
||||
* necessarily preserve values but are still relevant from a taint tracking
|
||||
* perspective. (For example, string concatenation, where one of the operands
|
||||
* is tainted.)
|
||||
*
|
||||
* To create a configuration, extend this class with a subclass whose
|
||||
* characteristic predicate is a unique singleton string. For example, write
|
||||
*
|
||||
* ```ql
|
||||
* class MyAnalysisConfiguration extends TaintTracking::Configuration {
|
||||
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
|
||||
* // Override `isSource` and `isSink`.
|
||||
* // Optionally override `isSanitizer`.
|
||||
* // Optionally override `isSanitizerIn`.
|
||||
* // Optionally override `isSanitizerOut`.
|
||||
* // Optionally override `isSanitizerGuard`.
|
||||
* // Optionally override `isAdditionalTaintStep`.
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* Then, to query whether there is flow between some `source` and `sink`,
|
||||
* write
|
||||
*
|
||||
* ```ql
|
||||
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
|
||||
* ```
|
||||
*
|
||||
* Multiple configurations can coexist, but it is unsupported to depend on
|
||||
* another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
|
||||
* overridden predicates that define sources, sinks, or additional steps.
|
||||
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
|
||||
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
|
||||
*/
|
||||
abstract class Configuration extends DataFlow::Configuration {
|
||||
bindingset[this]
|
||||
Configuration() { any() }
|
||||
|
||||
/**
|
||||
* Holds if `source` is a relevant taint source.
|
||||
*
|
||||
* The smaller this predicate is, the faster `hasFlow()` will converge.
|
||||
*/
|
||||
// overridden to provide taint-tracking specific qldoc
|
||||
abstract override predicate isSource(DataFlow::Node source);
|
||||
|
||||
/**
|
||||
* Holds if `sink` is a relevant taint sink.
|
||||
*
|
||||
* The smaller this predicate is, the faster `hasFlow()` will converge.
|
||||
*/
|
||||
// overridden to provide taint-tracking specific qldoc
|
||||
abstract override predicate isSink(DataFlow::Node sink);
|
||||
|
||||
/** Holds if the node `node` is a taint sanitizer. */
|
||||
predicate isSanitizer(DataFlow::Node node) { none() }
|
||||
|
||||
final override predicate isBarrier(DataFlow::Node node) {
|
||||
isSanitizer(node) or
|
||||
defaultTaintSanitizer(node)
|
||||
}
|
||||
|
||||
/** Holds if taint propagation into `node` is prohibited. */
|
||||
predicate isSanitizerIn(DataFlow::Node node) { none() }
|
||||
|
||||
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
|
||||
|
||||
/** Holds if taint propagation out of `node` is prohibited. */
|
||||
predicate isSanitizerOut(DataFlow::Node node) { none() }
|
||||
|
||||
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
|
||||
|
||||
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
|
||||
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
|
||||
|
||||
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
|
||||
|
||||
/**
|
||||
* Holds if the additional taint propagation step from `node1` to `node2`
|
||||
* must be taken into account in the analysis.
|
||||
*/
|
||||
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
|
||||
|
||||
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
|
||||
isAdditionalTaintStep(node1, node2) or
|
||||
defaultAdditionalTaintStep(node1, node2)
|
||||
}
|
||||
|
||||
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
|
||||
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
|
||||
defaultImplicitTaintRead(node, c)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint may flow from `source` to `sink` for this configuration.
|
||||
*/
|
||||
// overridden to provide taint-tracking specific qldoc
|
||||
override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
|
||||
super.hasFlow(source, sink)
|
||||
}
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
import semmle.python.dataflow.new.internal.TaintTrackingPublic as Public
|
||||
|
||||
module Private {
|
||||
import semmle.python.dataflow.new.DataFlow4::DataFlow4 as DataFlow
|
||||
import semmle.python.dataflow.new.internal.TaintTrackingPrivate
|
||||
}
|
||||
@@ -1,141 +0,0 @@
|
||||
import python
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
private import semmle.python.dataflow.Implementation
|
||||
|
||||
module TaintTracking {
|
||||
class Source = TaintSource;
|
||||
|
||||
class Sink = TaintSink;
|
||||
|
||||
class Extension = DataFlowExtension::DataFlowNode;
|
||||
|
||||
class PathSource = TaintTrackingNode;
|
||||
|
||||
class PathSink = TaintTrackingNode;
|
||||
|
||||
abstract class Configuration extends string {
|
||||
/* Required to prevent compiler warning */
|
||||
bindingset[this]
|
||||
Configuration() { this = this }
|
||||
|
||||
/* Old implementation API */
|
||||
predicate isSource(Source src) { none() }
|
||||
|
||||
predicate isSink(Sink sink) { none() }
|
||||
|
||||
predicate isSanitizer(Sanitizer sanitizer) { none() }
|
||||
|
||||
predicate isExtension(Extension extension) { none() }
|
||||
|
||||
/* New implementation API */
|
||||
/**
|
||||
* Holds if `src` is a source of taint of `kind` that is relevant
|
||||
* for this configuration.
|
||||
*/
|
||||
predicate isSource(DataFlow::Node src, TaintKind kind) {
|
||||
exists(TaintSource taintSrc |
|
||||
this.isSource(taintSrc) and
|
||||
src.asCfgNode() = taintSrc and
|
||||
taintSrc.isSourceOf(kind)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `sink` is a sink of taint of `kind` that is relevant
|
||||
* for this configuration.
|
||||
*/
|
||||
predicate isSink(DataFlow::Node sink, TaintKind kind) {
|
||||
exists(TaintSink taintSink |
|
||||
this.isSink(taintSink) and
|
||||
sink.asCfgNode() = taintSink and
|
||||
taintSink.sinks(kind)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `src -> dest` should be considered as a flow edge
|
||||
* in addition to standard data flow edges.
|
||||
*/
|
||||
predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node dest) { none() }
|
||||
|
||||
/**
|
||||
* Holds if `src -> dest` is a flow edge converting taint from `srckind` to `destkind`.
|
||||
*/
|
||||
predicate isAdditionalFlowStep(
|
||||
DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind
|
||||
) {
|
||||
none()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` should be considered as a barrier to flow of any kind.
|
||||
*/
|
||||
predicate isBarrier(DataFlow::Node node) { none() }
|
||||
|
||||
/**
|
||||
* Holds if `node` should be considered as a barrier to flow of `kind`.
|
||||
*/
|
||||
predicate isBarrier(DataFlow::Node node, TaintKind kind) {
|
||||
exists(Sanitizer sanitizer | this.isSanitizer(sanitizer) |
|
||||
sanitizer.sanitizingNode(kind, node.asCfgNode())
|
||||
or
|
||||
sanitizer.sanitizingEdge(kind, node.asVariable())
|
||||
or
|
||||
sanitizer.sanitizingSingleEdge(kind, node.asVariable())
|
||||
or
|
||||
sanitizer.sanitizingDefinition(kind, node.asVariable())
|
||||
or
|
||||
exists(MethodCallsiteRefinement call, FunctionObject callee |
|
||||
call = node.asVariable().getDefinition() and
|
||||
callee.getACall() = call.getCall() and
|
||||
sanitizer.sanitizingCall(kind, callee)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if flow from `src` to `dest` is prohibited.
|
||||
*/
|
||||
predicate isBarrierEdge(DataFlow::Node src, DataFlow::Node dest) { none() }
|
||||
|
||||
/**
|
||||
* Holds if control flow from `test` along the `isTrue` edge is prohibited.
|
||||
*/
|
||||
predicate isBarrierTest(ControlFlowNode test, boolean isTrue) { none() }
|
||||
|
||||
/**
|
||||
* Holds if flow from `src` to `dest` is prohibited when the incoming taint is `srckind` and the outgoing taint is `destkind`.
|
||||
* Note that `srckind` and `destkind` can be the same.
|
||||
*/
|
||||
predicate isBarrierEdge(
|
||||
DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind
|
||||
) {
|
||||
none()
|
||||
}
|
||||
|
||||
/* Common query API */
|
||||
predicate hasFlowPath(PathSource src, PathSink sink) {
|
||||
this.(TaintTrackingImplementation).hasFlowPath(src, sink)
|
||||
}
|
||||
|
||||
/* Old query API */
|
||||
/* deprecated */
|
||||
deprecated predicate hasFlow(Source src, Sink sink) {
|
||||
exists(PathSource psrc, PathSink psink |
|
||||
this.hasFlowPath(psrc, psink) and
|
||||
src = psrc.getNode().asCfgNode() and
|
||||
sink = psink.getNode().asCfgNode()
|
||||
)
|
||||
}
|
||||
|
||||
/* New query API */
|
||||
predicate hasSimpleFlow(DataFlow::Node src, DataFlow::Node sink) {
|
||||
exists(PathSource psrc, PathSink psink |
|
||||
this.hasFlowPath(psrc, psink) and
|
||||
src = psrc.getNode() and
|
||||
sink = psink.getNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
@@ -1,19 +0,0 @@
|
||||
import python
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
|
||||
class OpenFile extends TaintKind {
|
||||
OpenFile() { this = "file.open" }
|
||||
|
||||
override string repr() { result = "an open file" }
|
||||
}
|
||||
|
||||
class OpenFileConfiguration extends TaintTracking::Configuration {
|
||||
OpenFileConfiguration() { this = "Open file configuration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node src, TaintKind kind) {
|
||||
src.asCfgNode() = Value::named("open").getACall() and
|
||||
kind instanceof OpenFile
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink, TaintKind kind) { none() }
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,68 +0,0 @@
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
import semmle.python.dataflow.Implementation
|
||||
|
||||
/* For backwards compatibility -- Use `TaintTrackingContext` instead. */
|
||||
deprecated class CallContext extends TaintTrackingContext {
|
||||
TaintTrackingContext getCallee(CallNode call) { result.getCaller(call) = this }
|
||||
|
||||
predicate appliesToScope(Scope s) {
|
||||
exists(PythonFunctionObjectInternal func, TaintKind param, AttributePath path, int n |
|
||||
this = TParamContext(param, path, n) and
|
||||
exists(TaintTrackingImplementation impl |
|
||||
impl.callWithTaintedArgument(_, _, _, func, n, path, param) and
|
||||
s = func.getScope()
|
||||
)
|
||||
)
|
||||
or
|
||||
this.isTop()
|
||||
}
|
||||
}
|
||||
|
||||
/* Backwards compatibility with config-less taint-tracking */
|
||||
private class LegacyConfiguration extends TaintTracking::Configuration {
|
||||
LegacyConfiguration() {
|
||||
/* A name that won't be accidentally chosen by users */
|
||||
this = "Semmle: Internal legacy configuration"
|
||||
}
|
||||
|
||||
override predicate isSource(TaintSource src) { src = src }
|
||||
|
||||
override predicate isSink(TaintSink sink) { sink = sink }
|
||||
|
||||
override predicate isSanitizer(Sanitizer sanitizer) { sanitizer = sanitizer }
|
||||
|
||||
override predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node dest) {
|
||||
exists(DataFlowExtension::DataFlowNode legacyExtension | src.asCfgNode() = legacyExtension |
|
||||
dest.asCfgNode() = legacyExtension.getASuccessorNode()
|
||||
or
|
||||
dest.asVariable() = legacyExtension.getASuccessorVariable()
|
||||
or
|
||||
dest.asCfgNode() = legacyExtension.getAReturnSuccessorNode(_)
|
||||
or
|
||||
dest.asCfgNode() = legacyExtension.getACalleeSuccessorNode(_)
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isAdditionalFlowStep(
|
||||
DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind
|
||||
) {
|
||||
exists(DataFlowExtension::DataFlowNode legacyExtension | src.asCfgNode() = legacyExtension |
|
||||
dest.asCfgNode() = legacyExtension.getASuccessorNode(srckind, destkind)
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isBarrierEdge(DataFlow::Node src, DataFlow::Node dest) {
|
||||
(
|
||||
exists(DataFlowExtension::DataFlowVariable legacyExtension |
|
||||
src.asVariable() = legacyExtension and
|
||||
legacyExtension.prunedSuccessor(dest.asVariable())
|
||||
)
|
||||
or
|
||||
exists(DataFlowExtension::DataFlowNode legacyExtension |
|
||||
src.asCfgNode() = legacyExtension and
|
||||
legacyExtension.prunedSuccessor(dest.asCfgNode())
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -1,174 +0,0 @@
|
||||
/**
|
||||
* Provides classes and predicates for tracking global state across the control flow and call graphs.
|
||||
*
|
||||
* NOTE: State tracking tracks both whether a state may apply to a given node in a given context *and*
|
||||
* whether it may not apply.
|
||||
* That `state.appliesTo(f, ctx)` holds implies nothing about whether `state.mayNotApplyTo(f, ctx)` holds.
|
||||
* Neither may hold which merely means that `f` with context `ctx` is not reached during the analysis.
|
||||
* Conversely, both may hold, which means that `state` may or may not apply depending on how `f` was reached.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.pointsto.Base
|
||||
private import semmle.python.pointsto.PointsTo
|
||||
private import semmle.python.pointsto.PointsToContext
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
|
||||
/** A state that should be tracked. */
|
||||
abstract class TrackableState extends string {
|
||||
bindingset[this]
|
||||
TrackableState() { this = this }
|
||||
|
||||
/** Holds if this state may apply to the control flow node `f`, regardless of the context. */
|
||||
final predicate appliesTo(ControlFlowNode f) { this.appliesTo(f, _) }
|
||||
|
||||
/** Holds if this state may apply to the control flow node `f`, given the context `ctx`. */
|
||||
final predicate appliesTo(ControlFlowNode f, Context ctx) {
|
||||
StateTracking::appliesToNode(this, f, ctx, true)
|
||||
}
|
||||
|
||||
/** Holds if this state may not apply to the control flow node `f`, given the context `ctx`. */
|
||||
final predicate mayNotApplyTo(ControlFlowNode f, Context ctx) {
|
||||
StateTracking::appliesToNode(this, f, ctx, false)
|
||||
}
|
||||
|
||||
/** Holds if this state may not apply to the control flow node `f`, regardless of the context. */
|
||||
final predicate mayNotApplyTo(ControlFlowNode f) { this.mayNotApplyTo(f, _) }
|
||||
|
||||
/** Holds if `test` shows this state to apply (`sense` = true) or not to apply (`sense` = false), given the context `ctx`. */
|
||||
predicate testsFor(PyEdgeRefinement test, Context ctx, boolean sense) {
|
||||
ctx.appliesToScope(test.getScope()) and this.testsFor(test, sense)
|
||||
}
|
||||
|
||||
/** Holds if `test` shows this state to apply (`sense` = true) or not to apply (`sense` = false). */
|
||||
predicate testsFor(PyEdgeRefinement test, boolean sense) { none() }
|
||||
|
||||
/**
|
||||
* Holds if state starts at `f`.
|
||||
* Either this predicate or `startsAt(ControlFlowNode f, Context ctx)`
|
||||
* should be overridden by sub-classes.
|
||||
*/
|
||||
predicate startsAt(ControlFlowNode f) { none() }
|
||||
|
||||
/**
|
||||
* Holds if state starts at `f` given context `ctx`.
|
||||
* Either this predicate or `startsAt(ControlFlowNode f)`
|
||||
* should be overridden by sub-classes.
|
||||
*/
|
||||
pragma[noinline]
|
||||
predicate startsAt(ControlFlowNode f, Context ctx) { ctx.appliesTo(f) and this.startsAt(f) }
|
||||
|
||||
/**
|
||||
* Holds if state ends at `f`.
|
||||
* Either this predicate or `endsAt(ControlFlowNode f, Context ctx)`
|
||||
* may be overridden by sub-classes.
|
||||
*/
|
||||
predicate endsAt(ControlFlowNode f) { none() }
|
||||
|
||||
/**
|
||||
* Holds if state ends at `f` given context `ctx`.
|
||||
* Either this predicate or `endsAt(ControlFlowNode f)`
|
||||
* may be overridden by sub-classes.
|
||||
*/
|
||||
pragma[noinline]
|
||||
predicate endsAt(ControlFlowNode f, Context ctx) { ctx.appliesTo(f) and this.endsAt(f) }
|
||||
}
|
||||
|
||||
module StateTracking {
|
||||
private predicate not_allowed(TrackableState state, ControlFlowNode f, Context ctx, boolean sense) {
|
||||
state.endsAt(f, ctx) and sense = true
|
||||
or
|
||||
state.startsAt(f, ctx) and sense = false
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) to
|
||||
* control flow node `f` given the context `ctx`.
|
||||
*/
|
||||
predicate appliesToNode(TrackableState state, ControlFlowNode f, Context ctx, boolean sense) {
|
||||
state.endsAt(f, ctx) and sense = false
|
||||
or
|
||||
state.startsAt(f, ctx) and sense = true
|
||||
or
|
||||
not not_allowed(state, f, ctx, sense) and
|
||||
(
|
||||
exists(BasicBlock b |
|
||||
/* First node in a block */
|
||||
f = b.getNode(0) and appliesAtBlockStart(state, b, ctx, sense)
|
||||
or
|
||||
/* Other nodes in block, except trackable calls */
|
||||
exists(int n |
|
||||
f = b.getNode(n) and
|
||||
appliesToNode(state, b.getNode(n - 1), ctx, sense) and
|
||||
not exists(PythonFunctionObjectInternal func, Context callee |
|
||||
callee.fromCall(f, func, ctx)
|
||||
)
|
||||
)
|
||||
)
|
||||
or
|
||||
/* Function entry via call */
|
||||
exists(PythonFunctionObjectInternal func, CallNode call, Context caller |
|
||||
ctx.fromCall(call, func, caller) and
|
||||
func.getScope().getEntryNode() = f and
|
||||
appliesToNode(state, call.getAPredecessor(), caller, sense)
|
||||
)
|
||||
or
|
||||
/* Function return */
|
||||
exists(PythonFunctionObjectInternal func, Context callee |
|
||||
callee.fromCall(f, func, ctx) and
|
||||
appliesToNode(state, func.getScope().getANormalExit(), callee, sense)
|
||||
)
|
||||
or
|
||||
/* Other scope entries */
|
||||
exists(Scope s |
|
||||
s.getEntryNode() = f and
|
||||
ctx.appliesToScope(s)
|
||||
|
|
||||
not exists(Scope pred | pred.precedes(s)) and
|
||||
(ctx.isImport() or ctx.isRuntime()) and
|
||||
sense = false
|
||||
or
|
||||
exists(Scope pred, Context pred_ctx |
|
||||
appliesToNode(state, pred.getANormalExit(), pred_ctx, sense) and
|
||||
pred.precedes(s) and
|
||||
ctx.isRuntime()
|
||||
|
|
||||
pred_ctx.isRuntime() or pred_ctx.isImport()
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) at the
|
||||
* start of basic block `block` given the context `ctx`.
|
||||
*/
|
||||
private predicate appliesAtBlockStart(
|
||||
TrackableState state, BasicBlock block, Context ctx, boolean sense
|
||||
) {
|
||||
exists(PyEdgeRefinement test |
|
||||
test.getSuccessor() = block and
|
||||
state.testsFor(test, ctx, sense)
|
||||
)
|
||||
or
|
||||
exists(BasicBlock pred |
|
||||
pred.getASuccessor() = block and
|
||||
appliesAtBlockEnd(state, pred, ctx, sense) and
|
||||
not exists(PyEdgeRefinement test |
|
||||
test.getPredecessor() = pred and
|
||||
test.getSuccessor() = block and
|
||||
state.testsFor(test, sense.booleanNot())
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) at the
|
||||
* end of basic block `block` given the context `ctx`.
|
||||
*/
|
||||
private predicate appliesAtBlockEnd(
|
||||
TrackableState state, BasicBlock block, Context ctx, boolean sense
|
||||
) {
|
||||
appliesToNode(state, block.getLastNode(), ctx, sense)
|
||||
}
|
||||
}
|
||||
@@ -1,751 +0,0 @@
|
||||
/**
|
||||
* # Python Taint Tracking Library
|
||||
*
|
||||
* The taint tracking library is described in three parts.
|
||||
*
|
||||
* 1. Specification of kinds, sources, sinks and flows.
|
||||
* 2. The high level query API
|
||||
* 3. The implementation.
|
||||
*
|
||||
*
|
||||
* ## Specification
|
||||
*
|
||||
* There are four parts to the specification of a taint tracking query.
|
||||
* These are:
|
||||
*
|
||||
* 1. Kinds
|
||||
*
|
||||
* The Python taint tracking library supports arbitrary kinds of taint.
|
||||
* This is useful where you want to track something related to "taint", but that is in itself not dangerous.
|
||||
* For example, we might want to track the flow of request objects.
|
||||
* Request objects are not in themselves tainted, but they do contain tainted data.
|
||||
* For example, the length or timestamp of a request may not pose a risk, but the GET or POST string probably does.
|
||||
* So, we would want to track request objects distinctly from the request data in the GET or POST field.
|
||||
*
|
||||
* Kinds can also specify additional flow steps, but we recommend using the `DataFlowExtension` module,
|
||||
* which is less likely to cause issues with unwanted recursion.
|
||||
*
|
||||
* 2. Sources
|
||||
*
|
||||
* Sources of taint can be added by importing a predefined sub-type of `TaintSource`, or by defining new ones.
|
||||
*
|
||||
* 3. Sinks (or vulnerabilities)
|
||||
*
|
||||
* Sinks can be added by importing a predefined sub-type of `TaintSink`, or by defining new ones.
|
||||
*
|
||||
* 4. Flow extensions
|
||||
*
|
||||
* Additional flow can be added by importing predefined sub-types of `DataFlowExtension::DataFlowNode`
|
||||
* or `DataFlowExtension::DataFlowVariable` or by defining new ones.
|
||||
*
|
||||
*
|
||||
* ## The high-level query API
|
||||
*
|
||||
* The `TaintedNode` fully describes the taint flow graph.
|
||||
* The full graph can be expressed as:
|
||||
*
|
||||
* ```ql
|
||||
* from TaintedNode n, TaintedNode s
|
||||
* where s = n.getASuccessor()
|
||||
* select n, s
|
||||
* ```
|
||||
*
|
||||
* The source -> sink relation can be expressed either using `TaintedNode`:
|
||||
* ```ql
|
||||
* from TaintedNode src, TaintedNode sink
|
||||
* where src.isSource() and sink.isSink() and src.getASuccessor*() = sink
|
||||
* select src, sink
|
||||
* ```
|
||||
* or, using the specification API:
|
||||
* ```ql
|
||||
* from TaintSource src, TaintSink sink
|
||||
* where src.flowsToSink(sink)
|
||||
* select src, sink
|
||||
* ```
|
||||
*
|
||||
* ## The implementation
|
||||
*
|
||||
* The data-flow graph used by the taint-tracking library is the one created by the points-to analysis,
|
||||
* and consists of the base data-flow graph defined in `semmle/python/essa/Essa.qll`
|
||||
* enhanced with precise variable flows, call graph and type information.
|
||||
* This graph is then enhanced with additional flows as specified above.
|
||||
* Since the call graph and points-to information are context sensitive, the taint graph must also be context sensitive.
|
||||
*
|
||||
* The taint graph is a directed graph where each node consists of a
|
||||
* `(CFG node, context, taint)` triple although it could be thought of more naturally
|
||||
* as a number of distinct graphs, one for each input taint-kind consisting of data flow nodes,
|
||||
* `(CFG node, context)` pairs, labelled with their `taint`.
|
||||
*
|
||||
* The `TrackedValue` used in the implementation is not the taint kind specified by the user,
|
||||
* but describes both the kind of taint and how that taint relates to any object referred to by a data-flow graph node or edge.
|
||||
* Currently, only two types of `taint` are supported: simple taint, where the object is actually tainted;
|
||||
* and attribute taint where a named attribute of the referred object is tainted.
|
||||
*
|
||||
* Support for tainted members (both specific members of tuples and the like,
|
||||
* and generic members for mutable collections) is likely to be added in the near future, and other forms are possible.
|
||||
* The types of taints are hard-wired with no user-visible extension method at the moment.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.pointsto.Filters as Filters
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
private import semmle.python.dataflow.Implementation
|
||||
import semmle.python.dataflow.Configuration
|
||||
|
||||
/**
 * A 'kind' of taint, represented as a string.
 *
 * A kind may be almost anything, but it is typically something like a
 * "user-defined string". Examples include data from an HTTP request object,
 * data from an SMS or other mobile data source, or, for a very secure system,
 * environment variables or the local file system.
 */
abstract class TaintKind extends string {
  bindingset[this]
  TaintKind() { any() }

  /**
   * Gets the kind of taint that the attribute called `name` has when an object
   * is tainted with this kind. That is, if `x` has this kind of taint then
   * `x.name` has `result` kind of taint.
   *
   * The default implementation has no results.
   */
  TaintKind getTaintOfAttribute(string name) { none() }

  /**
   * Gets the kind of taint that results from calling the method called `name`
   * on an object tainted with this kind. That is, if `x` has this kind of taint
   * then `x.name()` has `result` kind of taint.
   *
   * The default implementation has no results.
   */
  TaintKind getTaintOfMethodResult(string name) { none() }

  /**
   * Gets the taint resulting from the flow step `fromnode` -> `tonode`.
   *
   * The default implementation has no results.
   */
  TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { none() }

  /**
   * Gets the taint resulting from the flow step `fromnode` -> `tonode`, and binds
   * `edgeLabel` to a label describing the step.
   *
   * This delegates to the two-argument `getTaintForFlowStep` and labels every such
   * step with a fixed prefix followed by this kind.
   */
  TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
    edgeLabel = "custom taint flow step for " + this and
    result = this.getTaintForFlowStep(fromnode, tonode)
  }

  /**
   * Holds if this kind of taint "taints" `expr`, that is, some tainted node
   * for `expr` carries this kind.
   */
  final predicate taints(ControlFlowNode expr) {
    exists(TaintedNode tainted |
      tainted.getCfgNode() = expr and
      tainted.getTaintKind() = this
    )
  }

  /** DEPRECATED -- Use getType() instead */
  deprecated ClassObject getClass() { none() }

  /**
   * Gets the class of this kind of taint.
   * For example, if this were a kind of string taint
   * the `result` would be `theStrType()`.
   *
   * The default implementation has no results.
   */
  ClassValue getType() { none() }

  /**
   * Gets the boolean values (may be one, neither, or both) that
   * may result from the Python expression `bool(this)`.
   */
  boolean booleanValue() {
    // The default is `true`: the vast majority of taint is strings, and
    // the empty string is almost always benign.
    result = true
  }

  /** Gets a printable representation of this kind; by default the kind string itself. */
  string repr() { result = this }

  /**
   * Gets the taint resulting from iterating over this kind of taint.
   * For example, iterating over a text file produces lines, so iterating
   * over a tainted file would result in tainted strings.
   *
   * The default implementation has no results.
   */
  TaintKind getTaintForIteration() { none() }

  /**
   * Holds if taint of this kind flows from `fromnode` to `tonode`, with `edgeLabel`
   * describing the step.
   *
   * By default this covers steps between ESSA variables where the variable of
   * `tonode` is a successor variable (per `DataFlowExtension::DataFlowVariable`)
   * of the variable of `fromnode`.
   */
  predicate flowStep(DataFlow::Node fromnode, DataFlow::Node tonode, string edgeLabel) {
    edgeLabel = "custom taint variable step" and
    fromnode.asVariable().(DataFlowExtension::DataFlowVariable).getASuccessorVariable() =
      tonode.asVariable()
  }
}
|
||||
|
||||
/**
|
||||
* Alias of `TaintKind`, so the two types can be used interchangeably.
|
||||
*/
|
||||
class FlowLabel = TaintKind;
|
||||
|
||||
/**
 * Taint kinds representing collections of other taint kinds.
 *
 * `{kind}` represents a mapping of string to `kind` and `[kind]` represents a
 * flat collection of `kind`. The `{` and `[` characters mirror dict and list
 * literals in Python. A single-character prefix and suffix is used for
 * simplicity and to make it easy to prevent infinite recursion.
 */
abstract class CollectionKind extends TaintKind {
  bindingset[this]
  CollectionKind() {
    exists(string opener | opener = this.charAt(0) | opener = "[" or opener = "{") and
    /* Prevent any collection kinds more than 2 deep */
    not (this.charAt(2) = "[" or this.charAt(2) = "{")
  }

  /** Gets the taint kind of the members of this collection kind. */
  abstract TaintKind getMember();

  /**
   * Holds if there is a flow step from a member at `fromnode` to a collection
   * at `tonode`.
   */
  abstract predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode);

  /**
   * Holds if there is a flow step from a collection at `fromnode` to one of its
   * members at `tonode`.
   */
  abstract predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode);
}
|
||||
|
||||
/**
 * A taint kind representing a flat collection of kinds.
 * Typically a sequence, but can include sets.
 *
 * The kind string has the form `[item]`, where `item` is the kind of the items.
 */
class SequenceKind extends CollectionKind {
  TaintKind itemKind;

  SequenceKind() { this = "[" + itemKind + "]" }

  /** Gets the taint kind of the items of this sequence kind. */
  TaintKind getItem() { result = itemKind }

  /**
   * Gets the item kind for a step from an operand `fromnode` to a `%` binary
   * expression `tonode`, provided the item kind has the builtin `str` type.
   */
  override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
    result = this.getItem() and
    result.getType() = ObjectInternal::builtin("str") and
    exists(BinaryExprNode modulo | modulo = tonode |
      modulo.getAnOperand() = fromnode and
      modulo.getOp() instanceof Mod
    )
  }

  /** Gets the item kind as the result of calling `pop`. */
  override TaintKind getTaintOfMethodResult(string name) {
    result = this.getItem() and name = "pop"
  }

  override string repr() { result = "sequence of " + itemKind }

  /** Gets the item kind: iterating over the sequence yields its items. */
  override TaintKind getTaintForIteration() { result = itemKind }

  override TaintKind getMember() { result = itemKind }

  /** Holds if `tonode` is a sequence display (list, tuple or set) containing `fromnode`. */
  override predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
    sequence_construct(fromnode.asCfgNode(), tonode.asCfgNode())
  }

  /** Holds if `tonode` is an item step (see `SequenceKind::itemFlowStep`) from `fromnode`. */
  override predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
    SequenceKind::itemFlowStep(fromnode.asCfgNode(), tonode.asCfgNode())
  }
}
|
||||
|
||||
/** Flow steps that apply to sequence taint kinds. */
module SequenceKind {
  /**
   * Holds if a whole sequence flows from `fromnode` to `tonode`, with `edgeLabel`
   * describing the kind of step: slicing, construction via a sequence call,
   * a copy call, or use as an operand of a binary expression.
   */
  predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
    edgeLabel = "slicing" and subscript_slice(fromnode, tonode)
    or
    edgeLabel = "sequence construction" and sequence_call(fromnode, tonode)
    or
    // NOTE(review): this label says "dict" although the step is for sequences;
    // it is kept as-is because the label is observable output -- confirm intent.
    edgeLabel = "dict copy" and
    Implementation::copyCall(fromnode, tonode)
    or
    edgeLabel = "binary operation" and fromnode = tonode.(BinaryExprNode).getAnOperand()
  }

  /**
   * Holds if `tonode` is a non-slice subscript load on the object `fromnode`,
   * that is, a step from a sequence to one of its items.
   */
  predicate itemFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
    subscript_index(fromnode, tonode)
  }
}
|
||||
|
||||
/** Flow steps that apply to dict taint kinds. */
module DictKind {
  /**
   * Holds if a whole dict flows from `fromnode` to `tonode`, with `edgeLabel`
   * describing the step: either a call to the builtin `dict` with `fromnode`
   * as its first positional argument, or a copy call.
   */
  predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
    edgeLabel = "dict() call" and
    exists(CallNode dictCall | dictCall = tonode |
      dictCall.getArg(0) = fromnode and
      dictCall.getFunction().pointsTo(ObjectInternal::builtin("dict"))
    )
    or
    edgeLabel = "dict copy" and
    Implementation::copyCall(fromnode, tonode)
  }
}
|
||||
|
||||
/*
 * Helper for sequence flow steps.
 * Holds if `sub` is a load of the form `obj[index]` where `index` is not a slice.
 */

pragma[noinline]
private predicate subscript_index(ControlFlowNode obj, SubscriptNode sub) {
  obj = sub.getObject() and
  sub.isLoad() and
  not sub.getNode().getIndex() instanceof Slice
}
|
||||
|
||||
/* Holds if `sub` is a load of the form `obj[slice]`, where the index is a slice. */
pragma[noinline]
private predicate subscript_slice(ControlFlowNode obj, SubscriptNode sub) {
  obj = sub.getObject() and
  sub.isLoad() and
  sub.getNode().getIndex() instanceof Slice
}
|
||||
|
||||
/**
 * A taint kind representing a mapping of objects to kinds.
 * Typically a dict, but can include other mappings.
 *
 * The kind string has the form `{value}`, where `value` is the kind of the values.
 */
class DictKind extends CollectionKind {
  TaintKind valueKind;

  DictKind() { this = "{" + valueKind + "}" }

  /** Gets the taint kind of the values of this dict kind. */
  TaintKind getValue() { result = valueKind }

  /**
   * Gets the taint of a method result: `get` yields the value kind, while
   * `values` and `itervalues` yield a sequence of the value kind.
   */
  override TaintKind getTaintOfMethodResult(string name) {
    result = valueKind and name = "get"
    or
    (name = "values" or name = "itervalues") and
    valueKind = result.(SequenceKind).getItem()
  }

  override string repr() { result = "dict of " + valueKind }

  override TaintKind getMember() { result = valueKind }

  /** Holds if `tonode` constructs a dict with `fromnode` as one of its values. */
  override predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
    dict_construct(fromnode.asCfgNode(), tonode.asCfgNode())
  }

  /** Holds if `tonode` is a non-slice subscript load on the object `fromnode`. */
  override predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
    subscript_index(fromnode.asCfgNode(), tonode.asCfgNode())
  }
}
|
||||
|
||||
/**
 * A type of sanitizer of untrusted data, represented as a string.
 *
 * Examples include sanitizers for HTTP responses, for DB access or for shell commands.
 * Usually a sanitizer can only sanitize data for one particular use.
 * For example, a sanitizer for DB commands would not be safe to use for HTTP responses.
 *
 * Every predicate below defaults to `none()`; subclasses override the ones
 * that describe how they sanitize.
 */
abstract class Sanitizer extends string {
  bindingset[this]
  Sanitizer() { any() }

  /** Holds if `taint` cannot flow through `node`. */
  predicate sanitizingNode(TaintKind taint, ControlFlowNode node) { none() }

  /** Holds if calling `callee` removes the `taint`. */
  predicate sanitizingCall(TaintKind taint, FunctionObject callee) { none() }

  /** Holds if the edge refinement `test` shows the value to be untainted with `taint`. */
  predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) { none() }

  /** Holds if the single-successor guard `test` shows the value to be untainted with `taint`. */
  predicate sanitizingSingleEdge(TaintKind taint, SingleSuccessorGuard test) { none() }

  /** Holds if `def` shows the value to be untainted with `taint`. */
  predicate sanitizingDefinition(TaintKind taint, EssaDefinition def) { none() }
}
|
||||
|
||||
/**
 * A source of taintedness.
 * Users of the taint tracking library should override this
 * class to provide their own sources.
 */
abstract class TaintSource extends @py_flow_node {
  /** Gets a textual representation of this element. */
  string toString() { result = "Taint source" }

  /**
   * Holds if `this` is a source of taint kind `kind`.
   *
   * This must be overridden by subclasses to specify sources of taint.
   *
   * The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
   */
  abstract predicate isSourceOf(TaintKind kind);

  /**
   * Holds if `this` is a source of taint kind `kind` for the given context.
   * Generally, this should not need to be overridden; overriding `isSourceOf(kind)`
   * should be sufficient. By default a source is only a source in the top context.
   *
   * The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
   */
  predicate isSourceOf(TaintKind kind, TaintTrackingContext context) {
    this.isSourceOf(kind) and context.isTop()
  }

  /** Gets the location of this source, taken from the underlying control flow node. */
  Location getLocation() { result = this.(ControlFlowNode).getLocation() }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(Location loc | loc = this.getLocation() |
      loc.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
    )
  }

  /**
   * Gets a `TaintedNode` for this taint source: a node at this control flow node
   * whose kind and context make this a source, and whose path has no attribute.
   */
  TaintedNode getATaintNode() {
    this = result.getCfgNode() and
    result.getPath().noAttribute() and
    this.isSourceOf(result.getTaintKind(), result.getContext())
  }

  /**
   * Holds if taint of kind `srckind` can flow from this source to sink `sink`.
   *
   * The tainted node reached at `sink` must be a sink node with an attribute-free
   * path, and `sink` must sink the kind that the taint has on arrival.
   */
  final predicate flowsToSink(TaintKind srckind, TaintSink sink) {
    this.isSourceOf(srckind, _) and
    exists(TaintedNode origin, TaintedNode reached |
      origin = this.getATaintNode() and
      srckind = origin.getTaintKind() and
      origin.flowsTo(reached) and
      reached.getCfgNode() = sink and
      reached.isSink() and
      reached.getPath().noAttribute() and
      sink.sinks(reached.getTaintKind())
    )
  }

  /** Holds if taint (of any kind) can flow from this source to taint sink `sink`. */
  final predicate flowsToSink(TaintSink sink) { this.flowsToSink(_, sink) }
}
|
||||
|
||||
/**
 * Warning: Advanced feature. Users are strongly recommended to use `TaintSource` instead.
 *
 * A source of taintedness on the ESSA data-flow graph.
 * Users of the taint tracking library can override this
 * class to provide their own sources on the ESSA graph.
 */
abstract class TaintedDefinition extends EssaNodeDefinition {
  /**
   * Holds if `this` is a source of taint kind `kind`.
   *
   * This should be overridden by subclasses to specify sources of taint.
   *
   * The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
   */
  abstract predicate isSourceOf(TaintKind kind);

  /**
   * Holds if `this` is a source of taint kind `kind` for the given context.
   * Generally, this should not need to be overridden; overriding `isSourceOf(kind)`
   * should be sufficient. By default a definition is only a source in the top context.
   *
   * The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
   */
  predicate isSourceOf(TaintKind kind, TaintTrackingContext context) {
    this.isSourceOf(kind) and context.isTop()
  }
}
|
||||
|
||||
/**
 * The first positional argument of a method call whose attribute name is `update`;
 * data flows from it to the variable refined by the call-site.
 */
private class DictUpdate extends DataFlowExtension::DataFlowNode {
  MethodCallsiteRefinement call;

  DictUpdate() {
    exists(CallNode updateCall |
      updateCall = call.getCall() and
      this = updateCall.getArg(0) and
      updateCall.getFunction().(AttrNode).getName() = "update"
    )
  }

  override EssaVariable getASuccessorVariable() { result = call.getVariable() }
}
|
||||
|
||||
/**
 * The first positional argument of a method call whose attribute name is `extend`;
 * data flows from it to the variable refined by the call-site.
 */
private class SequenceExtends extends DataFlowExtension::DataFlowNode {
  MethodCallsiteRefinement call;

  SequenceExtends() {
    exists(CallNode extendCall |
      extendCall = call.getCall() and
      this = extendCall.getArg(0) and
      extendCall.getFunction().(AttrNode).getName() = "extend"
    )
  }

  override EssaVariable getASuccessorVariable() { result = call.getVariable() }
}
|
||||
|
||||
/**
 * A node that is vulnerable to one or more types of taint.
 * These nodes provide the sinks when computing the taint flow graph.
 * An example would be an argument to a write to an HTTP response object;
 * such an argument would be vulnerable to unsanitized user-input (XSS).
 *
 * Users of the taint tracking library should extend this
 * class to provide their own sink nodes.
 */
abstract class TaintSink extends @py_flow_node {
  /** Gets a textual representation of this element. */
  string toString() { result = "Taint sink" }

  /**
   * Holds if `this` "sinks" taint kind `taint`.
   * Typically this means that `this` is vulnerable to taint kind `taint`.
   *
   * This must be overridden by subclasses to specify vulnerabilities or other sinks of taint.
   */
  abstract predicate sinks(TaintKind taint);

  /** Gets the location of this sink, taken from the underlying control flow node. */
  Location getLocation() { result = this.(ControlFlowNode).getLocation() }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(Location loc | loc = this.getLocation() |
      loc.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
    )
  }
}
|
||||
|
||||
/**
 * Extension for data-flow, to help express data-flow paths that are
 * library or framework specific and cannot be inferred by the general
 * data-flow machinery.
 *
 * Every predicate in the classes below defaults to `none()`; extensions
 * override the ones they need.
 */
module DataFlowExtension {
  /** A control flow node that modifies the basic data-flow. */
  abstract class DataFlowNode extends @py_flow_node {
    /** Gets a textual representation of this element. */
    string toString() { result = "Dataflow extension node" }

    /**
     * Gets a successor node for data-flow.
     * Data (all forms) is assumed to flow from `this` to `result`.
     */
    ControlFlowNode getASuccessorNode() { none() }

    /**
     * Gets a successor variable for data-flow.
     * Data (all forms) is assumed to flow from `this` to `result`.
     * Note: This is an unlikely form of flow. See `DataFlowVariable.getASuccessorVariable()`.
     */
    EssaVariable getASuccessorVariable() { none() }

    /**
     * Holds if data cannot flow from `this` to `succ`,
     * even though it would normally do so.
     */
    predicate prunedSuccessor(ControlFlowNode succ) { none() }

    /**
     * Gets a successor node that will be tainted with `tokind`
     * when `this` is tainted with `fromkind`.
     * Extensions to `DataFlowNode` should override this to provide additional taint steps.
     */
    ControlFlowNode getASuccessorNode(TaintKind fromkind, TaintKind tokind) { none() }

    /**
     * Gets a successor node for data-flow with a change of context from callee to caller
     * (going *up* the call-stack) across call-site `call`.
     * Data (all forms) is assumed to flow from `this` to `result`.
     * Extensions to `DataFlowNode` should override this to provide additional taint steps.
     */
    ControlFlowNode getAReturnSuccessorNode(CallNode call) { none() }

    /**
     * Gets a successor node for data-flow with a change of context from caller to callee
     * (going *down* the call-stack) across call-site `call`.
     * Data (all forms) is assumed to flow from `this` to `result`.
     * Extensions to `DataFlowNode` should override this to provide additional taint steps.
     */
    ControlFlowNode getACalleeSuccessorNode(CallNode call) { none() }
  }

  /** Data flow variable that modifies the basic data-flow. */
  class DataFlowVariable extends EssaVariable {
    /**
     * Gets a successor node for data-flow.
     * Data (all forms) is assumed to flow from `this` to `result`.
     * Note: This is an unlikely form of flow. See `DataFlowNode.getASuccessorNode()`.
     */
    ControlFlowNode getASuccessorNode() { none() }

    /**
     * Gets a successor variable for data-flow.
     * Data (all forms) is assumed to flow from `this` to `result`.
     */
    EssaVariable getASuccessorVariable() { none() }

    /**
     * Holds if data cannot flow from `this` to `succ`,
     * even though it would normally do so.
     */
    predicate prunedSuccessor(EssaVariable succ) { none() }
  }
}
|
||||
|
||||
/** A taint-tracking node that is a source. */
class TaintedPathSource extends TaintTrackingNode {
  TaintedPathSource() { this.isSource() }

  /** Gets the data-flow node of this source. */
  DataFlow::Node getSource() { result = this.getNode() }
}
|
||||
|
||||
/** A taint-tracking node that is a sink. */
class TaintedPathSink extends TaintTrackingNode {
  TaintedPathSink() { this.isSink() }

  /** Gets the data-flow node of this sink. */
  DataFlow::Node getSink() { result = this.getNode() }
}
|
||||
|
||||
/* Backwards compatible name */
|
||||
class TaintedNode = TaintTrackingNode;
|
||||
|
||||
/* Helpers for Validating classes */
|
||||
private import semmle.python.pointsto.PointsTo
|
||||
|
||||
/**
 * Data flow module providing an interface compatible with
 * the other language implementations.
 */
module DataFlow {
  /**
   * Generic taint kind, source and sink classes for convenience and
   * compatibility with other language libraries.
   */
  class Extension = DataFlowExtension::DataFlowNode;

  /**
   * A data-flow configuration identified by a string, defined by its
   * sources and sinks on the control flow graph.
   */
  abstract deprecated class Configuration extends string {
    bindingset[this]
    Configuration() { this = this }

    /** Holds if `source` is a source for this configuration. */
    abstract predicate isSource(ControlFlowNode source);

    /** Holds if `sink` is a sink for this configuration. */
    abstract predicate isSink(ControlFlowNode sink);

    /**
     * Holds if the tainted node `source`, tracked under this configuration, flows
     * to the tainted node `sink`, and their control flow nodes are respectively a
     * source and a sink of this configuration.
     */
    private predicate hasFlowPath(TaintedNode source, TaintedNode sink) {
      source.flowsTo(sink) and
      this.isSink(sink.getCfgNode()) and
      this.isSource(source.getCfgNode()) and
      source.getConfiguration() = this
    }

    /** Holds if data flows from the source node `source` to the sink node `sink`. */
    predicate hasFlow(ControlFlowNode source, ControlFlowNode sink) {
      this.isSource(source) and
      this.isSink(sink) and
      exists(TaintedNode pathStart, TaintedNode pathEnd |
        source = pathStart.getCfgNode() and
        sink = pathEnd.getCfgNode() and
        this.hasFlowPath(pathStart, pathEnd)
      )
    }
  }

  /**
   * Adapts each deprecated `Configuration` to a `TaintTracking::Configuration`,
   * using the `DataFlowType` kind for all of its sources and sinks.
   */
  deprecated private class ConfigurationAdapter extends TaintTracking::Configuration {
    ConfigurationAdapter() { this instanceof Configuration }

    override predicate isSource(DataFlow::Node node, TaintKind kind) {
      kind instanceof DataFlowType and
      this.(Configuration).isSource(node.asCfgNode())
    }

    override predicate isSink(DataFlow::Node node, TaintKind kind) {
      kind instanceof DataFlowType and
      this.(Configuration).isSink(node.asCfgNode())
    }
  }

  private newtype TDataFlowNode =
    TEssaNode(EssaVariable var) or
    TCfgNode(ControlFlowNode node)

  /** A data-flow node: either a control flow node or an ESSA variable. */
  abstract class Node extends TDataFlowNode {
    /** Gets the control flow node this node wraps, if any. */
    abstract ControlFlowNode asCfgNode();

    /** Gets the ESSA variable this node wraps, if any. */
    abstract EssaVariable asVariable();

    /** Gets a textual representation of this element. */
    abstract string toString();

    /** Gets the scope of this node. */
    abstract Scope getScope();

    /** Gets the basic block of this node. */
    abstract BasicBlock getBasicBlock();

    /** Gets the location of this node. */
    abstract Location getLocation();

    /** Gets the AST node of the control flow node this node wraps, if any. */
    AstNode asAstNode() { result = this.asCfgNode().getNode() }

    /** For backwards compatibility -- Use asAstNode() instead */
    deprecated AstNode getNode() { result = this.asAstNode() }
  }

  /** A data-flow node wrapping a control flow node. */
  class CfgNode extends Node, TCfgNode {
    override ControlFlowNode asCfgNode() { TCfgNode(result) = this }

    override EssaVariable asVariable() { none() }

    /** Gets a textual representation of this element. */
    override string toString() { result = this.asAstNode().toString() }

    override Scope getScope() { result = this.asCfgNode().getScope() }

    override BasicBlock getBasicBlock() { result = this.asCfgNode().getBasicBlock() }

    override Location getLocation() { result = this.asCfgNode().getLocation() }
  }

  /** A data-flow node wrapping an ESSA variable. */
  class EssaNode extends Node, TEssaNode {
    override ControlFlowNode asCfgNode() { none() }

    override EssaVariable asVariable() { TEssaNode(result) = this }

    /** Gets a textual representation of this element. */
    override string toString() { result = this.asVariable().toString() }

    override Scope getScope() { result = this.asVariable().getScope() }

    /** Gets the basic block of the definition of the wrapped variable. */
    override BasicBlock getBasicBlock() {
      result = this.asVariable().getDefinition().getBasicBlock()
    }

    /** Gets the location of the definition of the wrapped variable. */
    override Location getLocation() { result = this.asVariable().getDefinition().getLocation() }
  }
}
|
||||
|
||||
/**
 * The single taint kind used for plain data-flow configurations.
 * It only exists when at least one `DataFlow::Configuration` exists.
 */
deprecated private class DataFlowType extends TaintKind {
  DataFlowType() {
    exists(DataFlow::Configuration c) and
    this = "Data flow"
  }
}
|
||||
|
||||
/*
 * Holds if `dictnode` constructs a dict with `itemnode` as a value: either a dict
 * display containing the value, or a call to the builtin `dict` passing it by keyword.
 */

pragma[noinline]
private predicate dict_construct(ControlFlowNode itemnode, ControlFlowNode dictnode) {
  exists(CallNode dictCall | dictCall = dictnode |
    itemnode = dictCall.getArgByName(_) and
    dictCall.getFunction().pointsTo(ObjectInternal::builtin("dict"))
  )
  or
  itemnode = dictnode.(DictNode).getAValue()
}
|
||||
|
||||
/*
 * Holds if `seqnode` is a list, tuple or set display in a load context
 * that has `itemnode` as one of its elements.
 */

pragma[noinline]
private predicate sequence_construct(ControlFlowNode itemnode, ControlFlowNode seqnode) {
  (
    itemnode = seqnode.(SetNode).getAnElement()
    or
    itemnode = seqnode.(TupleNode).getElement(_)
    or
    itemnode = seqnode.(ListNode).getElement(_)
  ) and
  seqnode.isLoad()
}
|
||||
|
||||
/*
 * A call to construct a sequence from a sequence or iterator:
 * holds if `tonode` calls the builtin `list`, `tuple` or `set`
 * with `fromnode` as its first positional argument.
 */

pragma[noinline]
private predicate sequence_call(ControlFlowNode fromnode, CallNode tonode) {
  exists(string builtinName |
    builtinName = "list" or builtinName = "tuple" or builtinName = "set"
  |
    tonode.getFunction().pointsTo(ObjectInternal::builtin(builtinName))
  ) and
  fromnode = tonode.getArg(0)
}
|
||||
@@ -1,174 +0,0 @@
|
||||
import python
|
||||
import semmle.python.dependencies.DependencyKind
|
||||
|
||||
/*
 * Holds if the import statement `source` depends on `target`.
 * `target` is either a module imported by the statement (or a package
 * transitively containing such a module), or the value of an attribute
 * imported from a Python module with `from m import name`.
 */

private predicate importDependency(Object target, AstNode source) {
  /* Imports of own module are ignored */
  source.getScope() != target.getOrigin() and
  (
    exists(ModuleObject imported, ImportingStmt stmt | imported = target and stmt = source |
      exists(ImportMember member |
        stmt.contains(member) and
        imported.importedAs(member.getImportedModuleName())
      )
      or
      exists(ImportExpr expr |
        stmt.contains(expr) and
        imported.importedAs(expr.getImportedModuleName())
      )
      or
      /* A dependency on a module is also a dependency on its enclosing packages */
      exists(ModuleObject inner |
        target = inner.getPackage+() and
        importDependency(inner, source)
      )
    )
    or
    /* from m import name, where m.name is not a submodule */
    exists(PythonModuleObject owner, ImportingStmt stmt, ImportMember member |
      stmt = source and
      stmt.contains(member)
    |
      owner.importedAs(member.getModule().(ImportExpr).getImportedModuleName()) and
      defn_of_module_attribute(target, owner.getModule(), member.getName())
    )
  )
}
|
||||
|
||||
/** The dependency kind for dependencies introduced by import statements. */
class PythonImport extends DependencyKind {
  PythonImport() { this = "import" }

  override predicate isADependency(AstNode source, Object target) {
    importDependency(target, source) and
    this = this
  }
}
|
||||
|
||||
/*
 * Holds if `target` is worth reporting as a dependency target: a module,
 * class or function object, or a control flow node whose AST node is a scope.
 */

private predicate interesting(Object target) {
  target instanceof ModuleObject
  or
  target instanceof ClassObject
  or
  target instanceof FunctionObject
  or
  target.(ControlFlowNode).getNode() instanceof Scope
}
|
||||
|
||||
/** The dependency kind for loads of expressions that refer to an interesting object. */
class PythonUse extends DependencyKind {
  PythonUse() { this = "use" }

  /**
   * Holds if `source` is loaded by a control flow node that refers to the
   * interesting object `target`, `source` is not the AST node of `target` itself,
   * and no enclosing attribute access gives a more specific dependency source.
   */
  override predicate isADependency(AstNode source, Object target) {
    this = this and
    interesting(target) and
    exists(ControlFlowNode use |
      use.isLoad() and
      use.getNode() = source and
      use.refersTo(target)
    ) and
    source != target.(ControlFlowNode).getNode() and
    not has_more_specific_dependency_source(source)
  }
}
|
||||
|
||||
/**
 * Holds if there is a more specific dependency source than `e`.
 * E.g. if the expression pack.mod.func is a dependency on the function 'func' in 'pack.mod'
 * don't make pack.mod depend on the module 'pack.mod'.
 *
 * This is the case when `e` is the object of an attribute access that is itself
 * an attribute dependency, or that in turn has a more specific source.
 */
private predicate has_more_specific_dependency_source(Expr e) {
  exists(Attribute enclosing | e = enclosing.getObject() |
    has_more_specific_dependency_source(enclosing)
    or
    attribute_access_dependency(_, enclosing)
  )
}
|
||||
|
||||
/**
 * The dependency kind relating the origin of a class to its super types
 * and inferred types.
 */
class PythonInheritance extends DependencyKind {
  PythonInheritance() { this = "inheritance" }

  override predicate isADependency(AstNode source, Object target) {
    exists(ClassObject cls |
      cls.getOrigin() = source and
      (target = cls.getAnInferredType() or target = cls.getASuperType())
    ) and
    this = this
  }
}
|
||||
|
||||
/** The dependency kind relating an attribute access to a definition of that attribute. */
class PythonAttribute extends DependencyKind {
  PythonAttribute() { this = "attribute" }

  override predicate isADependency(AstNode source, Object target) {
    attribute_access_dependency(target, source) and
    this = this
  }
}
|
||||
|
||||
/*
 * Holds if `source` uses an attribute, identified by a scope and a name,
 * that `target` defines.
 */

private predicate attribute_access_dependency(Object target, AstNode source) {
  exists(string attrName, Scope owner |
    defn_of_attribute(target, owner, attrName) and
    use_of_attribute(source, owner, attrName)
  )
}
|
||||
|
||||
/*
 * Holds if `attr` reads the attribute `name` of scope `s`. Either `attr` is a
 * `self` attribute read inside class `s`, or it is a load whose object refers to
 * a Python module or class with scope `s`, or to an instance of a class with scope `s`.
 */

private predicate use_of_attribute(Attribute attr, Scope s, string name) {
  exists(SelfAttributeRead selfRead | selfRead = attr |
    selfRead.getName() = name and
    selfRead.getClass() = s
  )
  or
  exists(AttrNode load | load.getNode() = attr and load.isLoad() |
    /* The object is inferred to have class `cls` */
    exists(ClassObject cls | load.getObject(name).refersTo(_, cls, _) | cls.getPyClass() = s)
    or
    /* The object refers directly to a module or class */
    exists(Object referent | load.getObject(name).refersTo(referent) |
      referent.(ClassObject).getPyClass() = s
      or
      referent.(PythonModuleObject).getModule() = s
    )
  )
}
|
||||
|
||||
/*
 * Holds if `target` defines the attribute `name` of scope `s`: either as a value
 * assigned to a module attribute, or as a control flow node for an assignment
 * defining a class or instance attribute.
 */

private predicate defn_of_attribute(Object target, Scope s, string name) {
  defn_of_module_attribute(target, s, name)
  or
  exists(Assign assignment | assignment = target.(ControlFlowNode).getNode() |
    defn_of_class_attribute(assignment, s, name)
    or
    defn_of_instance_attribute(assignment, s, name)
  )
}
|
||||
|
||||
/*
 * Holds if `asgn` defines an instance attribute, that is, `asgn` takes the
 * form `self.name = ...` where `self` is an instance of class `c`, and `asgn`
 * is not a redefinition: a store outside an init method does not count when
 * a different store to the same attribute exists in an init method of `c`.
 */

private predicate defn_of_instance_attribute(Assign asgn, Class c, string name) {
  exists(SelfAttributeStore store |
    store = asgn.getATarget() and
    store.getName() = name and
    store.getClass() = c
  |
    not exists(SelfAttributeStore initStore |
      initStore.getScope().(Function).isInitMethod() and
      initStore.getName() = name and
      initStore.getClass() = c and
      not initStore = store and
      not store.getScope().(Function).isInitMethod()
    )
  )
}
|
||||
|
||||
/* Holds if `asgn` is an assignment in the scope of class `c` to the plain name `name`. */
private predicate defn_of_class_attribute(Assign asgn, Class c, string name) {
  name = asgn.getATarget().(Name).getId() and
  c = asgn.getScope()
}
|
||||
|
||||
/*
 * Holds if `value` is a value assigned to the `name`d attribute of module `m`,
 * that is, some definition of the name `name` in the scope of `m` has value `value`.
 */

private predicate defn_of_module_attribute(ControlFlowNode value, Module m, string name) {
  exists(DefinitionNode definition |
    definition.(NameNode).getId() = name and
    definition.getValue() = value and
    definition.getScope() = m
  )
}
|
||||
@@ -1,27 +0,0 @@
|
||||
import semmle.python.dependencies.Dependencies
|
||||
|
||||
/**
|
||||
* A library describing an abstract mechanism for representing dependency categories.
|
||||
*/
|
||||
/*
|
||||
* A DependencyCategory is a unique string key used by Architect to identify different categories
|
||||
* of dependencies that might be viewed independently.
|
||||
* <p>
|
||||
* The string key defining the category must adhere to the isValid(), otherwise it will not be
|
||||
* accepted by Architect.
|
||||
* </p>
|
||||
*/
|
||||
|
||||
/**
 * A category of dependency, identified by a unique string key.
 * Subclasses fix the key in their characteristic predicate and
 * define which dependencies belong to the category.
 */
abstract class DependencyKind extends string {
  bindingset[this]
  DependencyKind() { this = this }

  /* Tech inventory interface */
  /**
   * Holds if there is a dependency of this category from `source` to `target`.
   * `source` is the element the dependency originates from.
   */
  abstract predicate isADependency(AstNode source, Object target);
}
|
||||
@@ -1,110 +0,0 @@
|
||||
import python
|
||||
import semmle.python.dependencies.Dependencies
|
||||
import semmle.python.dependencies.DependencyKind
|
||||
|
||||
/**
 * Gets a single string combining a source file and a package, of the form
 * `/relative/path/to/file.py<|>package-name<|>version`.
 *
 * If the package has no version, the literal `unknown` is used in its place.
 */
string munge(File sourceFile, ExternalPackage package) {
  exists(string prefix, string version |
    prefix = "/" + sourceFile.getRelativePath() + "<|>" + package.getName() + "<|>" and
    result = prefix + version
  |
    version = package.getVersion()
    or
    not exists(package.getVersion()) and version = "unknown"
  )
}
|
||||
|
||||
/**
 * An object that is a `ModuleObject`, viewed as an external package with a
 * name and (possibly) a version, both supplied by concrete subclasses.
 */
abstract class ExternalPackage extends Object {
  ExternalPackage() { this instanceof ModuleObject }

  /** Gets the name of this package. */
  abstract string getName();

  /** Gets the version of this package, if any. */
  abstract string getVersion();

  /** Gets the attribute called `name` of the underlying module object. */
  Object getAttribute(string name) {
    exists(ModuleObject mod | mod = this | result = mod.attr(name))
  }

  /** Gets the package of the underlying module object. */
  PackageObject getPackage() {
    exists(ModuleObject mod | mod = this | result = mod.getPackage())
  }
}
|
||||
|
||||
/**
 * Holds if `text` looks like a version number: two dot-separated groups of digits,
 * optionally followed by a third group and optionally by `a` or `b` plus digits.
 */
bindingset[text]
private predicate is_version(string text) {
  exists(string pattern |
    pattern = "\\d+\\.\\d+(\\.\\d+)?([ab]\\d+)?" and
    text.regexpMatch(pattern)
  )
}
|
||||
|
||||
/**
 * Gets `v` formatted as `<integer part>.<one digit>`.
 *
 * 0.05 is added to `v` first; the integer part is the floor of that sum and the
 * digit is the floor of ten times the remaining fraction.
 */
bindingset[v]
private string version_format(float v) {
  exists(float shifted, int whole, int tenths |
    shifted = v + 0.05 and
    whole = shifted.floor() and
    tenths = ((shifted - whole) * 10).floor() and
    result = whole + "." + tenths
  )
}
|
||||
|
||||
/**
 * An external package whose module lives directly inside an import root that is
 * neither the standard-library root nor a folder with a relative (in-source) path.
 */
class DistPackage extends ExternalPackage {
  DistPackage() {
    exists(Folder root |
      root = this.(ModuleObject).getPath().getParent() and
      root.isImportRoot()
    |
      /* Not in standard library */
      not root.isStdLibRoot() and
      /* Not in the source */
      not exists(root.getRelativePath())
    )
  }

  /*
   * We don't extract the meta-data for dependencies (yet), so make a best guess from the source
   * https://www.python.org/dev/peps/pep-0396/
   */

  /**
   * Holds if `version` is a candidate version string for this package, found with
   * the given `priority` (3: `__version__`, 2: `version_info`, 1: an attribute whose
   * lower-cased name is `version`).
   */
  private predicate possibleVersion(string version, int priority) {
    priority = 3 and
    exists(Object attr | attr = this.getAttribute("__version__") |
      version = version_format(attr.(NumericObject).intValue())
      or
      version = version_format(attr.(NumericObject).floatValue())
      or
      version = attr.(StringObject).getText() and is_version(version)
    )
    or
    priority = 2 and
    exists(SequenceObject info, string majorMinor |
      info = this.getAttribute("version_info") and
      majorMinor =
        info.getInferredElement(0).(NumericObject).intValue() + "." +
          info.getInferredElement(1).(NumericObject).intValue()
    |
      /* Two-element form: no third element is known */
      not exists(info.getBuiltinElement(2)) and version = majorMinor
      or
      version = majorMinor + "." + info.getBuiltinElement(2).(NumericObject).intValue()
    )
    or
    priority = 1 and
    exists(string attrName | attrName.toLowerCase() = "version" |
      version = this.getAttribute(attrName).(StringObject).getText() and
      is_version(version)
    )
  }

  /** Gets a candidate version that has the highest priority among all candidates. */
  override string getVersion() {
    exists(int best |
      best = max(int p | this.possibleVersion(_, p)) and
      this.possibleVersion(result, best)
    )
  }

  /** Gets the short name of the underlying module object. */
  override string getName() {
    exists(ModuleObject mod | mod = this | result = mod.getShortName())
  }

  /**
   * Holds if `src` is a module object, or a control flow node in a module, that
   * is this package or is nested (via `getPackage`) in this package with no other
   * `DistPackage` in between.
   */
  predicate fromSource(Object src) {
    exists(ModuleObject mod |
      src = mod
      or
      src.(ControlFlowNode).getEnclosingModule() = mod.getModule()
    |
      this = mod
      or
      this = mod.getPackage+() and
      not exists(DistPackage between |
        between = mod.getPackage*() and
        this = between.getPackage+()
      )
    )
  }
}
|
||||
|
||||
/**
 * Holds if some `DependencyKind` reports a dependency from `src` to a target
 * that `package` identifies as coming from it (see `DistPackage.fromSource`).
 */
predicate dependency(AstNode src, DistPackage package) {
  exists(DependencyKind kind, Object tgt |
    kind.isADependency(src, tgt) and
    package.fromSource(tgt)
  )
}
|
||||
@@ -1,358 +0,0 @@
|
||||
import python
|
||||
|
||||
/*
|
||||
* Classification of variables. These should be non-overlapping and complete.
|
||||
*
|
||||
* Function local variables - Non escaping variables in a function, except 'self'
|
||||
* Self variables - The 'self' variable for a method.
|
||||
* Class local variables - Local variables declared in a class
|
||||
* Non-local variables - Escaping variables in a function
|
||||
* Built-in variables - Global variables with no definition
|
||||
* Non-escaping globals -- Global variables that have definitions and all of those definitions are in the module scope
|
||||
* Escaping globals -- Global variables that have definitions and at least one of those definitions is in another scope.
|
||||
*/
|
||||
|
||||
/** A variable of the source program, from which a set of SSA variables is derived. */
abstract class SsaSourceVariable extends @py_variable {
  SsaSourceVariable() {
    /* `True`, `False` and `None` are excluded: no load of this variable may be a `NameConstant`. */
    not exists(this.(Variable).getALoad().(NameConstant))
  }

  /** Gets the name of this variable. */
  string getName() { variable(this, _, result) }

  /** Gets the scope recorded for this variable. */
  Scope getScope() { variable(this, result, _) }

  /** Gets an implicit use of this variable. */
  abstract ControlFlowNode getAnImplicitUse();

  /** Gets a scope-entry node at which this variable is treated as defined. */
  abstract ControlFlowNode getScopeEntryDefinition();

  /** Gets a textual representation of this element. */
  string toString() { result = "SsaSourceVariable " + this.getName() }

  /** Gets a use of this variable, whether explicit or implicit. */
  ControlFlowNode getAUse() {
    /*
     * Every variable is used at the normal exit of its scope, to keep it live.
     * This is necessary for taint-tracking.
     */

    result = this.getScope().getANormalExit()
    or
    /*
     * `import *` is a definition of *all* variables, so it must be a use as well, for
     * pass-through once we have established that a variable is not redefined.
     */

    SsaSource::import_star_refinement(this, result, _)
    or
    result = this.getAnImplicitUse()
    or
    result = this.getASourceUse()
  }

  /** Holds if `def` defines an ESSA variable for this variable. */
  predicate hasDefiningNode(ControlFlowNode def) {
    SsaSource::with_definition(this, def)
    or
    SsaSource::exception_capture(this, def)
    or
    SsaSource::parameter_definition(this, def)
    or
    SsaSource::init_module_submodule_defn(this, def)
    or
    SsaSource::deletion_definition(this, def)
    or
    SsaSource::multi_assignment_definition(this, def, _, _)
    or
    SsaSource::assignment_definition(this, def, _)
    or
    def = this.getScopeEntryDefinition()
  }

  /**
   * Holds if `def` defines an ESSA variable for this variable such that the new
   * variable is a refinement, in some way, of the variable used at `use`.
   */
  predicate hasRefinement(ControlFlowNode use, ControlFlowNode def) {
    refinement(this, use, def) and
    /* A refinement is only possible if there is some definition */
    this.hasDefiningNode(_)
  }

  /**
   * Holds if the edge `pred`->`succ` defines an ESSA variable for this variable such
   * that the new variable is a refinement, in some way, of the variable used at `use`.
   */
  predicate hasRefinementEdge(ControlFlowNode use, BasicBlock pred, BasicBlock succ) {
    /* Only variables with a store are refined -- we don't want to refine builtins */
    exists(this.(Variable).getAStore()) and
    (succ = pred.getATrueSuccessor() or succ = pred.getAFalseSuccessor()) and
    use.(NameNode).uses(this) and
    test_contains(pred.getLastNode(), use)
  }

  /** Gets a use of this variable that corresponds to an explicit use in the source. */
  ControlFlowNode getASourceUse() {
    exists(NameNode name | name = result |
      name.deletes(this)
      or
      name.uses(this)
    )
  }

  /** Gets a call node at which this variable is treated as redefined. */
  abstract CallNode redefinedAtCallSite();
}
|
||||
|
||||
/**
 * Holds if `def` refines the variable `v` used at `use`, by any of the refinement
 * kinds in `SsaSource` or by a call site at which `v` is redefined.
 */
private predicate refinement(SsaSourceVariable v, ControlFlowNode use, ControlFlowNode def) {
  /* A redefining call site is both the use and the definition */
  use = v.redefinedAtCallSite() and def = use
  or
  SsaSource::method_call_refinement(v, use, def)
  or
  SsaSource::test_refinement(v, use, def)
  or
  SsaSource::attribute_deletion_refinement(v, use, def)
  or
  SsaSource::argument_refinement(v, use, def)
  or
  SsaSource::attribute_assignment_refinement(v, use, def)
  or
  SsaSource::import_star_refinement(v, use, def)
}
|
||||
|
||||
/** A local variable whose scope is a function and which is not a `NonLocalVariable`. */
class FunctionLocalVariable extends SsaSourceVariable {
  FunctionLocalVariable() {
    not this instanceof NonLocalVariable and
    exists(Function func | func = this.(LocalVariable).getScope())
  }

  /** Gets the normal exit of the scope, but only if this variable is `self`. */
  override ControlFlowNode getAnImplicitUse() {
    exists(Variable var | var = this |
      var.isSelf() and
      result = var.getScope().getANormalExit()
    )
  }

  /**
   * Gets the entry node of this variable's own scope (unless it is a parameter),
   * or the entry node of a different scope that contains a load of this variable.
   */
  override ControlFlowNode getScopeEntryDefinition() {
    exists(LocalVariable local, Scope scope |
      local = this and
      result = scope.getEntryNode()
    |
      scope = local.getALoad().getScope() and
      scope != local.getScope()
      or
      not local.isParameter() and
      scope = local.getScope()
    )
  }

  override CallNode redefinedAtCallSite() { none() }
}
|
||||
|
||||
/** A local variable of a function that has a store in a scope other than that function. */
class NonLocalVariable extends SsaSourceVariable {
  NonLocalVariable() {
    exists(LocalVariable local, Function owner |
      local = this and
      owner = local.getScope() and
      owner != local.getAStore().getScope()
    )
  }

  /** Gets a call node in this variable's scope or in any scope nested in it. */
  override ControlFlowNode getAnImplicitUse() {
    this.(LocalVariable).getScope() = result.(CallNode).getScope().getScope*()
  }

  /**
   * Gets the entry node of this variable's own scope (unless it is a parameter),
   * or the entry node of a function nested in that scope.
   */
  override ControlFlowNode getScopeEntryDefinition() {
    not this.(LocalVariable).isParameter() and
    result = this.(LocalVariable).getScope().getEntryNode()
    or
    exists(Function inner |
      result = inner.getEntryNode() and
      this.(LocalVariable).getScope() = inner.getScope+()
    )
  }

  /** Gets the scope of this variable, viewed as a `LocalVariable`. */
  pragma[noinline]
  Scope scope_as_local_variable() { result = this.(LocalVariable).getScope() }

  /** Gets a call node in this variable's scope or in any scope nested in it. */
  override CallNode redefinedAtCallSite() {
    this.scope_as_local_variable() = result.getScope().getScope*()
  }
}
|
||||
|
||||
/** A local variable whose scope is a class. */
class ClassLocalVariable extends SsaSourceVariable {
  ClassLocalVariable() { exists(Class cls | cls = this.(LocalVariable).getScope()) }

  override ControlFlowNode getAnImplicitUse() { none() }

  /** Gets the entry node of the class scope. */
  override ControlFlowNode getScopeEntryDefinition() {
    exists(LocalVariable local | local = this | result = local.getScope().getEntryNode())
  }

  override CallNode redefinedAtCallSite() { none() }
}
|
||||
|
||||
/**
 * A global variable that has no store, is not `__name__` or `__package__`, and
 * whose scope contains no `import *`.
 */
class BuiltinVariable extends SsaSourceVariable {
  BuiltinVariable() {
    this instanceof GlobalVariable and
    not (
      exists(this.(Variable).getAStore())
      or
      this.(Variable).getId() = "__name__"
      or
      this.(Variable).getId() = "__package__"
      or
      exists(ImportStar star | star.getScope() = this.(Variable).getScope())
    )
  }

  override ControlFlowNode getAnImplicitUse() { none() }

  override ControlFlowNode getScopeEntryDefinition() { none() }

  override CallNode redefinedAtCallSite() { none() }
}
|
||||
|
||||
/**
 * A global variable that has a store, or is `__name__` or `__package__`, or
 * whose scope contains an `import *`.
 */
class ModuleVariable extends SsaSourceVariable {
  ModuleVariable() {
    this instanceof GlobalVariable and
    (
      exists(ImportStar star | star.getScope() = this.(Variable).getScope())
      or
      this.(Variable).getId() = "__package__"
      or
      this.(Variable).getId() = "__name__"
      or
      exists(this.(Variable).getAStore())
    )
  }

  /** Gets a call node whose scope is the scope of this global variable. */
  pragma[noinline]
  CallNode global_variable_callnode() { this.(GlobalVariable).getScope() = result.getScope() }

  /**
   * Gets an import-member node in the scope of this global variable for which
   * `import_from_dot_in_init` holds of the module imported under this variable's name.
   */
  pragma[noinline]
  ImportMemberNode global_variable_import() {
    import_from_dot_in_init(result.(ImportMemberNode).getModule(this.getName())) and
    this.(GlobalVariable).getScope() = result.getScope()
  }

  override ControlFlowNode getAnImplicitUse() {
    exists(ImportTimeScope own |
      own = this.(Variable).getScope() and
      implicit_definition(this) and
      result = own.getANormalExit()
    )
    or
    /* For implicit use of __metaclass__ when constructing class */
    exists(Class cls |
      cls.(ImportTimeScope).entryEdge(result, _) and
      class_with_global_metaclass(cls, this)
    )
    or
    exists(ImportTimeScope inner | inner.entryEdge(result, _) |
      inner = this.(Variable).getAUse().getScope()
      or
      this = inner.getOuterVariable(_)
    )
    or
    result = this.global_variable_import()
    or
    result = this.global_variable_callnode()
  }

  override ControlFlowNode getScopeEntryDefinition() {
    exists(ImportTimeScope inner | inner.entryEdge(_, result) |
      inner = this.(Variable).getAUse().getScope()
      or
      this = inner.getOuterVariable(_)
    )
    or
    exists(Scope entered | result = entered.getEntryNode() |
      /* Variable is used in scope */
      entered = this.(GlobalVariable).getAUse().getScope()
      or
      /* For implicit use of __metaclass__ when constructing class */
      class_with_global_metaclass(entered, this)
      or
      /* Module entry point */
      entered = this.(GlobalVariable).getScope()
    )
  }

  override CallNode redefinedAtCallSite() { none() }
}
|
||||
|
||||
/**
 * A module variable that has a store and for which
 * `variable_or_attribute_defined_out_of_scope` does not hold.
 */
class NonEscapingGlobalVariable extends ModuleVariable {
  NonEscapingGlobalVariable() {
    not variable_or_attribute_defined_out_of_scope(this) and
    exists(this.(Variable).getAStore()) and
    this instanceof GlobalVariable
  }
}
|
||||
|
||||
/**
 * A module variable that has a store and for which
 * `variable_or_attribute_defined_out_of_scope` holds.
 */
class EscapingGlobalVariable extends ModuleVariable {
  EscapingGlobalVariable() {
    variable_or_attribute_defined_out_of_scope(this) and
    exists(this.(Variable).getAStore()) and
    this instanceof GlobalVariable
  }

  /**
   * Gets an implicit use as for any `ModuleVariable`, or a call node in a scope
   * strictly nested in this variable's scope, or the normal exit of an inner scope.
   */
  override ControlFlowNode getAnImplicitUse() {
    result = this.innerScope().getANormalExit()
    or
    this.(GlobalVariable).getScope() = result.(CallNode).getScope().getScope+()
    or
    result = ModuleVariable.super.getAnImplicitUse()
  }

  /** Gets a scope strictly nested in this variable's scope that is not an `ImportTimeScope`. */
  private Scope innerScope() {
    not result instanceof ImportTimeScope and
    this.(GlobalVariable).getScope() = result.getScope+()
  }

  /** Gets a scope-entry definition as for any `ModuleVariable`, or the entry node of an inner scope. */
  override ControlFlowNode getScopeEntryDefinition() {
    result = this.innerScope().getEntryNode()
    or
    result = ModuleVariable.super.getScopeEntryDefinition()
  }

  /** Gets the scope of this variable, viewed as a `GlobalVariable`. */
  pragma[noinline]
  Scope scope_as_global_variable() { result = this.(GlobalVariable).getScope() }

  /** Gets a call node in this variable's scope or in any scope nested in it. */
  override CallNode redefinedAtCallSite() {
    this.scope_as_global_variable() = result.(CallNode).getScope().getScope*()
  }
}
|
||||
|
||||
/** An escaping global variable that is defined by a name node outside the variable's own scope. */
class EscapingAssignmentGlobalVariable extends EscapingGlobalVariable {
  EscapingAssignmentGlobalVariable() {
    exists(NameNode definer |
      definer.defines(this) and
      not definer.getScope() = this.getScope()
    )
  }
}
|
||||
|
||||
/** A variable whose name is `*` or `$`. */
class SpecialSsaSourceVariable extends SsaSourceVariable {
  SpecialSsaSourceVariable() {
    exists(string special | variable(this, _, special) | special = "*" or special = "$")
  }

  /** Gets the normal exit of this variable's scope, if that scope is an `ImportTimeScope`. */
  override ControlFlowNode getAnImplicitUse() {
    exists(ImportTimeScope own |
      own = this.getScope() and
      result = own.getANormalExit()
    )
  }

  /** Gets the entry node of this variable's scope. */
  override ControlFlowNode getScopeEntryDefinition() {
    /* Module entry point */
    result = this.getScope().getEntryNode()
  }

  /** Gets the scope of this variable, viewed as a `GlobalVariable`. */
  pragma[noinline]
  Scope scope_as_global_variable() { result = this.(GlobalVariable).getScope() }

  /** Gets a call node in this variable's scope or in any scope nested in it. */
  override CallNode redefinedAtCallSite() {
    this.scope_as_global_variable() = result.(CallNode).getScope().getScope*()
  }
}
|
||||
|
||||
/**
 * Holds if `v` is implicitly defined: it is named `*` or `$`, or its scope
 * contains an `import *`.
 */
private predicate implicit_definition(Variable v) {
  exists(ImportStar star | star.getScope() = v.getScope())
  or
  v.getId() = "$"
  or
  v.getId() = "*"
}
|
||||
|
||||
/**
 * Holds if `v` is defined by a name node outside its own scope, or if an
 * attribute store outside its scope has a use of `v` as its object.
 */
private predicate variable_or_attribute_defined_out_of_scope(Variable v) {
  exists(AttrNode attr |
    not attr.getScope() = v.getScope() and
    attr.getObject() = v.getAUse() and
    attr.isStore()
  )
  or
  exists(NameNode definer |
    not definer.getScope() = v.getScope() and
    definer.defines(v)
  )
}
|
||||
|
||||
/**
 * Holds if `metaclass` is the global variable `__metaclass__` of the module
 * enclosing `cls`, and the major version is 2.
 */
private predicate class_with_global_metaclass(Class cls, GlobalVariable metaclass) {
  major_version() = 2 and
  metaclass.getId() = "__metaclass__" and
  metaclass.getScope() = cls.getEnclosingModule()
}
|
||||
@@ -1,735 +0,0 @@
|
||||
/**
|
||||
* Library for SSA representation (Static Single Assignment form).
|
||||
*/
|
||||
|
||||
import python
|
||||
private import SsaCompute
|
||||
import semmle.python.essa.Definitions
|
||||
|
||||
/** An (enhanced) SSA variable derived from `SsaSourceVariable`. */
class EssaVariable extends TEssaDefinition {
  /** Gets the (unique) definition of this variable. */
  EssaDefinition getDefinition() { result = this }

  /**
   * Gets a use of this variable, where a "use" is defined by
   * `SsaSourceVariable.getAUse()`.
   * Note that this differs from `EssaVariable.getASourceUse()`.
   */
  ControlFlowNode getAUse() {
    exists(EssaDefinition def | def = this.getDefinition() | result = def.getAUse())
  }

  /** Gets the source variable from which this variable is derived. */
  SsaSourceVariable getSourceVariable() {
    exists(EssaDefinition def | def = this.getDefinition() | result = def.getSourceVariable())
  }

  /** Gets the name of this variable. */
  string getName() { result = this.getSourceVariable().getName() }

  /** Gets a textual representation of this element. */
  string toString() { result = "SSA variable " + this.getName() }

  /**
   * Gets a string representation of this variable.
   * WARNING: The format of this may change and it may be very inefficient to compute.
   * To be used for debugging and testing only.
   */
  string getRepresentation() {
    exists(string name, int rnk |
      name = this.getSourceVariable().getName() and
      rnk = var_rank(this) and
      result = name + "_" + rnk
    )
  }

  /**
   * Gets a use of this variable, where a "use" is defined by
   * `SsaSourceVariable.getASourceUse()`.
   * Note that this differs from `EssaVariable.getAUse()`.
   */
  ControlFlowNode getASourceUse() {
    exists(SsaSourceVariable src |
      result = src.getASourceUse() and
      result = this.use_for_var(src)
    )
  }

  /** Gets a use of this variable, where `var` is its source variable. */
  pragma[nomagic]
  private ControlFlowNode use_for_var(SsaSourceVariable var) {
    var = this.getSourceVariable() and
    result = this.getAUse()
  }

  /** Gets the scope of this variable. */
  Scope getScope() {
    exists(EssaDefinition def | def = this.getDefinition() | result = def.getScope())
  }

  /**
   * Holds if this is the meta-variable for a scope.
   * This is used to attach attributes for undeclared variables implicitly
   * defined by `from ... import *` and the like.
   */
  predicate isMetaVariable() { "$" = this.getName() }

  /**
   * Gets the location of this variable.
   *
   * Yields the location of the corresponding definition of this variable.
   */
  Location getLocation() {
    exists(EssaDefinition def | def = this.getDefinition() | result = def.getLocation())
  }
}
|
||||
|
||||
/*
 * Helper for location_string
 * NOTE: This is Python specific, to make `getRepresentation()` portable will require further work.
 */

/** Gets 0 if `b` reaches the exit and 1 if it does not. */
private int exception_handling(BasicBlock b) {
  if b.reachesExit() then result = 0 else result = 1
}
|
||||
|
||||
/**
 * Helper for `var_index`.
 * Gets a (probably) unique string for the location of the definition of `v`,
 * built from the line and column of its basic block and its index in that block.
 */
pragma[noinline]
private string location_string(EssaVariable v) {
  exists(EssaDefinition defn, BasicBlock block, int idx, int line, int col |
    defn = v.getDefinition() and
    (
      defn = TPhiFunction(_, block) and idx = phiIndex()
      or
      defn = TEssaEdgeDefinition(_, _, block) and idx = piIndex()
      or
      defn = TEssaNodeRefinement(_, block, idx)
      or
      defn = TEssaNodeDefinition(_, block, idx)
    ) and
    (
      /* A block starting with the normal exit gets a fixed position */
      block.getNode(0).isNormalExit() and line = 100000 and col = 0
      or
      not block.getNode(0).isNormalExit() and block.hasLocationInfo(_, line, col, _, _)
    ) and
    /* Add large numbers to values to prevent 1000 sorting before 99 */
    result =
      (line + 100000) + ":" + (col * 2 + 10000 + exception_handling(block)) + ":" + (idx + 100003)
  )
}
|
||||
|
||||
/**
 * Helper to compute an index for this SSA variable: the rank of its location
 * string among the location strings of all ESSA variables.
 */
private int var_index(EssaVariable v) {
  exists(string loc |
    loc = location_string(v) and
    loc = rank[result](string s | s = location_string(_) | s)
  )
}
|
||||
|
||||
/**
 * Helper for `v.getRepresentation()`.
 * Gets the zero-based rank of `v`, by `var_index`, among the ESSA variables
 * that share its source variable.
 */
private int var_rank(EssaVariable v) {
  exists(SsaSourceVariable src, int oneBased |
    src = v.getSourceVariable() and
    var_index(v) =
      rank[oneBased](EssaVariable other | other.getSourceVariable() = src | var_index(other)) and
    result = oneBased - 1
  )
}
|
||||
|
||||
/**
 * Underlying IPA type for EssaDefinition and EssaVariable.
 *
 * One branch per kind of definition: node definitions, node refinements,
 * edge (pi-node) definitions and phi-functions.
 */
cached
private newtype TEssaDefinition =
  TEssaNodeDefinition(SsaSourceVariable v, BasicBlock b, int i) {
    EssaDefinitions::variableDefinition(v, _, b, _, i)
  } or
  TEssaNodeRefinement(SsaSourceVariable v, BasicBlock b, int i) {
    EssaDefinitions::variableRefinement(v, _, b, _, i)
  } or
  TEssaEdgeDefinition(SsaSourceVariable v, BasicBlock pred, BasicBlock succ) {
    EssaDefinitions::piNode(v, pred, succ)
  } or
  TPhiFunction(SsaSourceVariable v, BasicBlock b) {
    EssaDefinitions::phiNode(v, b)
  }
|
||||
|
||||
/**
 * Definition of an extended-SSA (ESSA) variable.
 * Each variable has exactly one definition,
 * and each definition has exactly one variable.
 */
abstract class EssaDefinition extends TEssaDefinition {
  /** Gets a textual representation of this element. */
  string toString() { result = "EssaDefinition" }

  /** Gets the source variable for which this is a definition, either explicit or implicit. */
  abstract SsaSourceVariable getSourceVariable();

  /** Gets a use of this definition as defined by the `SsaSourceVariable` class. */
  abstract ControlFlowNode getAUse();

  /** Holds if this definition reaches the end of `b`. */
  abstract predicate reachesEndOfBlock(BasicBlock b);

  /**
   * Gets the location of a control flow node that is indicative of this definition.
   * Since definitions may occur on edges of the control flow graph, the given location may
   * be imprecise.
   * Distinct `EssaDefinitions` may return the same ControlFlowNode even for
   * the same variable.
   */
  abstract Location getLocation();

  /**
   * Gets a representation of this SSA definition for debugging purposes.
   * Since this is primarily for debugging and testing, performance may be poor.
   */
  abstract string getRepresentation();

  /** Gets the scope of this definition. */
  abstract Scope getScope();

  /** Gets the ESSA variable whose definition is this definition. */
  EssaVariable getVariable() { this = result.getDefinition() }

  /** Gets the basic block associated with this definition. */
  abstract BasicBlock getBasicBlock();
}
|
||||
|
||||
/**
 * An ESSA definition corresponding to an edge refinement of the underlying variable.
 * For example, both edges leaving a test on a variable are refinements of that
 * variable: the test is true on one edge and false on the other.
 */
class EssaEdgeRefinement extends EssaDefinition, TEssaEdgeDefinition {
  override string toString() { result = "SSA filter definition" }

  /** Gets `true` if the edge is a true-successor edge, and `false` if it is a false-successor edge. */
  boolean getSense() {
    exists(BasicBlock from, BasicBlock to |
      from = this.getPredecessor() and to = this.getSuccessor()
    |
      to = from.getAFalseSuccessor() and result = false
      or
      to = from.getATrueSuccessor() and result = true
    )
  }

  override SsaSourceVariable getSourceVariable() { this = TEssaEdgeDefinition(result, _, _) }

  /** Gets the basic block preceding the edge on which this refinement occurs. */
  BasicBlock getPredecessor() { this = TEssaEdgeDefinition(_, result, _) }

  /** Gets the basic block succeeding the edge on which this refinement occurs. */
  BasicBlock getSuccessor() { this = TEssaEdgeDefinition(_, _, result) }

  override ControlFlowNode getAUse() {
    exists(SsaSourceVariable src, BasicBlock to |
      src = this.getSourceVariable() and
      to = this.getSuccessor() and
      SsaDefinitions::reachesUse(src, to, piIndex(), result)
    )
  }

  override predicate reachesEndOfBlock(BasicBlock b) {
    exists(SsaSourceVariable src, BasicBlock to |
      src = this.getSourceVariable() and
      to = this.getSuccessor() and
      SsaDefinitions::reachesEndOfBlock(src, to, piIndex(), b)
    )
  }

  /** Gets the location of the first node of the successor block. */
  override Location getLocation() {
    exists(BasicBlock to | to = this.getSuccessor() | result = to.getNode(0).getLocation())
  }

  /** Gets the SSA variable to which this refinement applies. */
  EssaVariable getInput() {
    exists(EssaDefinition reaching |
      reaching.getSourceVariable() = this.getSourceVariable() and
      reaching.reachesEndOfBlock(this.getPredecessor()) and
      reaching = result.getDefinition()
    )
  }

  override string getRepresentation() {
    exists(string cls, string input |
      cls = this.getAQlClass() and
      input = this.getInput().getRepresentation() and
      result = cls + "(" + input + ")"
    )
  }

  /** Gets the scope of the variable defined by this definition. */
  override Scope getScope() {
    exists(BasicBlock from | from = this.getPredecessor() | result = from.getScope())
  }

  override BasicBlock getBasicBlock() { result = this.getSuccessor() }
}
|
||||
|
||||
/** A Phi-function as specified in classic SSA form. */
class PhiFunction extends EssaDefinition, TPhiFunction {
  override ControlFlowNode getAUse() {
    exists(SsaSourceVariable src, BasicBlock block |
      src = this.getSourceVariable() and
      block = this.getBasicBlock() and
      SsaDefinitions::reachesUse(src, block, phiIndex(), result)
    )
  }

  override predicate reachesEndOfBlock(BasicBlock b) {
    exists(SsaSourceVariable src, BasicBlock block |
      src = this.getSourceVariable() and
      block = this.getBasicBlock() and
      SsaDefinitions::reachesEndOfBlock(src, block, phiIndex(), b)
    )
  }

  override SsaSourceVariable getSourceVariable() { this = TPhiFunction(result, _) }

  /** Gets an input refinement that exists on one of the incoming edges to this phi node. */
  private EssaEdgeRefinement inputEdgeRefinement(BasicBlock pred) {
    pred = result.getPredecessor() and
    this.getBasicBlock() = result.getSuccessor() and
    this.getSourceVariable() = result.getSourceVariable()
  }

  /** Gets a predecessor block whose edge to this phi node carries no input edge refinement. */
  private BasicBlock nonPiInput() {
    not exists(this.inputEdgeRefinement(result)) and
    result = this.getBasicBlock().getAPredecessor()
  }

  /** Gets the source variable of this phi node, where `pred` is one of its non-pi inputs. */
  pragma[noinline]
  private SsaSourceVariable pred_var(BasicBlock pred) {
    pred = this.nonPiInput() and
    result = this.getSourceVariable()
  }

  /** Gets another definition of the same source variable that reaches this definition. */
  private EssaDefinition reachingDefinition(BasicBlock pred) {
    result.reachesEndOfBlock(pred) and
    result.getSourceVariable() = this.pred_var(pred) and
    result.getScope() = this.getScope()
  }

  /** Gets the input variable for this phi node on the edge `pred` -> `this.getBasicBlock()`, if any. */
  cached
  EssaVariable getInput(BasicBlock pred) {
    exists(EssaDefinition def | def = result.getDefinition() |
      def = this.inputEdgeRefinement(pred)
      or
      def = this.reachingDefinition(pred)
    )
  }

  /** Gets an input variable for this phi node. */
  EssaVariable getAnInput() { result = this.getInput(_) }

  /** Holds if, for every incoming edge in the flow graph, there is an input variable. */
  predicate isComplete() {
    forall(BasicBlock incoming | incoming = this.getBasicBlock().getAPredecessor() |
      exists(this.getInput(incoming))
    )
  }

  override string toString() { result = "SSA Phi Function" }

  /** Gets the basic block that succeeds this phi node. */
  override BasicBlock getBasicBlock() { this = TPhiFunction(_, result) }

  /** Gets the location of the first node of this phi node's basic block. */
  override Location getLocation() {
    exists(BasicBlock block | block = this.getBasicBlock() |
      result = block.getNode(0).getLocation()
    )
  }

  /** Helper for `argList(n)`: gets the rank of `input`, by `var_index`, among the inputs. */
  private int rankInput(EssaVariable input) {
    input = this.getAnInput() and
    var_index(input) =
      rank[result](EssaVariable other | other = this.getAnInput() | var_index(other))
  }

  /** Helper for `argList()`: gets the comma-separated representations of the first `n` inputs. */
  private string argList(int n) {
    exists(EssaVariable input, string repr |
      n = this.rankInput(input) and
      repr = input.getRepresentation()
    |
      n > 1 and result = this.argList(n - 1) + ", " + repr
      or
      n = 1 and result = repr
    )
  }

  /** Helper for `getRepresentation()`: gets the argument list covering the highest-ranked input. */
  private string argList() { result = this.argList(max(int x | x = this.rankInput(_))) }

  override string getRepresentation() {
    /* There are inputs, but no argument list could be built */
    exists(this.getAnInput()) and
    not exists(this.argList()) and
    result = "phi(" + this.getSourceVariable().getName() + "??)"
    or
    result = "phi(" + this.argList() + ")"
    or
    not exists(this.getAnInput()) and result = "phi()"
  }

  override Scope getScope() {
    exists(BasicBlock block | block = this.getBasicBlock() | result = block.getScope())
  }

  /**
   * Gets an edge refinement that is the definition of the input `input`, or that is
   * (recursively) such a refinement for a phi-function defining `input`.
   */
  private EssaEdgeRefinement piInputDefinition(EssaVariable input) {
    input = this.getAnInput() and
    (
      result = input.getDefinition().(PhiFunction).piInputDefinition(_)
      or
      result = input.getDefinition()
    )
  }

  /**
   * Gets the variable which is the common and complete input to all pi-nodes that are themselves
   * inputs to this phi-node.
   * For example:
   * ```
   * x = y()
   * if complicated_test(x):
   *     do_a()
   * else:
   *     do_b()
   * phi
   * ```
   * Which gives us the ESSA form:
   * x0 = y()
   * x1 = pi(x0, complicated_test(x0))
   * x2 = pi(x0, not complicated_test(x0))
   * x3 = phi(x1, x2)
   * However we may not be able to track the value of `x` through `complicated_test`
   * meaning that we cannot track `x` from `x0` to `x3`.
   * By using `getShortCircuitInput()` we can do so, since the short-circuit input of `x3` is `x0`.
   */
  pragma[noinline]
  EssaVariable getShortCircuitInput() {
    exists(BasicBlock shared |
      exists(EssaEdgeRefinement pi |
        result = pi.getInput() and
        shared = pi.getPredecessor() and
        pi = this.piInputDefinition(_)
      ) and
      forall(BasicBlock next | next = shared.getASuccessor() |
        next = this.piInputDefinition(_).getSuccessor()
      ) and
      forall(EssaVariable input | input = this.getAnInput() |
        shared = this.piInputDefinition(input).getPredecessor()
      )
    )
  }
}
|
||||
|
||||
/**
 * A definition of an ESSA variable that is not directly linked to
 * another ESSA variable.
 */
class EssaNodeDefinition extends EssaDefinition, TEssaNodeDefinition {
  override string toString() { result = "Essa node definition" }

  override ControlFlowNode getAUse() {
    exists(SsaSourceVariable src, BasicBlock block, int idx |
      SsaDefinitions::reachesUse(src, block, idx, result) and
      this = TEssaNodeDefinition(src, block, idx)
    )
  }

  override predicate reachesEndOfBlock(BasicBlock b) {
    exists(BasicBlock block, int idx |
      SsaDefinitions::reachesEndOfBlock(this.getSourceVariable(), block, idx, b) and
      this = TEssaNodeDefinition(_, block, idx)
    )
  }

  override SsaSourceVariable getSourceVariable() { this = TEssaNodeDefinition(result, _, _) }

  /** Gets the ControlFlowNode corresponding to this definition. */
  ControlFlowNode getDefiningNode() { this.definedBy(_, result) }

  override Location getLocation() {
    exists(ControlFlowNode node | node = this.getDefiningNode() | result = node.getLocation())
  }

  override string getRepresentation() { result = this.getAQlClass() }

  /** Gets the scope of the basic block recorded in this definition. */
  override Scope getScope() {
    exists(BasicBlock block |
      result = block.getScope() and
      this = TEssaNodeDefinition(_, block, _)
    )
  }

  /**
   * Holds if this is a definition of `v` at the node `def`.
   * The node at position `i` of a block corresponds to definition indices
   * `2 * i` and `2 * i + 1` in that block.
   */
  predicate definedBy(SsaSourceVariable v, ControlFlowNode def) {
    exists(BasicBlock block, int i, int idx |
      def = block.getNode(i) and
      (idx = 2 * i or idx = 2 * i + 1) and
      this = TEssaNodeDefinition(v, block, idx)
    )
  }

  override BasicBlock getBasicBlock() {
    exists(ControlFlowNode node | node = this.getDefiningNode() | result = node.getBasicBlock())
  }
}
|
||||
|
||||
/** A definition of an ESSA variable that takes another ESSA variable as an input. */
class EssaNodeRefinement extends EssaDefinition, TEssaNodeRefinement {
  override string toString() { result = "SSA filter definition" }

  /**
   * Gets the SSA variable to which this refinement applies.
   *
   * This is a potential input of this refinement that is not also a potential
   * input of the definition of another potential input.
   */
  EssaVariable getInput() {
    result = potential_input(this) and
    not exists(EssaVariable other | other = potential_input(this) |
      result = potential_input(other.getDefinition())
    )
  }

  /** Gets a control-flow node that this refinement reaches as a use. */
  override ControlFlowNode getAUse() {
    exists(SsaSourceVariable srcVar, BasicBlock defBlock, int defIndex |
      SsaDefinitions::reachesUse(srcVar, defBlock, defIndex, result) and
      this = TEssaNodeRefinement(srcVar, defBlock, defIndex)
    )
  }

  /** Holds if this refinement reaches the end of basic block `b`. */
  override predicate reachesEndOfBlock(BasicBlock b) {
    exists(BasicBlock defBlock, int defIndex |
      SsaDefinitions::reachesEndOfBlock(this.getSourceVariable(), defBlock, defIndex, b) and
      this = TEssaNodeRefinement(_, defBlock, defIndex)
    )
  }

  override SsaSourceVariable getSourceVariable() { this = TEssaNodeRefinement(result, _, _) }

  /** Gets the control-flow node corresponding to this definition. */
  ControlFlowNode getDefiningNode() { this.definedBy(_, result) }

  override Location getLocation() {
    exists(ControlFlowNode node | node = this.getDefiningNode() | result = node.getLocation())
  }

  /**
   * Gets a textual representation of this refinement: the QL class name applied to
   * the representation of the input, or to `<name>??` when there is no input.
   */
  override string getRepresentation() {
    not exists(this.getInput()) and
    result = this.getAQlClass() + "(" + this.getSourceVariable().getName() + "??)"
    or
    result = this.getAQlClass() + "(" + this.getInput().getRepresentation() + ")"
  }

  override Scope getScope() {
    exists(BasicBlock defBlock |
      result = defBlock.getScope() and
      this = TEssaNodeRefinement(_, defBlock, _)
    )
  }

  /**
   * Holds if this is a refinement of the source variable `v` at the control-flow node `def`.
   *
   * The node at position `pos` of its basic block corresponds to the
   * definition indices `2 * pos` and `2 * pos + 1`.
   */
  predicate definedBy(SsaSourceVariable v, ControlFlowNode def) {
    exists(BasicBlock block, int pos |
      def = block.getNode(pos) and
      (
        this = TEssaNodeRefinement(v, block, 2 * pos)
        or
        this = TEssaNodeRefinement(v, block, 2 * pos + 1)
      )
    )
  }

  override BasicBlock getBasicBlock() {
    exists(ControlFlowNode node | node = this.getDefiningNode() | result = node.getBasicBlock())
  }
}
|
||||
|
||||
/**
 * Gets an ESSA variable that may be the input of the refinement `ref`: a variable
 * with the same source variable as `ref`, one of whose uses is refined at the
 * defining node of `ref`.
 */
pragma[noopt]
private EssaVariable potential_input(EssaNodeRefinement ref) {
  // NOTE: `pragma[noopt]` switches off the optimiser for this predicate, so the
  // order of the conjuncts below is deliberate and must be preserved.
  exists(ControlFlowNode usage, SsaSourceVariable srcVar, ControlFlowNode defn |
    srcVar.hasRefinement(usage, defn) and
    usage = result.getAUse() and
    srcVar = result.getSourceVariable() and
    defn = ref.getDefiningNode() and
    srcVar = ref.getSourceVariable()
  )
}
|
||||
|
||||
/** DEPRECATED: Alias of `EssaNodeDefinition`, kept for backwards compatibility. */
deprecated class PyNodeDefinition = EssaNodeDefinition;
|
||||
|
||||
/** DEPRECATED: Alias of `EssaNodeRefinement`, kept for backwards compatibility. */
deprecated class PyNodeRefinement = EssaNodeRefinement;
|
||||
|
||||
/** An assignment to a variable, `v = val`. */
class AssignmentDefinition extends EssaNodeDefinition {
  AssignmentDefinition() {
    exists(SsaSourceVariable srcVar, ControlFlowNode target |
      srcVar = this.getSourceVariable() and target = this.getDefiningNode()
    |
      SsaSource::assignment_definition(srcVar, target, _)
    )
  }

  /** Gets the control-flow node for the value assigned by this definition. */
  ControlFlowNode getValue() {
    exists(SsaSourceVariable srcVar, ControlFlowNode target |
      srcVar = this.getSourceVariable() and target = this.getDefiningNode()
    |
      SsaSource::assignment_definition(srcVar, target, result)
    )
  }

  /** Gets the textual form of the assigned value's AST node. */
  override string getRepresentation() {
    exists(ControlFlowNode assigned | assigned = this.getValue() |
      result = assigned.getNode().toString()
    )
  }
}
|
||||
|
||||
/** The capture of a raised exception, `except ExceptionType ex:`. */
class ExceptionCapture extends EssaNodeDefinition {
  ExceptionCapture() {
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      SsaSource::exception_capture(srcVar, this.getDefiningNode())
    )
  }

  /** Gets the type expression of the `except` clause whose name this definition defines. */
  ControlFlowNode getType() {
    exists(ExceptFlowNode handler | handler.getName() = this.getDefiningNode() |
      result = handler.getType()
    )
  }

  override string getRepresentation() {
    exists(string name | name = this.getSourceVariable().getName() | result = "except " + name)
  }
}
|
||||
|
||||
/** An assignment to a variable as part of a multiple assignment, `..., v, ... = val`. */
class MultiAssignmentDefinition extends EssaNodeDefinition {
  MultiAssignmentDefinition() {
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      SsaSource::multi_assignment_definition(srcVar, this.getDefiningNode(), _, _)
    )
  }

  /** Gets the textual form of the assigned value, followed by the index of this variable in brackets. */
  override string getRepresentation() {
    exists(ControlFlowNode lhs, int n, string valueText |
      this.indexOf(n, lhs) and
      valueText = lhs.(DefinitionNode).getValue().getNode().toString() and
      result = valueText + "[" + n + "]"
    )
  }

  /** Holds if `this` has (zero-based) index `index` in `lhs`. */
  predicate indexOf(int index, SequenceNode lhs) {
    exists(SsaSourceVariable srcVar, ControlFlowNode target |
      srcVar = this.getSourceVariable() and target = this.getDefiningNode()
    |
      SsaSource::multi_assignment_definition(srcVar, target, index, lhs)
    )
  }
}
|
||||
|
||||
/** A definition of a variable in a `with` statement. */
class WithDefinition extends EssaNodeDefinition {
  WithDefinition() {
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      SsaSource::with_definition(srcVar, this.getDefiningNode())
    )
  }

  override string getRepresentation() { result = "with" }
}
|
||||
|
||||
/** A definition of a variable by declaring it as a parameter. */
class ParameterDefinition extends EssaNodeDefinition {
  ParameterDefinition() {
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      SsaSource::parameter_definition(srcVar, this.getDefiningNode())
    )
  }

  /** Holds if the parameter defined here is a `self` parameter. */
  predicate isSelf() {
    exists(Parameter p | p = this.getDefiningNode().getNode() | p.isSelf())
  }

  /** Gets the control flow node for the default value of this parameter. */
  ControlFlowNode getDefault() {
    exists(Parameter p | p = this.getParameter() | result.getNode() = p.getDefault())
  }

  /** Gets the annotation control flow node of this parameter. */
  ControlFlowNode getAnnotation() {
    exists(Parameter p | p = this.getParameter() | result.getNode() = p.getAnnotation())
  }

  /** Gets the name of this parameter definition. */
  string getName() {
    exists(Parameter p | p = this.getParameter() | result = p.asName().getId())
  }

  /** Holds if this parameter is the vararg parameter of some function. */
  predicate isVarargs() { this.getDefiningNode().getNode() = any(Function f).getVararg() }

  /**
   * Holds if this parameter is a 'kwargs' parameter.
   * The `kwargs` in `f(a, b, **kwargs)`.
   */
  predicate isKwargs() { this.getDefiningNode().getNode() = any(Function f).getKwarg() }

  /** Gets the `Parameter` this `ParameterDefinition` represents. */
  Parameter getParameter() { this.getDefiningNode().getNode() = result }
}
|
||||
|
||||
/** A deletion of a variable, `del v`. */
class DeletionDefinition extends EssaNodeDefinition {
  DeletionDefinition() {
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      SsaSource::deletion_definition(srcVar, this.getDefiningNode())
    )
  }
}
|
||||
|
||||
/**
 * A definition of a variable at the entry of a scope. This usually represents the
 * transfer of a global or non-local variable from one scope to another.
 */
class ScopeEntryDefinition extends EssaNodeDefinition {
  ScopeEntryDefinition() {
    // Implicit sub-module definitions are modelled by their own class.
    not this instanceof ImplicitSubModuleDefinition and
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      srcVar.getScopeEntryDefinition() = this.getDefiningNode()
    )
  }

  /** Gets the scope whose entry node is the defining node of this definition. */
  override Scope getScope() { this.getDefiningNode() = result.getEntryNode() }
}
|
||||
|
||||
/** A possible redefinition of a variable via `from ... import *`. */
class ImportStarRefinement extends EssaNodeRefinement {
  ImportStarRefinement() {
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      SsaSource::import_star_refinement(srcVar, _, this.getDefiningNode())
    )
  }
}
|
||||
|
||||
/** An assignment of an attribute, `obj.attr = val`. */
class AttributeAssignment extends EssaNodeRefinement {
  AttributeAssignment() {
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      SsaSource::attribute_assignment_refinement(srcVar, _, this.getDefiningNode())
    )
  }

  /** Gets the name of the attribute being assigned. */
  string getName() {
    exists(AttrNode attr | attr = this.getDefiningNode() | result = attr.getName())
  }

  /** Gets the control-flow node for the value being assigned. */
  ControlFlowNode getValue() {
    exists(DefinitionNode defn | defn = this.getDefiningNode() | result = defn.getValue())
  }

  /**
   * Gets a textual representation of this assignment: the QL class name and the quoted
   * attribute name, applied to the representation of the input, or to `<name>??` when
   * there is no input.
   */
  override string getRepresentation() {
    exists(string prefix | prefix = this.getAQlClass() + " '" + this.getName() + "'(" |
      not exists(this.getInput()) and
      result = prefix + this.getSourceVariable().getName() + "??)"
      or
      result = prefix + this.getInput().getRepresentation() + ")"
    )
  }
}
|
||||
|
||||
/** A use of a variable as an argument, `foo(v)`, which might modify the object referred to. */
class ArgumentRefinement extends EssaNodeRefinement {
  // The control-flow node passed as the argument, bound by the characteristic predicate.
  ControlFlowNode argument;

  ArgumentRefinement() {
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      SsaSource::argument_refinement(srcVar, argument, this.getDefiningNode())
    )
  }

  /** Gets the control-flow node of the argument. */
  ControlFlowNode getArgument() { argument = result }

  /** Gets the call that is the defining node of this refinement. */
  CallNode getCall() { this.getDefiningNode() = result }
}
|
||||
|
||||
/** A deletion of an attribute, `del obj.attr`. */
class EssaAttributeDeletion extends EssaNodeRefinement {
  EssaAttributeDeletion() {
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      SsaSource::attribute_deletion_refinement(srcVar, _, this.getDefiningNode())
    )
  }

  /** Gets the name of the attribute being deleted. */
  string getName() {
    exists(AttrNode attr | attr = this.getDefiningNode() | result = attr.getName())
  }
}
|
||||
|
||||
/** A pi-node (guard) with only one successor. */
class SingleSuccessorGuard extends EssaNodeRefinement {
  SingleSuccessorGuard() {
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      SsaSource::test_refinement(srcVar, _, this.getDefiningNode())
    )
  }

  /**
   * Gets the sense of this guard: `true` if the defining node has a true successor,
   * `false` if it has a false successor.
   */
  boolean getSense() {
    result = true and exists(this.getDefiningNode().getATrueSuccessor())
    or
    result = false and exists(this.getDefiningNode().getAFalseSuccessor())
  }

  /** Gets the inherited representation followed by the sense in brackets, or `[??]` when the sense is unknown. */
  override string getRepresentation() {
    exists(string base | base = EssaNodeRefinement.super.getRepresentation() |
      not exists(this.getSense()) and
      result = base + " [??]"
      or
      result = base + " [" + this.getSense().toString() + "]"
    )
  }

  /** Gets the test, which is the defining node of this guard. */
  ControlFlowNode getTest() { this.getDefiningNode() = result }

  /** Holds if `test` is the defining node of this guard and refines the use `use` of the source variable. */
  predicate useAndTest(ControlFlowNode use, ControlFlowNode test) {
    SsaSource::test_refinement(this.getSourceVariable(), use, test) and
    this.getDefiningNode() = test
  }
}
|
||||
|
||||
/**
 * An implicit definition of the name of a sub-module in a package.
 * The interpreter does not pre-define these names, it merely populates them as they
 * are imported, but treating them as defined is a good approximation for static analysis.
 */
class ImplicitSubModuleDefinition extends EssaNodeDefinition {
  ImplicitSubModuleDefinition() {
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      SsaSource::init_module_submodule_defn(srcVar, this.getDefiningNode())
    )
  }
}
|
||||
|
||||
/** An implicit (possible) definition of an escaping variable at a call-site. */
class CallsiteRefinement extends EssaNodeRefinement {
  CallsiteRefinement() {
    exists(SsaSourceVariable srcVar | this.definedBy(srcVar, srcVar.redefinedAtCallSite())) and
    // Refinements with a more specific model are excluded.
    not (
      this instanceof ArgumentRefinement
      or
      this instanceof MethodCallsiteRefinement
      or
      this instanceof SingleSuccessorGuard
    )
  }

  override string toString() { result = "CallsiteRefinement" }

  /** Gets the call that is the defining node of this refinement. */
  CallNode getCall() { result = this.getDefiningNode() }
}
|
||||
|
||||
/** An implicit (possible) modification of the object referred to at a method call. */
class MethodCallsiteRefinement extends EssaNodeRefinement {
  MethodCallsiteRefinement() {
    not this instanceof SingleSuccessorGuard and
    exists(SsaSourceVariable srcVar | srcVar = this.getSourceVariable() |
      SsaSource::method_call_refinement(srcVar, _, this.getDefiningNode())
    )
  }

  /** Gets the call that is the defining node of this refinement. */
  CallNode getCall() { result = this.getDefiningNode() }
}
|
||||
|
||||
/** An implicit (possible) modification of `self` at a method call. */
class SelfCallsiteRefinement extends MethodCallsiteRefinement {
  SelfCallsiteRefinement() {
    exists(Variable selfVar | selfVar = this.getSourceVariable() | selfVar.isSelf())
  }
}
|
||||
|
||||
/** The Python-specific sub-class of the generic `EssaEdgeRefinement`. */
class PyEdgeRefinement extends EssaEdgeRefinement {
  /**
   * Gets a textual representation of this pi-node.
   *
   * This is used in tests, hence the capital 'P': it sorts before 'phi' and is
   * more visually distinctive.
   */
  override string getRepresentation() {
    exists(string inner |
      inner = this.getInput().getRepresentation()
      or
      not exists(this.getInput()) and
      inner = this.getSourceVariable().getName() + "??"
    |
      result = "Pi(" + inner + ") [" + this.getSense() + "]"
    )
  }

  /** Gets the test: the last node of the predecessor block of this edge refinement. */
  ControlFlowNode getTest() {
    exists(BasicBlock pred | pred = this.getPredecessor() | result = pred.getLastNode())
  }
}
|
||||
@@ -1,564 +0,0 @@
|
||||
/**
|
||||
* Provides predicates for computing Enhanced SSA form
|
||||
* Computation of ESSA form is identical to plain SSA form,
|
||||
* but what counts as a use of definition differs.
|
||||
*
|
||||
* ## Language independent data-flow graph construction
|
||||
*
|
||||
* Construction of the data-flow graph is based on the principles behind SSA variables.
|
||||
*
|
||||
* The definition of an SSA variable is that (statically):
|
||||
*
|
||||
* * Each variable has exactly one definition
|
||||
* * A variable's definition dominates all its uses.
|
||||
*
|
||||
* SSA form was originally designed for compiler use and thus a "definition" of an SSA variable is
|
||||
* the same as a definition of the underlying source-code variable. For register allocation this is
|
||||
* sufficient to treat the variable as equivalent to the value held in the variable.
|
||||
*
|
||||
* However, this doesn't always work the way we want it for data-flow analysis.
|
||||
*
|
||||
* When we start to consider attribute assignment, tests on the value referred to by a variable,
|
||||
* escaping variables, implicit definitions, etc., we need something finer grained.
|
||||
*
|
||||
* A data-flow variable has the same properties as a normal SSA variable, but it also has the property that
|
||||
* *anything* that may change the way we view an object referred to by a variable should be treated as a definition of that variable.
|
||||
*
|
||||
* For example, tests are treated as definitions, so for the following Python code:
|
||||
* ```python
|
||||
* x = None
|
||||
* if not x:
|
||||
* x = True
|
||||
* ```
|
||||
* The data-flow graph (for `x`) is:
|
||||
* ```
|
||||
* x0 = None
|
||||
* x1 = pi(x0, not x)
|
||||
* x2 = True
|
||||
* x3 = phi(x1, x2)
|
||||
* ```
|
||||
* from which it is possible to infer that `x3` may not be None.
|
||||
* [ Phi functions are standard SSA, a Pi function is a filter or guard on the possible values that a variable
|
||||
* may hold]
|
||||
*
|
||||
* Attribute assignments are also treated as definitions, so for the following Python code:
|
||||
* ```python
|
||||
* x = C()
|
||||
* x.a = 1
|
||||
* y = C()
|
||||
* y.b = 2
|
||||
* ```
|
||||
* The data-flow graph is:
|
||||
* ```
|
||||
* x0 = C()
|
||||
* x1 = attr-assign(x0, .a = 1)
|
||||
* y0 = C()
|
||||
* y1 = attr-assign(y0, .b = 2)
|
||||
* ```
|
||||
* From which we can infer that `x1.a` is `1` but we know nothing about `y0.a` despite it being the same type.
|
||||
*
|
||||
* We can also insert "definitions" for transfers of values (say in global variables) where we do not yet know the call-graph. For example,
|
||||
* ```python
|
||||
* def foo():
|
||||
* global g
|
||||
* g = 1
|
||||
*
|
||||
* def bar():
|
||||
* foo()
|
||||
* g
|
||||
* ```
|
||||
* It should be clear in the above code that the use of `g` will have a value of `1`.
|
||||
* The data-flow graph looks like:
|
||||
* ```python
|
||||
* def foo():
|
||||
* g0 = scope-entry(g)
|
||||
* g1 = 1
|
||||
*
|
||||
* def bar():
|
||||
* g2 = scope-entry(g)
|
||||
* foo()
|
||||
* g3 = call-site(g, foo())
|
||||
* ```
|
||||
* Once we have established that `foo()` calls `foo`, then it is possible to link `call-site(g, foo())` to the final value of `g` in `foo`, i.e. `g1`, so effectively `g3 = call-site(g, foo())` becomes `g3 = g1` and the global data-flow graph for `g` effectively becomes:
|
||||
* ```
|
||||
* g0 = scope-entry(g)
|
||||
* g1 = 1
|
||||
* g2 = scope-entry(g)
|
||||
* g3 = g1
|
||||
* ```
|
||||
* and thus it falls out that `g3` must be `1`.
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
cached
|
||||
private module SsaComputeImpl {
|
||||
cached
|
||||
module EssaDefinitionsImpl {
|
||||
/**
 * Holds if `n`, at index `i` (rank `rankix`) in basic block `b`, is a live update
 * that is a definition of the variable `v`.
 *
 * The update is live if the next ranked reference to `v` in `b` is not a
 * definition, or if there is no later reference in `b` and `v` is live at the exit of `b`.
 */
cached
predicate variableDefinition(
  SsaSourceVariable v, ControlFlowNode n, BasicBlock b, int rankix, int i
) {
  SsaComputeImpl::defUseRank(v, b, rankix, i) and
  SsaComputeImpl::variableDefine(v, n, b, i) and
  exists(int next | next = rankix + 1 |
    if SsaComputeImpl::defUseRank(v, b, next, _)
    then not SsaComputeImpl::defRank(v, b, next, _)
    else Liveness::liveAtExit(v, b)
  )
}
|
||||
|
||||
/**
 * Holds if `n`, at index `i` (rank `rankix`) in basic block `b`, is a live update
 * that is a refinement of the variable `v`.
 *
 * The update is live if the next ranked reference to `v` in `b` is not a
 * definition, or if there is no later reference in `b` and `v` is live at the exit of `b`.
 */
cached
predicate variableRefinement(
  SsaSourceVariable v, ControlFlowNode n, BasicBlock b, int rankix, int i
) {
  SsaComputeImpl::defUseRank(v, b, rankix, i) and
  SsaComputeImpl::variableRefine(v, n, b, i) and
  exists(int next | next = rankix + 1 |
    if SsaComputeImpl::defUseRank(v, b, next, _)
    then not SsaComputeImpl::defRank(v, b, next, _)
    else Liveness::liveAtExit(v, b)
  )
}
|
||||
|
||||
/** Holds if `n` is a live update of `v`, that is, either a live refinement or a live definition. */
cached
predicate variableUpdate(SsaSourceVariable v, ControlFlowNode n, BasicBlock b, int rankix, int i) {
  variableRefinement(v, n, b, rankix, i)
  or
  variableDefinition(v, n, b, rankix, i)
}
|
||||
|
||||
/**
 * Holds if there is a pi-node for `v` on the edge `pred` -> `succ`: `v` has a
 * refinement on that edge and is live at the entry of `succ`.
 */
cached
predicate piNode(SsaSourceVariable v, BasicBlock pred, BasicBlock succ) {
  Liveness::liveAtEntry(v, succ) and
  v.hasRefinementEdge(_, pred, succ)
}
|
||||
|
||||
/**
 * Holds if there is a phi node for `v` at the beginning of basic block `b`.
 *
 * This requires `v` to be live at the entry of `b`, and either `b` to be in the
 * dominance frontier of a block containing an SSA definition of `v`, or `b` to
 * be the target of a pi-node edge while having more than one predecessor.
 */
cached
predicate phiNode(SsaSourceVariable v, BasicBlock b) {
  Liveness::liveAtEntry(v, b) and
  (
    strictcount(BasicBlock pred | pred = b.getAPredecessor()) >= 2 and
    piNode(v, _, b)
    or
    exists(BasicBlock defBlock |
      defBlock.dominanceFrontier(b) and
      SsaComputeImpl::ssaDef(v, defBlock)
    )
  )
}
|
||||
}
|
||||
|
||||
/**
 * Holds if `n` is a defining node of `v` located in basic block `b`, with `i` being
 * its definition index: `2 * pos + 1`, where `pos` is the position of `n` in `b`.
 */
cached
predicate variableDefine(SsaSourceVariable v, ControlFlowNode n, BasicBlock b, int i) {
  exists(int pos | b.getNode(pos) = n | i = 2 * pos + 1) and
  v.hasDefiningNode(n)
}
|
||||
|
||||
/**
 * Holds if `n` is a node refining `v` located in basic block `b`, with `i` being
 * its definition index: `2 * pos + 1`, where `pos` is the position of `n` in `b`.
 */
cached
predicate variableRefine(SsaSourceVariable v, ControlFlowNode n, BasicBlock b, int i) {
  exists(int pos | b.getNode(pos) = n | i = 2 * pos + 1) and
  v.hasRefinement(_, n)
}
|
||||
|
||||
/** Holds if `n`, at index `i` in `b`, either refines or defines `v`. */
cached
predicate variableDef(SsaSourceVariable v, ControlFlowNode n, BasicBlock b, int i) {
  variableRefine(v, n, b, i)
  or
  variableDefine(v, n, b, i)
}
|
||||
|
||||
/**
 * Holds if `i` is the `rankix`th index, in increasing order, at which there is an
 * SSA definition or use of `v` in the basic block `b`.
 *
 * Translating basic block indices into rank indices lets traversals of a basic
 * block skip the irrelevant indices at which `v` is neither defined nor used.
 */
cached
predicate defUseRank(SsaSourceVariable v, BasicBlock b, int rankix, int i) {
  i =
    rank[rankix](int idx |
      variableUse(v, _, b, idx) or variableDef(v, _, b, idx)
    |
      idx
    )
}
|
||||
|
||||
/** Holds if `v` is defined (or refined) at index `i` in basic block `b`, and `i` has rank `rankix`. */
cached
predicate defRank(SsaSourceVariable v, BasicBlock b, int rankix, int i) {
  defUseRank(v, b, rankix, i) and
  variableDef(v, _, b, i)
}
|
||||
|
||||
/**
 * Holds if `use` is a variable access of `v` in `b` at index `i`, where `use` is
 * either a use of `v` or the use side of a refinement of `v`.
 *
 * Uses have the even index `2 * pos`, where `pos` is the position of `use` in `b`.
 */
cached
predicate variableUse(SsaSourceVariable v, ControlFlowNode use, BasicBlock b, int i) {
  exists(int pos | use = b.getNode(pos) | i = 2 * pos) and
  (
    v.hasRefinement(use, _)
    or
    use = v.getAUse()
  )
}
|
||||
|
||||
/**
 * Holds if there is a definition of an SSA variable for `v` in basic block `b`.
 * This is either a variable update, a pi-node on an edge into `b`, or a phi node.
 */
cached
predicate ssaDef(SsaSourceVariable v, BasicBlock b) {
  EssaDefinitions::variableUpdate(v, _, b, _, _)
  or
  EssaDefinitions::piNode(v, _, b)
  or
  EssaDefinitions::phiNode(v, b)
}
|
||||
|
||||
/*
|
||||
* The construction of SSA form ensures that each use of a variable is
|
||||
* dominated by its definition. A definition of an SSA variable therefore
|
||||
* reaches a `ControlFlowNode` if it is the _closest_ SSA variable definition
|
||||
* that dominates the node. If two definitions dominate a node then one must
|
||||
* dominate the other, so therefore the definition of _closest_ is given by the
|
||||
* dominator tree. Thus, reaching definitions can be calculated in terms of
|
||||
* dominance.
|
||||
*/
|
||||
|
||||
/**
 * Gets the maximum rank index for the given variable and basic block.
 *
 * If `v` has no ranked reference in `b`, the result is 0, provided that
 * `b` has a phi node or a pi-node for `v`.
 */
cached
int lastRank(SsaSourceVariable v, BasicBlock b) {
  if defUseRank(v, b, _, _)
  then result = max(int rankix | defUseRank(v, b, rankix, _))
  else (
    (EssaDefinitions::piNode(v, _, b) or EssaDefinitions::phiNode(v, b)) and
    result = 0
  )
}
|
||||
|
||||
/**
 * Holds if there is an SSA definition of `v` with index `i` at rank `rankix` in `b`.
 *
 * Variable updates keep their own rank. A phi node has rank 0. A pi-node has
 * rank -1 if `b` also has a phi node for `v`, and rank 0 otherwise.
 */
private predicate ssaDefRank(SsaSourceVariable v, BasicBlock b, int rankix, int i) {
  EssaDefinitions::variableUpdate(v, _, b, rankix, i)
  or
  EssaDefinitions::phiNode(v, b) and
  i = phiIndex() and
  rankix = 0
  or
  EssaDefinitions::piNode(v, _, b) and
  i = piIndex() and
  (if EssaDefinitions::phiNode(v, b) then rankix = -1 else rankix = 0)
}
|
||||
|
||||
/**
 * Holds if the SSA definition of `v` with index `i` reaches the rank index `rankix`
 * in its own basic block `b`.
 */
cached
predicate ssaDefReachesRank(SsaSourceVariable v, BasicBlock b, int i, int rankix) {
  // Step: the definition reached the previous rank, and nothing redefines `v` here.
  not ssaDefRank(v, b, rankix, _) and
  ssaDefReachesRank(v, b, i, rankix - 1) and
  lastRank(v, b) >= rankix
  or
  // Base: the definition itself sits at this rank.
  ssaDefRank(v, b, rankix, i)
}
|
||||
|
||||
/**
 * Holds if the SSA definition of `v` with index `i` in basic block `b` reaches
 * `use` in the same basic block without crossing another SSA definition of `v`.
 */
cached
predicate ssaDefReachesUseWithinBlock(
  SsaSourceVariable v, BasicBlock b, int i, ControlFlowNode use
) {
  exists(int useRank, int useIndex |
    variableUse(v, use, b, useIndex) and
    defUseRank(v, b, useRank, useIndex) and
    ssaDefReachesRank(v, b, i, useRank)
  )
}
|
||||
|
||||
/** Provides the liveness predicates used when placing SSA definitions. */
cached
module LivenessImpl {
  /** Holds if `v` is live at the exit of `b`, that is, live at the entry of a successor of `b`. */
  cached
  predicate liveAtExit(SsaSourceVariable v, BasicBlock b) {
    exists(BasicBlock succ | succ = b.getASuccessor() | liveAtEntry(v, succ))
  }

  /**
   * Holds if `v` is live at the entry of `b`: either the first ranked reference
   * to `v` in `b` is not a definition, or `b` does not reference `v` at all and
   * `v` is live at the exit of `b`.
   */
  cached
  predicate liveAtEntry(SsaSourceVariable v, BasicBlock b) {
    not SsaComputeImpl::defUseRank(v, b, _, _) and
    liveAtExit(v, b)
    or
    SsaComputeImpl::defUseRank(v, b, 1, _) and
    not SsaComputeImpl::defRank(v, b, 1, _)
  }
}
|
||||
|
||||
cached
|
||||
module SsaDefinitionsImpl {
|
||||
/**
 * Holds if the SSA definition of `v` at `(defbb, defindex)` reaches the end of
 * the immediate dominator of `b`.
 */
pragma[noinline]
private predicate reachesEndOfBlockRec(
  SsaSourceVariable v, BasicBlock defbb, int defindex, BasicBlock b
) {
  reachesEndOfBlock(v, defbb, defindex, b.getImmediateDominator())
}
|
||||
|
||||
/**
 * Holds if the SSA definition of `v` at `(defbb, defindex)` reaches the end of basic
 * block `b`, at which point it is still live, without crossing another SSA definition of `v`.
 */
cached
predicate reachesEndOfBlock(SsaSourceVariable v, BasicBlock defbb, int defindex, BasicBlock b) {
  Liveness::liveAtExit(v, b) and
  (
    // The definition reaches the end of the immediate dominator of `b`, and `b`
    // does not redefine `v`; traversing the dominator graph is sufficient.
    not SsaComputeImpl::ssaDef(v, b) and
    reachesEndOfBlockRec(v, defbb, defindex, b)
    or
    // The definition is in `b` itself and reaches the last rank of `b`.
    b = defbb and
    SsaComputeImpl::ssaDefReachesRank(v, defbb, defindex, SsaComputeImpl::lastRank(v, b))
  )
}
|
||||
|
||||
/**
 * Holds if the SSA definition of `v` at `(defbb, defindex)` reaches `use` without
 * crossing another SSA definition of `v`.
 */
cached
predicate reachesUse(SsaSourceVariable v, BasicBlock defbb, int defindex, ControlFlowNode use) {
  // The use is in another block: the definition must reach the end of a predecessor
  // of that block, and no definition within the block may reach the use first.
  exists(BasicBlock useBlock |
    not SsaComputeImpl::ssaDefReachesUseWithinBlock(v, useBlock, _, use) and
    reachesEndOfBlock(v, defbb, defindex, useBlock.getAPredecessor()) and
    SsaComputeImpl::variableUse(v, use, useBlock, _)
  )
  or
  // The use is reached within the defining block.
  SsaComputeImpl::ssaDefReachesUseWithinBlock(v, defbb, defindex, use)
}
|
||||
|
||||
/**
 * Holds if `(defbb, defindex)` is an SSA definition of `v` that reaches an exit without
 * crossing another SSA definition of `v`.
 *
 * Concretely, the definition reaches a use that is the last ranked reference to `v`
 * in a block at whose exit `v` is not live.
 */
cached
predicate reachesExit(SsaSourceVariable v, BasicBlock defbb, int defindex) {
  exists(BasicBlock finalBlock, ControlFlowNode finalUse, int useIndex |
    SsaComputeImpl::variableUse(v, finalUse, finalBlock, useIndex) and
    SsaComputeImpl::defUseRank(v, finalBlock, SsaComputeImpl::lastRank(v, finalBlock), useIndex) and
    reachesUse(v, defbb, defindex, finalUse) and
    not Liveness::liveAtExit(v, finalBlock)
  )
}
|
||||
}
|
||||
|
||||
cached
|
||||
module AdjacentUsesImpl {
|
||||
/**
 * Holds if `rankix` is the rank of the index `i` at which there is a definition or
 * an explicit (source) use of `v` in the basic block `b`.
 */
cached
predicate defSourceUseRank(SsaSourceVariable v, BasicBlock b, int rankix, int i) {
  i =
    rank[rankix](int idx |
      variableSourceUse(v, _, b, idx) or variableDefine(v, _, b, idx)
    |
      idx
    )
}
|
||||
|
||||
/**
 * Holds if `use` is a source use of `v` in `b` at index `i`, where `i` is
 * `2 * pos` and `pos` is the position of `use` in `b`.
 */
cached
predicate variableSourceUse(SsaSourceVariable v, ControlFlowNode use, BasicBlock b, int i) {
  exists(int pos | use = b.getNode(pos) | i = 2 * pos) and
  use = v.getASourceUse()
}
|
||||
|
||||
/** Gets the largest rank index at which `v` is defined or explicitly used in basic block `b`. */
private int lastSourceUseRank(SsaSourceVariable v, BasicBlock b) {
  result = max(int r | defSourceUseRank(v, b, r, _) | r)
}
|
||||
|
||||
/** Holds if `v` is defined or explicitly used in `b`, that is, it has some rank in `b`. */
private predicate varOccursInBlock(SsaSourceVariable v, BasicBlock b) {
  exists(int r | defSourceUseRank(v, b, r, _))
}
|
||||
|
||||
/**
 * Holds if `v` occurs in `b`, or some SSA definition of `v` reaches the end of `b`
 * (where `v` is still live), so that an occurrence of `v` may follow `b`.
 */
private predicate blockPrecedesVar(SsaSourceVariable v, BasicBlock b) {
  SsaDefinitionsImpl::reachesEndOfBlock(v, _, _, b)
  or
  varOccursInBlock(v, b)
}
|
||||
|
||||
/**
 * Holds if `b2` is a transitive successor of `b1`, and `v` occurs in `b1` and
 * in `b2` or one of its transitive successors, but not in any block on the path
 * between `b1` and `b2`.
 */
private predicate varBlockReaches(SsaSourceVariable v, BasicBlock b1, BasicBlock b2) {
  blockPrecedesVar(v, b2) and
  (
    // Base: `b2` directly follows a block in which `v` occurs.
    b2 = b1.getASuccessor() and
    varOccursInBlock(v, b1)
    or
    // Step: extend the path through a block `mid` in which `v` does not occur.
    exists(BasicBlock mid |
      not varOccursInBlock(v, mid) and
      varBlockReaches(v, b1, mid)
    |
      b2 = mid.getASuccessor()
    )
  )
}
|
||||
|
||||
/**
 * Holds if `b2` is a transitive successor of `b1`, and `v` occurs in both `b1`
 * and `b2` but not in any block on the path between them.
 */
private predicate varBlockStep(SsaSourceVariable v, BasicBlock b1, BasicBlock b2) {
  varOccursInBlock(v, b2) and
  varBlockReaches(v, b1, b2)
}
|
||||
|
||||
/**
 * Holds if `v` occurs at index `i1` in `b1` and at index `i2` in `b2`, and
 * there is a path between them without any occurrence of `v`.
 */
cached
predicate adjacentVarRefs(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2) {
  // Different blocks: the last occurrence in `b1` and the first occurrence in `b2`.
  exists(int last | last = lastSourceUseRank(v, b1) | defSourceUseRank(v, b1, last, i1)) and
  varBlockStep(v, b1, b2) and
  defSourceUseRank(v, b2, 1, i2)
  or
  // Same block: consecutive ranks.
  exists(int r |
    defSourceUseRank(v, b1, r, i1) and
    defSourceUseRank(v, b2, r + 1, i2) and
    b2 = b1
  )
}
|
||||
|
||||
/**
 * Holds if `use1` is a source use of the variable `v` that is adjacent to the
 * reference to `v` in basic block `b2` at index `i2`.
 *
 * This is a helper predicate for `adjacentUseUseSameVar`. It prevents the first
 * join from being between the two instances of `variableSourceUse` in
 * ```ql
 * exists(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2 |
 *   adjacentVarRefs(v, b1, i1, b2, i2) and
 *   variableSourceUse(v, use1, b1, i1) and
 *   variableSourceUse(v, use2, b2, i2)
 * )
 * ```
 */
pragma[nomagic]
private predicate adjacentRefUse(
  SsaSourceVariable v, BasicBlock b2, int i2, ControlFlowNode use1
) {
  exists(BasicBlock fromBlock, int fromIndex |
    variableSourceUse(v, use1, fromBlock, fromIndex) and
    adjacentVarRefs(v, fromBlock, fromIndex, b2, i2)
  )
}
|
||||
|
||||
/**
 * Holds if `use1` and `use2` form an adjacent use-use-pair of the same SSA
 * variable, that is, the value read in `use1` can reach `use2` without passing
 * through any other use or any SSA definition of the variable.
 */
cached
predicate adjacentUseUseSameVar(ControlFlowNode use1, ControlFlowNode use2) {
  exists(SsaSourceVariable srcVar, BasicBlock toBlock, int toIndex |
    variableSourceUse(srcVar, use2, toBlock, toIndex) and
    adjacentRefUse(srcVar, toBlock, toIndex, use1)
  )
}
|
||||
|
||||
/**
 * Holds if `use1` and `use2` form an adjacent use-use-pair of the same
 * `SsaSourceVariable`, that is, the value read in `use1` can reach `use2`
 * without passing through any other use or any SSA definition of the variable
 * except for phi nodes.
 */
cached
predicate adjacentUseUse(ControlFlowNode use1, ControlFlowNode use2) {
  // `use1` is adjacent to a phi node whose first use is `use2`.
  exists(
    SsaSourceVariable srcVar, PhiFunction phi, BasicBlock fromBlock, int fromIndex,
    BasicBlock phiBlock, int phiPos
  |
    variableUse(srcVar, use1, fromBlock, fromIndex) and
    adjacentVarRefs(srcVar, fromBlock, fromIndex, phiBlock, phiPos) and
    definesAt(phi, srcVar, phiBlock, phiPos) and
    firstUse(phi, use2)
  )
  or
  adjacentUseUseSameVar(use1, use2)
}
|
||||
|
||||
/**
 * Holds if the value defined by `def` reaches `use` before reaching any other
 * use. The path from `def` to `use` may go through phi nodes.
 */
cached
predicate firstUse(EssaDefinition def, ControlFlowNode use) {
  // Direct case: the reference following the definition is the source use itself.
  exists(
    SsaSourceVariable srcVar, BasicBlock defBlock, int defIndex, BasicBlock useBlock,
    int useIndex
  |
    definesAt(def, srcVar, defBlock, defIndex) and
    adjacentVarRefs(srcVar, defBlock, defIndex, useBlock, useIndex) and
    variableSourceUse(srcVar, use, useBlock, useIndex)
  )
  or
  // Indirect case: the reference following the definition is a phi node for the
  // same source variable; recurse to find a first use of that phi node.
  exists(
    SsaSourceVariable srcVar, EssaDefinition phi, BasicBlock defBlock, int defIndex,
    BasicBlock phiBlock, int phiPos
  |
    definesAt(def, srcVar, defBlock, defIndex) and
    adjacentVarRefs(srcVar, defBlock, defIndex, phiBlock, phiPos) and
    definesAt(phi, srcVar, phiBlock, phiPos) and
    phi instanceof PhiFunction and
    firstUse(phi, use)
  )
}
|
||||
|
||||
/**
 * Holds if `def` is a definition of `v` located at index `i` of basic block `b`.
 * A phi node is given the index -1.
 */
cached
predicate definesAt(EssaDefinition def, SsaSourceVariable v, BasicBlock b, int i) {
  // Phi nodes: variable and block come from the phi node, the index is fixed.
  exists(PhiFunction phi | phi = def |
    i = -1 and
    b = phi.getBasicBlock() and
    v = phi.getSourceVariable()
  )
  or
  // Node definitions: the position is that of the defining control-flow node,
  // as reported by `variableDefine`.
  exists(EssaNodeDefinition nodeDef, ControlFlowNode defNode | nodeDef = def |
    variableDefine(v, defNode, b, i) and
    nodeDef.definedBy(v, defNode)
  )
}
|
||||
|
||||
/**
 * Holds if the value defined by `def` reaches `use`. The path from `def` to
 * `use` may go through phi nodes.
 */
cached
predicate useOfDef(EssaDefinition def, ControlFlowNode use) {
  // Begin at a first use of `def`, then follow zero or more `adjacentUseUse` steps.
  exists(ControlFlowNode start | firstUse(def, start) | adjacentUseUse*(start, use))
}
|
||||
}
|
||||
}
|
||||
|
||||
import SsaComputeImpl::SsaDefinitionsImpl as SsaDefinitions
|
||||
import SsaComputeImpl::EssaDefinitionsImpl as EssaDefinitions
|
||||
import SsaComputeImpl::LivenessImpl as Liveness
|
||||
import SsaComputeImpl::AdjacentUsesImpl as AdjacentUses
|
||||
|
||||
/* This is exported primarily for testing */
|
||||
/*
|
||||
* A note on numbering
|
||||
* In order to create an SSA graph, we need an order of definitions and uses within a basic block.
|
||||
* To do this we index definitions and uses as follows:
|
||||
* Phi-functions have an index of -1, so precede all normal uses and definitions in a block.
|
||||
* Pi-functions (on edges) have an index of -2 in the successor block, so precede all other uses and definitions, including phi-functions
|
||||
* A use of a variable at a CFG node is assumed to occur before any definition at the same node, so:
|
||||
* * a use at the `j`th node of a block is given the index `2*j` and
|
||||
* * a definition at the `j`th node of a block is given the index `2*j + 1`.
|
||||
*/
|
||||
|
||||
/** Gets the index given to phi-functions within a basic block, namely -1. */
pragma[inline]
int phiIndex() {
  result = -1
}
|
||||
|
||||
/** Gets the index given to pi-functions in the successor block, namely -2. */
pragma[inline]
int piIndex() {
  result = -2
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user