Merge remote-tracking branch 'origin/main' into jty/python/emailInjection

This commit is contained in:
jorgectf
2022-02-26 01:22:55 +01:00
4485 changed files with 536628 additions and 132797 deletions

View File

@@ -0,0 +1,5 @@
name: codeql/python-consistency-queries
groups: [python, test, consistency-queries]
dependencies:
codeql/python-all: "*"
extractor: python

View File

@@ -1,4 +1,6 @@
name: codeql/python-examples
version: 0.0.2
groups:
- python
- examples
dependencies:
codeql/python-all: "*"
codeql/python-all: "*"

View File

@@ -0,0 +1,34 @@
## 0.0.9
## 0.0.8
### Deprecated APIs
* Moved the files defining regex injection configuration and customization. Instead of `import semmle.python.security.injection.RegexInjection`, please use `import semmle.python.security.dataflow.RegexInjection` (the same applies to `RegexInjectionCustomizations`).
* The `codeql/python-upgrades` CodeQL pack has been removed. All upgrade scripts have been merged into the `codeql/python-all` CodeQL pack.
## 0.0.7
## 0.0.6
## 0.0.5
### Minor Analysis Improvements
* Added modeling of many functions from the `os` module that use file system paths, such as `os.stat`, `os.chdir`, `os.mkdir`, and so on.
* Added modeling of the `tempfile` module for creating temporary files and directories, such as the functions `tempfile.NamedTemporaryFile` and `tempfile.TemporaryDirectory`.
* Extended the modeling of FastAPI such that custom subclasses of `fastapi.APIRouter` are recognized.
* Extended the modeling of FastAPI such that `fastapi.responses.FileResponse` are considered `FileSystemAccess`.
* Added modeling of the `posixpath`, `ntpath`, and `genericpath` modules for path operations (although these are not supposed to be used), resulting in new sinks.
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.
## 0.0.4
### Major Analysis Improvements
* Added modeling of `os.stat`, `os.lstat`, `os.statvfs`, `os.fstat`, and `os.fstatvfs`, which are new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of the `posixpath`, `ntpath`, and `genericpath` modules for path operations (although these are not supposed to be used), resulting in new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.
* Added modeling of `aiopg` for sinks executing SQL.
* Added modeling of HTTP requests and responses when using `flask_admin` (`Flask-Admin` PyPI package), which leads to additional remote flow sources.
* Added modeling of the PyPI package `toml`, which provides encoding/decoding of TOML documents, leading to new taint-tracking steps.

View File

@@ -0,0 +1,4 @@
---
category: deprecated
---
* The old points-to based modeling has been deprecated. Use the new type-tracking/API-graphs based modeling instead.

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Fixed taint propagation for attribute assignment. In the assignment `x.foo = tainted` we no longer treat the entire object `x` as tainted, just because the attribute `foo` contains tainted data. This leads to slightly fewer false positives.

View File

@@ -0,0 +1,10 @@
## 0.0.4
### Major Analysis Improvements
* Added modeling of `os.stat`, `os.lstat`, `os.statvfs`, `os.fstat`, and `os.fstatvfs`, which are new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of the `posixpath`, `ntpath`, and `genericpath` modules for path operations (although these are not supposed to be used), resulting in new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.
* Added modeling of `aiopg` for sinks executing SQL.
* Added modeling of HTTP requests and responses when using `flask_admin` (`Flask-Admin` PyPI package), which leads to additional remote flow sources.
* Added modeling of the PyPI package `toml`, which provides encoding/decoding of TOML documents, leading to new taint-tracking steps.

View File

@@ -0,0 +1,10 @@
## 0.0.5
### Minor Analysis Improvements
* Added modeling of many functions from the `os` module that use file system paths, such as `os.stat`, `os.chdir`, `os.mkdir`, and so on.
* Added modeling of the `tempfile` module for creating temporary files and directories, such as the functions `tempfile.NamedTemporaryFile` and `tempfile.TemporaryDirectory`.
* Extended the modeling of FastAPI such that custom subclasses of `fastapi.APIRouter` are recognized.
* Extended the modeling of FastAPI such that `fastapi.responses.FileResponse` are considered `FileSystemAccess`.
* Added modeling of the `posixpath`, `ntpath`, and `genericpath` modules for path operations (although these are not supposed to be used), resulting in new sinks.
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.

View File

@@ -0,0 +1 @@
## 0.0.6

View File

@@ -0,0 +1 @@
## 0.0.7

View File

@@ -0,0 +1,6 @@
## 0.0.8
### Deprecated APIs
* Moved the files defining regex injection configuration and customization. Instead of `import semmle.python.security.injection.RegexInjection`, please use `import semmle.python.security.dataflow.RegexInjection` (the same applies to `RegexInjectionCustomizations`).
* The `codeql/python-upgrades` CodeQL pack has been removed. All upgrade scripts have been merged into the `codeql/python-all` CodeQL pack.

View File

@@ -0,0 +1 @@
## 0.0.9

View File

@@ -0,0 +1,2 @@
---
lastReleaseVersion: 0.0.9

View File

@@ -10,6 +10,7 @@ import semmle.python.Class
import semmle.python.Import
import semmle.python.Stmts
import semmle.python.Exprs
import semmle.python.Patterns
import semmle.python.Keywords
import semmle.python.Comprehensions
import semmle.python.Flow

View File

@@ -1,7 +1,7 @@
name: codeql/python-all
version: 0.0.2
version: 0.0.10-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python
library: true
dependencies:
codeql/python-upgrades: 0.0.2
upgrades: upgrades

View File

@@ -114,13 +114,13 @@ module API {
* Gets a node such that there is an edge in the API graph between this node and the other
* one, and that edge is labeled with `lbl`.
*/
Node getASuccessor(string lbl) { Impl::edge(this, lbl, result) }
Node getASuccessor(Label::ApiLabel lbl) { Impl::edge(this, lbl, result) }
/**
* Gets a node such that there is an edge in the API graph between that other node and
* this one, and that edge is labeled with `lbl`
*/
Node getAPredecessor(string lbl) { this = result.getASuccessor(lbl) }
Node getAPredecessor(Label::ApiLabel lbl) { this = result.getASuccessor(lbl) }
/**
* Gets a node such that there is an edge in the API graph between this node and the other
@@ -174,9 +174,8 @@ module API {
length = 0 and
result = ""
or
exists(Node pred, string lbl, string predpath |
exists(Node pred, Label::ApiLabel lbl, string predpath |
Impl::edge(pred, lbl, this) and
lbl != "" and
predpath = pred.getAPath(length - 1) and
exists(string dot | if length = 1 then dot = "" else dot = "." |
result = predpath + dot + lbl and
@@ -335,7 +334,8 @@ module API {
*
* For instance, `prefix_member("foo.bar", "baz", "foo.bar.baz")` would hold.
*/
private predicate prefix_member(TApiNode base, string member, TApiNode sub) {
cached
predicate prefix_member(TApiNode base, string member, TApiNode sub) {
exists(string sub_str, string regexp |
regexp = "(.+)[.]([^.]+)" and
base = MkModuleImport(sub_str.regexpCapture(regexp, 1)) and
@@ -358,134 +358,26 @@ module API {
)
}
/** Gets the name of a known built-in. */
private string getBuiltInName() {
// These lists were created by inspecting the `builtins` and `__builtin__` modules in
// Python 3 and 2 respectively, using the `dir` built-in.
// Built-in functions and exceptions shared between Python 2 and 3
result in [
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
// Exceptions
"ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
"BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
"FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
"ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
"LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
"OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
"RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
"SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
// Added for compatibility
"exec"
]
or
// Built-in constants shared between Python 2 and 3
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
or
// Python 3 only
result in [
"ascii", "breakpoint", "bytes", "exec", "aiter", "anext",
// Exceptions
"BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
"ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
"FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
"NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
"ResourceWarning", "StopAsyncIteration", "TimeoutError"
]
or
// Python 2 only
result in [
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
"unichr", "unicode", "xrange"
]
}
/**
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
*
* Currently this is an over-approximation, and may not account for things like overwriting a
* built-in with a different value.
*/
private DataFlow::Node likely_builtin(string name) {
exists(Module m |
result.asCfgNode() =
any(NameNode n |
possible_builtin_accessed_in_module(n, name, m) and
not possible_builtin_defined_in_module(name, m)
)
)
}
/**
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
* a value in the module `m`.
*/
private predicate possible_builtin_defined_in_module(string name, Module m) {
global_name_defined_in_module(name, m) and
name = getBuiltInName()
}
/**
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
* built-in) inside the module `m`.
*/
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
n.isGlobal() and
n.isLoad() and
name = n.getId() and
name = getBuiltInName() and
m = n.getEnclosingModule()
}
/**
* Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
* built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
*/
private predicate name_possibly_defined_in_import_star(NameNode n, string name, Scope s) {
n.isLoad() and
name = n.getId() and
// Not already defined in an enclosing scope.
not exists(LocalVariable v |
v.getId() = name and v.getScope() = n.getScope().getEnclosingScope*()
) and
not name = getBuiltInName() and
s = n.getScope().getEnclosingScope*() and
exists(potential_import_star_base(s)) and
not global_name_defined_in_module(name, n.getEnclosingModule())
}
/** Holds if a global variable called `name` is assigned a value in the module `m`. */
private predicate global_name_defined_in_module(string name, Module m) {
exists(NameNode n |
not exists(LocalVariable v | n.defines(v)) and
n.isStore() and
name = n.getId() and
m = n.getEnclosingModule()
)
}
private import semmle.python.dataflow.new.internal.Builtins
private import semmle.python.dataflow.new.internal.ImportStar
/**
* Gets the API graph node for all modules imported with `from ... import *` inside the scope `s`.
*
* For example, given
*
* `from foo.bar import *`
* ```python
* from foo.bar import *
* ```
*
* this would be the API graph node with the path
*
* `moduleImport("foo").getMember("bar")`
*/
private TApiNode potential_import_star_base(Scope s) {
exists(DataFlow::Node ref |
ref.asCfgNode() = any(ImportStarNode n | n.getScope() = s).getModule() and
use(result, ref)
exists(DataFlow::Node n |
n.asCfgNode() = ImportStar::potentialImportStarBase(s) and
use(result, n)
)
}
@@ -494,7 +386,7 @@ module API {
* `lbl` in the API graph.
*/
cached
predicate use(TApiNode base, string lbl, DataFlow::Node ref) {
predicate use(TApiNode base, Label::ApiLabel lbl, DataFlow::Node ref) {
exists(DataFlow::LocalSourceNode src, DataFlow::LocalSourceNode pred |
// First, we find a predecessor of the node `ref` that we want to determine. The predecessor
// is any node that is a type-tracked use of a data flow node (`src`), which is itself a
@@ -529,14 +421,14 @@ module API {
or
// Built-ins, treated as members of the module `builtins`
base = MkModuleImport("builtins") and
lbl = Label::member(any(string name | ref = likely_builtin(name)))
lbl = Label::member(any(string name | ref = Builtins::likelyBuiltin(name)))
or
// Unknown variables that may belong to a module imported with `import *`
exists(Scope s |
base = potential_import_star_base(s) and
lbl =
Label::member(any(string name |
name_possibly_defined_in_import_star(ref.asCfgNode(), name, s)
ImportStar::namePossiblyDefinedInImportStar(ref.asCfgNode(), name, s)
))
)
}
@@ -589,7 +481,7 @@ module API {
* Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`.
*/
cached
predicate edge(TApiNode pred, string lbl, TApiNode succ) {
predicate edge(TApiNode pred, Label::ApiLabel lbl, TApiNode succ) {
/* There's an edge from the root node for each imported module. */
exists(string m |
pred = MkRoot() and
@@ -622,36 +514,126 @@ module API {
cached
int distanceFromRoot(TApiNode nd) = shortestDistances(MkRoot/0, edge/2)(_, nd, result)
}
}
private module Label {
/** Gets the edge label for the module `m`. */
bindingset[m]
bindingset[result]
string mod(string m) { result = "moduleImport(\"" + m + "\")" }
/** Provides classes modeling the various edges (labels) in the API graph. */
module Label {
/** A label in the API-graph */
class ApiLabel extends TLabel {
/** Gets a string representation of this label. */
string toString() { result = "???" }
}
/** Gets the `member` edge label for member `m`. */
bindingset[m]
bindingset[result]
string member(string m) { result = "getMember(\"" + m + "\")" }
private import LabelImpl
/** Gets the `member` edge label for the unknown member. */
string unknownMember() { result = "getUnknownMember()" }
private module LabelImpl {
private import semmle.python.dataflow.new.internal.Builtins
private import semmle.python.dataflow.new.internal.ImportStar
/** Gets the `member` edge label for the given attribute reference. */
string memberFromRef(DataFlow::AttrRef pr) {
result = member(pr.getAttributeName())
or
not exists(pr.getAttributeName()) and
result = unknownMember()
newtype TLabel =
MkLabelModule(string mod) { exists(Impl::MkModuleImport(mod)) } or
MkLabelMember(string member) {
member = any(DataFlow::AttrRef pr).getAttributeName() or
exists(Builtins::likelyBuiltin(member)) or
ImportStar::namePossiblyDefinedInImportStar(_, member, _) or
Impl::prefix_member(_, member, _)
} or
MkLabelUnknownMember() or
MkLabelParameter(int i) {
none() // TODO: Fill in when adding def nodes
} or
MkLabelReturn() or
MkLabelSubclass() or
MkLabelAwait()
/** A label for a module. */
class LabelModule extends ApiLabel {
string mod;
LabelModule() { this = MkLabelModule(mod) }
/** Gets the module associated with this label. */
string getMod() { result = mod }
override string toString() { result = "moduleImport(\"" + mod + "\")" }
}
/** A label for the member named `member`. */
class LabelMember extends ApiLabel {
string member;
LabelMember() { this = MkLabelMember(member) }
/** Gets the member name associated with this label. */
string getMember() { result = member }
override string toString() { result = "getMember(\"" + member + "\")" }
}
/** A label for a member with an unknown name. */
class LabelUnknownMember extends ApiLabel {
LabelUnknownMember() { this = MkLabelUnknownMember() }
override string toString() { result = "getUnknownMember()" }
}
/** A label for parameter `i`. */
class LabelParameter extends ApiLabel {
int i;
LabelParameter() { this = MkLabelParameter(i) }
override string toString() { result = "getParameter(" + i + ")" }
/** Gets the index of the parameter for this label. */
int getIndex() { result = i }
}
/** A label that gets the return value of a function. */
class LabelReturn extends ApiLabel {
LabelReturn() { this = MkLabelReturn() }
override string toString() { result = "getReturn()" }
}
/** A label that gets the subclass of a class. */
class LabelSubclass extends ApiLabel {
LabelSubclass() { this = MkLabelSubclass() }
override string toString() { result = "getASubclass()" }
}
/** A label for awaited values. */
class LabelAwait extends ApiLabel {
LabelAwait() { this = MkLabelAwait() }
override string toString() { result = "getAwaited()" }
}
}
/** Gets the edge label for the module `m`. */
LabelModule mod(string m) { result.getMod() = m }
/** Gets the `member` edge label for member `m`. */
LabelMember member(string m) { result.getMember() = m }
/** Gets the `member` edge label for the unknown member. */
LabelUnknownMember unknownMember() { any() }
/** Gets the `member` edge label for the given attribute reference. */
ApiLabel memberFromRef(DataFlow::AttrRef pr) {
result = member(pr.getAttributeName())
or
not exists(pr.getAttributeName()) and
result = unknownMember()
}
/** Gets the `return` edge label. */
LabelReturn return() { any() }
/** Gets the `subclass` edge label. */
LabelSubclass subclass() { any() }
/** Gets the `await` edge label. */
LabelAwait await() { any() }
}
/** Gets the `return` edge label. */
string return() { result = "getReturn()" }
/** Gets the `subclass` edge label. */
string subclass() { result = "getASubclass()" }
/** Gets the `await` edge label. */
string await() { result = "getAwaited()" }
}

View File

@@ -62,27 +62,33 @@ abstract class AstNode extends AstNode_ {
/* Parents */
/** Internal implementation class */
library class FunctionParent extends FunctionParent_ { }
class FunctionParent extends FunctionParent_ { }
/** Internal implementation class */
library class ArgumentsParent extends ArgumentsParent_ { }
class ArgumentsParent extends ArgumentsParent_ { }
/** Internal implementation class */
library class ExprListParent extends ExprListParent_ { }
class ExprListParent extends ExprListParent_ { }
/** Internal implementation class */
library class ExprContextParent extends ExprContextParent_ { }
class ExprContextParent extends ExprContextParent_ { }
/** Internal implementation class */
library class StmtListParent extends StmtListParent_ { }
class StmtListParent extends StmtListParent_ { }
/** Internal implementation class */
library class StrListParent extends StrListParent_ { }
class StrListParent extends StrListParent_ { }
/** Internal implementation class */
library class ExprParent extends ExprParent_ { }
class ExprParent extends ExprParent_ { }
library class DictItem extends DictItem_, AstNode {
/** Internal implementation class */
class PatternListParent extends PatternListParent_ { }
/** Internal implementation class */
class PatternParent extends PatternParent_ { }
class DictItem extends DictItem_, AstNode {
override string toString() { result = DictItem_.super.toString() }
override AstNode getAChildNode() { none() }
@@ -162,9 +168,12 @@ class ExprList extends ExprList_ {
/* syntax: Expr, ... */
}
library class DictItemList extends DictItemList_ { }
/** A list of patterns */
class PatternList extends PatternList_ { }
library class DictItemListParent extends DictItemListParent_ { }
class DictItemList extends DictItemList_ { }
class DictItemListParent extends DictItemListParent_ { }
/** A list of strings (the primitive type string not Bytes or Unicode) */
class StringList extends StringList_ { }

File diff suppressed because it is too large Load Diff

View File

@@ -40,7 +40,7 @@ class Comment extends @py_comment {
private predicate comment_block_part(Comment start, Comment part, int i) {
not exists(Comment prev | prev.getFollowing() = part) and
exists(Comment following | part.getFollowing() = following) and
exists(part.getFollowing()) and
start = part and
i = 1
or

View File

@@ -514,7 +514,7 @@ class ComparisonControlBlock extends ConditionBlock {
Comparison getTest() { this.getLastNode() = result }
/** Whether this conditional guard implies that, in block `b`, the result of `that` is `thatIsTrue` */
/** Whether this conditional guard implies that, in block `b`, the result of `that` is `thatIsTrue` */
predicate impliesThat(BasicBlock b, Comparison that, boolean thatIsTrue) {
exists(boolean controlSense |
this.controls(b, controlSense) and

View File

@@ -115,6 +115,9 @@ module Path {
PathNormalization::Range range;
PathNormalization() { this = range }
/** Gets an argument to this path normalization that is interpreted as a path. */
DataFlow::Node getPathArg() { result = range.getPathArg() }
}
/** Provides a class for modeling new path normalization APIs. */
@@ -123,7 +126,10 @@ module Path {
* A data-flow node that performs path normalization. This is often needed in order
* to safely access paths.
*/
abstract class Range extends DataFlow::Node { }
abstract class Range extends DataFlow::Node {
/** Gets an argument to this path normalization that is interpreted as a path. */
abstract DataFlow::Node getPathArg();
}
}
/** A data-flow node that checks that a path is safe to access. */
@@ -443,6 +449,44 @@ module RegexExecution {
}
}
/** Provides classes for modeling LDAP-related APIs. */
module LDAP {
/**
* A data-flow node that executes an LDAP query.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LdapExecution::Range` instead.
*/
class LdapExecution extends DataFlow::Node {
LdapExecution::Range range;
LdapExecution() { this = range }
/** Gets the argument containing the filter string. */
DataFlow::Node getFilter() { result = range.getFilter() }
/** Gets the argument containing the base DN. */
DataFlow::Node getBaseDn() { result = range.getBaseDn() }
}
/** Provides classes for modeling new LDAP query execution-related APIs. */
module LdapExecution {
/**
* A data-flow node that executes an LDAP query.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LdapExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument containing the filter string. */
abstract DataFlow::Node getFilter();
/** Gets the argument containing the base DN. */
abstract DataFlow::Node getBaseDn();
}
}
}
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
@@ -500,8 +544,20 @@ module Escaping {
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
string getHtmlKind() { result = "html" }
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
/** Gets the escape-kind for escaping a string so it can safely be included in a regular expression. */
string getRegexKind() { result = "regex" }
/**
* Gets the escape-kind for escaping a string so it can safely be used as a
* distinguished name (DN) in an LDAP search.
*/
string getLdapDnKind() { result = "ldap_dn" }
/**
* Gets the escape-kind for escaping a string so it can safely be used as a
* filter in an LDAP search.
*/
string getLdapFilterKind() { result = "ldap_filter" }
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
//
// Technically it claims to escape for both HTML and XML, but for now we don't have
@@ -526,9 +582,28 @@ class RegexEscaping extends Escaping {
RegexEscaping() { range.getKind() = Escaping::getRegexKind() }
}
/**
* An escape of a string so it can be safely used as a distinguished name (DN)
* in an LDAP search.
*/
class LdapDnEscaping extends Escaping {
LdapDnEscaping() { range.getKind() = Escaping::getLdapDnKind() }
}
/**
* An escape of a string so it can be safely used as a filter in an LDAP search.
*/
class LdapFilterEscaping extends Escaping {
LdapFilterEscaping() { range.getKind() = Escaping::getLdapFilterKind() }
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
import semmle.python.web.HttpConstants
/** Gets an HTTP verb, in upper case */
string httpVerb() { result in ["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"] }
/** Gets an HTTP verb, in lower case */
string httpVerbLower() { result = httpVerb().toLowerCase() }
/** Provides classes for modeling HTTP servers. */
module Server {
@@ -812,6 +887,72 @@ module HTTP {
}
}
}
/** Provides classes for modeling HTTP clients. */
module Client {
/**
* A data-flow node that makes an outgoing HTTP request.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `HTTP::Client::Request::Range` instead.
*
* Every member predicate of this class forwards to the predicate of the same
* name on the underlying `Request::Range`.
*/
class Request extends DataFlow::Node instanceof Request::Range {
/**
* Gets a data-flow node that contributes to the URL of the request.
* Depending on the framework, a request may have multiple nodes which contribute to the URL.
*/
DataFlow::Node getAUrlPart() { result = super.getAUrlPart() }
/** Gets a string that identifies the framework used for this request. */
string getFramework() { result = super.getFramework() }
/**
* Holds if this request is made using a mode that disables SSL/TLS
* certificate validation, where `disablingNode` represents the point at
* which the validation was disabled, and `argumentOrigin` represents the origin
* of the argument that disabled the validation (which could be the same node as
* `disablingNode`).
*/
predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
) {
super.disablesCertificateValidation(disablingNode, argumentOrigin)
}
}
/** Provides a class for modeling new HTTP requests. */
module Request {
/**
* A data-flow node that makes an outgoing HTTP request.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `HTTP::Client::Request` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets a data-flow node that contributes to the URL of the request.
* Depending on the framework, a request may have multiple nodes which contribute to the URL.
*/
abstract DataFlow::Node getAUrlPart();
/** Gets a string that identifies the framework used for this request. */
abstract string getFramework();
/**
* Holds if this request is made using a mode that disables SSL/TLS
* certificate validation, where `disablingNode` represents the point at
* which the validation was disabled, and `argumentOrigin` represents the origin
* of the argument that disabled the validation (which could be the same node as
* `disablingNode`).
*/
abstract predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
);
}
}
// TODO: investigate whether we should treat responses to client requests as
// remote-flow-sources in general.
}
}
/**

View File

@@ -718,6 +718,12 @@ class FormattedValue extends FormattedValue_ {
}
}
/** A guard in a case statement */
class Guard extends Guard_ {
/* syntax: if Expr */
override Expr getASubExpression() { result = this.getTest() }
}
/* Expression Contexts */
/** A context in which an expression is used */
class ExprContext extends ExprContext_ { }

View File

@@ -22,6 +22,8 @@ private import semmle.python.frameworks.FlaskSqlAlchemy
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.Jmespath
private import semmle.python.frameworks.Ldap
private import semmle.python.frameworks.Ldap3
private import semmle.python.frameworks.MarkupSafe
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Mysql
@@ -30,6 +32,7 @@ private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.Pydantic
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Requests
private import semmle.python.frameworks.RestFramework
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.RuamelYaml

View File

@@ -18,7 +18,7 @@ class Function extends Function_, Scope, AstNode {
override Scope getScope() { result = this.getEnclosingScope() }
/** Whether this function is declared in a class */
predicate isMethod() { exists(Class cls | this.getEnclosingScope() = cls) }
predicate isMethod() { this.getEnclosingScope() instanceof Class }
/** Whether this is a special method, that is, whether its name has the form `__xxx__` (except `__init__`) */
predicate isSpecialMethod() {

View File

@@ -98,7 +98,7 @@ class LShift extends LShift_ {
override string getSpecialMethodName() { result = "__lshift__" }
}
/** A modulo (`%`) binary operator, which includes string formatting */
/** A modulo (`%`) binary operator, which includes string formatting */
class Mod extends Mod_ {
override string getSpecialMethodName() { result = "__mod__" }
}

View File

@@ -0,0 +1,118 @@
/**
* Wrapping generated AST classes: `Pattern_` and subclasses.
*/
import python
/** A pattern in a match statement */
class Pattern extends Pattern_, AstNode {
/** Gets the scope of this pattern */
override Scope getScope() { result = this.getCase().getScope() }
/** Gets the case statement containing this pattern */
Case getCase() { result.contains(this) }
override string toString() { result = "Pattern" }
/** Gets the module enclosing this pattern */
Module getEnclosingModule() { result = this.getScope().getEnclosingModule() }
/** Whether the parenthesized property of this pattern is true. */
predicate isParenthesized() { Pattern_.super.isParenthesised() }
override Location getLocation() { result = Pattern_.super.getLocation() }
/** Gets an immediate (non-nested) sub-expression of this pattern */
Expr getASubExpression() { none() }
/** Gets an immediate (non-nested) sub-statement of this pattern */
Stmt getASubStatement() { none() }
/** Gets an immediate (non-nested) sub-pattern of this pattern */
Pattern getASubPattern() { none() }
/** Gets a child node of this pattern: any immediate sub-expression, sub-statement, or sub-pattern */
override AstNode getAChildNode() {
result = this.getASubExpression()
or
result = this.getASubStatement()
or
result = this.getASubPattern()
}
}
/** An as-pattern in a match statement: `<subpattern> as alias` */
class MatchAsPattern extends MatchAsPattern_ {
override Pattern getASubPattern() { result = this.getPattern() }
override Expr getASubExpression() { result = this.getAlias() }
override Name getAlias() { result = super.getAlias() }
}
/** An or-pattern in a match statement: `(<pattern1>|<pattern2>)` */
class MatchOrPattern extends MatchOrPattern_ {
override Pattern getASubPattern() { result = this.getAPattern() }
}
/** A literal pattern in a match statement: `42` */
class MatchLiteralPattern extends MatchLiteralPattern_ {
override Expr getASubExpression() { result = this.getLiteral() }
}
/** A capture pattern in a match statement: `var` */
class MatchCapturePattern extends MatchCapturePattern_ {
/* syntax: varname */
override Expr getASubExpression() { result = this.getVariable() }
/** Gets the variable that is bound by this capture pattern */
override Name getVariable() { result = super.getVariable() }
}
/**
 * A wildcard pattern in a match statement: `_`
 *
 * This class declares no members of its own, so it reports only whatever
 * children its base class provides.
 */
class MatchWildcardPattern extends MatchWildcardPattern_ { }
/** A value pattern in a match statement: `Http.OK` */
class MatchValuePattern extends MatchValuePattern_ {
/** Gets the value expression of this pattern, exposed as its sub-expression. */
override Expr getASubExpression() { result = this.getValue() }
}
/** A sequence pattern in a match statement: `<p1>, <p2>` */
class MatchSequencePattern extends MatchSequencePattern_ {
/** Gets one of the element patterns of this sequence pattern, i.e. a result of `getAPattern()`. */
override Pattern getASubPattern() { result = this.getAPattern() }
}
/** A star pattern in a match statement: `(..., *)` */
class MatchStarPattern extends MatchStarPattern_ {
/** Gets the target of this star pattern, i.e. the result of `getTarget()`, exposed as a sub-pattern. */
override Pattern getASubPattern() { result = this.getTarget() }
}
/** A mapping pattern in a match statement: `{'a': var}` */
class MatchMappingPattern extends MatchMappingPattern_ {
/**
 * Gets one of the entries of this mapping pattern, i.e. a result of `getAMapping()`.
 *
 * NOTE(review): presumably each entry is a key-value pattern or a double star
 * pattern; confirm against the generated `MatchMappingPattern_` class.
 */
override Pattern getASubPattern() { result = this.getAMapping() }
}
/** A double star pattern in a match statement: `{..., **}` */
class MatchDoubleStarPattern extends MatchDoubleStarPattern_ {
/** Gets the target of this double star pattern, i.e. the result of `getTarget()`, exposed as a sub-pattern. */
override Pattern getASubPattern() { result = this.getTarget() }
}
/** A key-value pattern inside a mapping pattern: `a: var` */
class MatchKeyValuePattern extends MatchKeyValuePattern_ {
/** Gets the key or the value of this key-value pattern; both are reported as sub-patterns. */
override Pattern getASubPattern() { result = this.getKey() or result = this.getValue() }
}
/** A class pattern in a match statement: `Circle(radius = 3)` */
class MatchClassPattern extends MatchClassPattern_ {
/** Gets the class name of this pattern, exposed as its sub-expression. */
override Expr getASubExpression() { result = this.getClassName() }
/** Gets a positional or a keyword pattern of this class pattern. */
override Pattern getASubPattern() {
result = this.getAPositional() or result = this.getAKeyword()
}
}
/** A keyword pattern inside a class pattern: `radius = 3` */
class MatchKeywordPattern extends MatchKeywordPattern_ {
/** Gets the attribute named by this keyword pattern, exposed as its sub-expression. */
override Expr getASubExpression() { result = this.getAttribute() }
/** Gets the pattern that the attribute is matched against, i.e. the result of `getValue()`. */
override Pattern getASubPattern() { result = this.getValue() }
}

View File

@@ -53,9 +53,6 @@ private newtype TPrintAstNode =
shouldPrint(list.getAnItem(), _) and
not list = any(Module mod).getBody() and
not forall(AstNode child | child = list.getAnItem() | isNotNeeded(child))
} or
TRegExpTermNode(RegExpTerm term) {
exists(StrConst str | term.getRootTerm() = getParsedRegExp(str) and shouldPrint(str, _))
}
/**
@@ -76,7 +73,7 @@ class PrintAstNode extends TPrintAstNode {
/**
* Gets a child of this node.
*/
final PrintAstNode getAChild() { result = getChild(_) }
final PrintAstNode getAChild() { result = this.getChild(_) }
/**
* Gets the parent of this node, if any.
@@ -94,7 +91,7 @@ class PrintAstNode extends TPrintAstNode {
*/
string getProperty(string key) {
key = "semmle.label" and
result = toString()
result = this.toString()
}
/**
@@ -103,7 +100,7 @@ class PrintAstNode extends TPrintAstNode {
* this.
*/
string getChildEdgeLabel(int childIndex) {
exists(getChild(childIndex)) and
exists(this.getChild(childIndex)) and
result = childIndex.toString()
}
}
@@ -157,13 +154,13 @@ class AstElementNode extends PrintAstNode, TElementNode {
override PrintAstNode getChild(int childIndex) {
exists(AstNode el | result.(AstElementNode).getAstNode() = el |
el = this.getChildNode(childIndex) and not el = getStmtList(_, _).getAnItem()
el = this.getChildNode(childIndex) and not el = this.getStmtList(_, _).getAnItem()
)
or
// displaying all `StmtList` after the other children.
exists(int offset | offset = 1 + max([0, any(int index | exists(this.getChildNode(index)))]) |
exists(int index | childIndex = index + offset |
result.(StmtListNode).getList() = getStmtList(index, _)
result.(StmtListNode).getList() = this.getStmtList(index, _)
)
)
}
@@ -299,7 +296,7 @@ class StmtListNode extends PrintAstNode, TStmtListNode {
private string getLabel() { this.getList() = any(AstElementNode node).getStmtList(_, result) }
override string toString() { result = "(StmtList) " + getLabel() }
override string toString() { result = "(StmtList) " + this.getLabel() }
override PrintAstNode getChild(int childIndex) {
exists(AstNode el | result.(AstElementNode).getAstNode() = el | el = list.getItem(childIndex))
@@ -430,32 +427,6 @@ class ParameterNode extends AstElementNode {
*/
class StrConstNode extends AstElementNode {
override StrConst element;
override PrintAstNode getChild(int childIndex) {
childIndex = 0 and result.(RegExpTermNode).getTerm() = getParsedRegExp(element)
}
}
/**
* A print node for a regular expression term.
*/
class RegExpTermNode extends TRegExpTermNode, PrintAstNode {
RegExpTerm term;
RegExpTermNode() { this = TRegExpTermNode(term) }
/** Gets the `RegExpTerm` for this node. */
RegExpTerm getTerm() { result = term }
override PrintAstNode getChild(int childIndex) {
result.(RegExpTermNode).getTerm() = term.getChild(childIndex)
}
override string toString() {
result = "[" + strictconcat(term.getPrimaryQLClass(), " | ") + "] " + term.toString()
}
override Location getLocation() { result = term.getLocation() }
}
/**

View File

@@ -467,9 +467,10 @@ class RegExpEscape extends RegExpNormalChar {
or
this.getUnescaped() = "t" and result = "\t"
or
// TODO: Find a way to include a formfeed character
// this.getUnescaped() = "f" and result = " "
// or
this.getUnescaped() = "f" and result = 12.toUnicode()
or
this.getUnescaped() = "v" and result = 11.toUnicode()
or
this.isUnicode() and
result = this.getUnicode()
}
@@ -480,7 +481,7 @@ class RegExpEscape extends RegExpNormalChar {
override string getPrimaryQLClass() { result = "RegExpEscape" }
/** Gets the part of the term following the escape character. That is e.g. "w" if the term is "\w". */
private string getUnescaped() { result = this.getText().suffix(1) }
string getUnescaped() { result = this.getText().suffix(1) }
/**
* Gets the text for this escape. That is e.g. "\w".
@@ -535,6 +536,13 @@ private int toHex(string hex) {
result = 15 and hex = ["f", "F"]
}
/**
 * A word boundary, that is, a regular expression term of the form `\b`.
 */
class RegExpWordBoundary extends RegExpSpecialChar {
  RegExpWordBoundary() { this.getChar() = "\\b" }

  // Report the most specific class name, consistent with the other
  // `RegExpSpecialChar` subclasses such as `RegExpDollar` and `RegExpCaret`.
  override string getPrimaryQLClass() { result = "RegExpWordBoundary" }
}
/**
* A character class escape in a regular expression.
* That is, an escaped character that denotes multiple characters.
@@ -801,7 +809,7 @@ class RegExpDot extends RegExpSpecialChar {
}
/**
* A dollar assertion `$` matching the end of a line.
* A dollar assertion `$`, matching the end of a line, or `\Z`, matching only at the end of the string.
*
* Example:
*
@@ -810,13 +818,13 @@ class RegExpDot extends RegExpSpecialChar {
* ```
*/
class RegExpDollar extends RegExpSpecialChar {
RegExpDollar() { this.getChar() = "$" }
RegExpDollar() { this.getChar() = ["$", "\\Z"] }
override string getPrimaryQLClass() { result = "RegExpDollar" }
}
/**
* A caret assertion `^` matching the beginning of a line.
* A caret assertion `^`, matching the beginning of a line, or `\A`, matching only at the beginning of the string.
*
* Example:
*
@@ -825,7 +833,7 @@ class RegExpDollar extends RegExpSpecialChar {
* ```
*/
class RegExpCaret extends RegExpSpecialChar {
RegExpCaret() { this.getChar() = "^" }
RegExpCaret() { this.getChar() = ["^", "\\A"] }
override string getPrimaryQLClass() { result = "RegExpCaret" }
}

View File

@@ -86,7 +86,7 @@ class SsaVariable extends @py_ssa_var {
/** Gets the incoming edges for a Phi node. */
private BasicBlock getAPredecessorBlockForPhi() {
exists(getAPhiInput()) and
exists(this.getAPhiInput()) and
result.getASuccessor() = this.getDefinition().getBasicBlock()
}

View File

@@ -18,10 +18,15 @@ class Stmt extends Stmt_, AstNode {
/** Gets an immediate (non-nested) sub-statement of this statement */
Stmt getASubStatement() { none() }
/** Gets an immediate (non-nested) sub-pattern of this statement */
Pattern getASubPattern() { none() }
override AstNode getAChildNode() {
result = this.getASubExpression()
or
result = this.getASubStatement()
or
result = this.getASubPattern()
}
private ControlFlowNode possibleEntryNode() {
@@ -412,6 +417,24 @@ class With extends With_ {
override Stmt getLastStatement() { result = this.getBody().getLastItem().getLastStatement() }
}
/** A match statement */
class MatchStmt extends MatchStmt_ {
/* syntax: match subject: */
/** Gets the subject of this match statement, exposed as its sub-expression. */
override Expr getASubExpression() { result = this.getSubject() }
/** Gets a case of this match statement (at any index), exposed as a sub-statement. */
override Stmt getASubStatement() { result = this.getCase(_) }
}
/** A case statement */
class Case extends Case_ {
/* syntax: case pattern if guard: */
/** Gets the guard of this case, exposed as its sub-expression. */
override Expr getASubExpression() { result = this.getGuard() }
/** Gets a statement of this case (at any index), exposed as a sub-statement. */
override Stmt getASubStatement() { result = this.getStmt(_) }
/** Gets the pattern of this case, exposed as its sub-pattern. */
override Pattern getASubPattern() { result = this.getPattern() }
}
/** A plain text used in a template is wrapped in a TemplateWrite statement */
class TemplateWrite extends TemplateWrite_ {
override Expr getASubExpression() { result = this.getValue() }

View File

@@ -57,7 +57,7 @@ class LocalVariable extends Variable {
override string toString() { result = "Local Variable " + this.getId() }
/** Whether this variable is a parameter */
override predicate isParameter() { exists(Parameter p | this.getAnAccess() = p) }
override predicate isParameter() { this.getAnAccess() instanceof Parameter }
/** Holds if this variable is the first parameter of a method. It is not necessarily called "self" */
override predicate isSelf() {
@@ -87,7 +87,7 @@ class NameLocalVariable extends LocalVariable {
/** A global (module-level) variable */
class GlobalVariable extends Variable {
GlobalVariable() { exists(Module m | m = this.getScope()) }
GlobalVariable() { this.getScope() instanceof Module }
override string toString() { result = "Global Variable " + this.getId() }
}

View File

@@ -1,55 +1,10 @@
/**
* Provides classes modeling cryptographic algorithms, separated into strong and weak variants.
*
* The classification into strong and weak are based on Wikipedia, OWASP and google (2017).
* The classification into strong and weak is based on Wikipedia, OWASP and Google (2021).
*/
/**
* Names of cryptographic algorithms, separated into strong and weak variants.
*
* The names are normalized: upper-case, no spaces, dashes or underscores.
*
* The names are inspired by the names used in real world crypto libraries.
*
* The classification into strong and weak are based on Wikipedia, OWASP and google (2017).
*/
private module AlgorithmNames {
predicate isStrongHashingAlgorithm(string name) {
name =
[
"DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2",
"SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512"
]
}
predicate isWeakHashingAlgorithm(string name) {
name =
[
"HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", "RIPEMD160",
"RIPEMD320", "SHA0", "SHA1"
]
}
predicate isStrongEncryptionAlgorithm(string name) {
name = ["AES", "AES128", "AES192", "AES256", "AES512", "RSA", "RABBIT", "BLOWFISH"]
}
predicate isWeakEncryptionAlgorithm(string name) {
name =
[
"DES", "3DES", "TRIPLEDES", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", "ARCFOUR",
"ARC5", "RC5"
]
}
predicate isStrongPasswordHashingAlgorithm(string name) {
name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"]
}
predicate isWeakPasswordHashingAlgorithm(string name) { name = "EVPKDF" }
}
private import AlgorithmNames
private import internal.CryptoAlgorithmNames
/**
* A cryptographic algorithm.

View File

@@ -0,0 +1,72 @@
/**
* Names of cryptographic algorithms, separated into strong and weak variants.
*
* The names are normalized: upper-case, no spaces, dashes or underscores.
*
* The names are inspired by the names used in real world crypto libraries.
*
* The classification into strong and weak is based on Wikipedia, OWASP and Google (2021).
*/
/**
 * Holds if `name` is one of the names classified in this file as a strong
 * hashing algorithm.
 */
predicate isStrongHashingAlgorithm(string name) {
  name in [
      // DSA / EdDSA / ECDSA style names
      "DSA", "ED25519", "ECDSA256", "ECDSA384", "ECDSA512", "ES256", "ES384", "ES512",
      // SHA-2 family
      "SHA2", "SHA224", "SHA256", "SHA384", "SHA512",
      // SHA-3 family
      "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512"
    ]
}
/**
 * Holds if `name` is one of the names classified in this file as a weak
 * hashing algorithm.
 */
predicate isWeakHashingAlgorithm(string name) {
  name in [
      // MD family
      "MD2", "MD4", "MD5",
      // RIPEMD family
      "RIPEMD", "RIPEMD128", "RIPEMD160", "RIPEMD256", "RIPEMD320",
      // early SHA versions
      "SHA0", "SHA1",
      // others
      "HAVEL128", "PANAMA"
    ]
}
/**
 * Holds if `name` is one of the names classified in this file as a strong
 * encryption algorithm.
 *
 * NOTE(review): the AES and CAMELLIA entries appear both with and without a
 * dash before the key size; presumably the dashed spellings serve callers that
 * do not strip dashes before lookup. Confirm against the callers.
 */
predicate isStrongEncryptionAlgorithm(string name) {
  name in [
      // AES, with and without a dash before the key size
      "AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512",
      // CAMELLIA, with and without a dash before the key size
      "CAMELLIA", "CAMELLIA128", "CAMELLIA192", "CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192",
      "CAMELLIA-256",
      // others, in alphabetical order
      "ARIA", "BF", "BLOWFISH", "CAST", "CAST5", "CHACHA", "ECIES", "GOST", "GOST89", "IDEA",
      "RABBIT", "RSA", "SEED", "SM4"
    ]
}
/**
 * Holds if `name` is one of the names classified in this file as a weak
 * encryption algorithm.
 */
predicate isWeakEncryptionAlgorithm(string name) {
  name in [
      // DES and its variants
      "DES", "DESX", "3DES", "DES3", "TRIPLEDES", "TDEA", "TRIPLEDEA",
      // RC2 / RC4 / RC5 and their aliases
      "ARC2", "RC2", "ARC4", "RC4", "ARCFOUR", "ARC5", "RC5"
    ]
}
/**
 * Holds if `name` is one of the names classified in this file as a strong
 * password hashing algorithm.
 */
predicate isStrongPasswordHashingAlgorithm(string name) {
  name in [
      "ARGON2", //
      "BCRYPT", //
      "PBKDF2", //
      "SCRYPT"
    ]
}
/**
 * Holds if `name` is the one name classified in this file as a weak password
 * hashing algorithm.
 */
predicate isWeakPasswordHashingAlgorithm(string name) {
  // Only a single name is currently listed.
  "EVPKDF" = name
}
/**
 * Holds if `name` is the one name classified in this file as a weak block
 * cipher mode of operation.
 */
predicate isWeakBlockMode(string name) {
  // Only a single mode is currently listed.
  "ECB" = name
}

View File

@@ -0,0 +1,93 @@
/** Provides predicates for reasoning about built-ins in Python. */
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.ImportStar
module Builtins {
  /**
   * Gets the name of a known built-in, covering functions, exceptions and
   * constants of both Python 2 and Python 3.
   */
  string getBuiltinName() {
    // The lists below were obtained by applying the `dir` built-in to the
    // `builtins` (Python 3) and `__builtin__` (Python 2) modules.
    //
    // Functions and exceptions available in both Python 2 and 3
    result =
      [
        "abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
        "compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
        "float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
        "id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
        "max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
        "property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
        "staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
        // Exceptions
        "ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
        "BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
        "FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
        "ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
        "LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
        "OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
        "RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
        "SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
        "UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
        "UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
        // Added for compatibility (also listed under "Python 3 only" below)
        "exec"
      ]
    or
    // Constants available in both Python 2 and 3
    // ("NotImplemented" also appears in the list above; the repetition is harmless)
    result = ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
    or
    // Names that only exist in Python 3
    result =
      [
        "ascii", "breakpoint", "bytes", "exec",
        // Exceptions
        "BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
        "ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
        "FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
        "NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
        "ResourceWarning", "StopAsyncIteration", "TimeoutError"
      ]
    or
    // Names that only exist in Python 2
    result =
      [
        "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", "unichr",
        "unicode", "xrange"
      ]
  }

  /**
   * Gets a data flow node that is likely to refer to the built-in called `name`.
   *
   * The result is a load of a global name `name` in a module that does not
   * itself define a global of that name. This is an over-approximation: it may
   * not account for things like a built-in being overwritten with another value.
   */
  DataFlow::Node likelyBuiltin(string name) {
    exists(Module m, NameNode access |
      possible_builtin_accessed_in_module(access, name, m) and
      not possible_builtin_defined_in_module(name, m) and
      result.asCfgNode() = access
    )
  }

  /**
   * Holds if `name` is the name of a built-in and the module `m` defines a
   * global variable with that name.
   */
  private predicate possible_builtin_defined_in_module(string name, Module m) {
    name = getBuiltinName() and
    ImportStar::globalNameDefinedInModule(name, m)
  }

  /**
   * Holds if `n` is a load of a global variable called `name` inside the module
   * `m`, where `name` is also the name of a built-in.
   */
  private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
    name = getBuiltinName() and
    n.getId() = name and
    n.isLoad() and
    n.isGlobal() and
    n.getEnclosingModule() = m
  }
}

View File

@@ -3,6 +3,17 @@ private import DataFlowImplSpecific::Public
import Cached
module DataFlowImplCommonPublic {
/** A state value to track during data flow. */
class FlowState = string;
/**
* The default state, which is used when the state is unspecified for a source
* or a sink.
*/
class FlowStateEmpty extends FlowState {
FlowStateEmpty() { this = "" }
}
private newtype TFlowFeature =
TFeatureHasSourceCallContext() or
TFeatureHasSinkCallContext() or
@@ -62,6 +73,18 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
tupleLimit = 1000
}
/**
 * Holds if `arg` is an argument of `call` at some argument position that
 * `parameterMatch` relates to the parameter position `ppos`.
 */
pragma[noinline]
predicate argumentPositionMatch(DataFlowCall call, ArgNode arg, ParameterPosition ppos) {
  exists(ArgumentPosition apos | parameterMatch(ppos, apos) | arg.argumentOf(call, apos))
}
/**
* Provides a simple data-flow analysis for resolving lambda calls. The analysis
* currently excludes read-steps, store-steps, and flow-through.
@@ -71,25 +94,27 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
* calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly.
*/
private module LambdaFlow {
private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallable(call), i)
pragma[noinline]
private predicate viableParamNonLambda(DataFlowCall call, ParameterPosition ppos, ParamNode p) {
p.isParameterOf(viableCallable(call), ppos)
}
private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallableLambda(call, _), i)
pragma[noinline]
private predicate viableParamLambda(DataFlowCall call, ParameterPosition ppos, ParamNode p) {
p.isParameterOf(viableCallableLambda(call, _), ppos)
}
private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(int i |
viableParamNonLambda(call, i, p) and
arg.argumentOf(call, i)
exists(ParameterPosition ppos |
viableParamNonLambda(call, ppos, p) and
argumentPositionMatch(call, arg, ppos)
)
}
private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(int i |
viableParamLambda(call, i, p) and
arg.argumentOf(call, i)
exists(ParameterPosition ppos |
viableParamLambda(call, ppos, p) and
argumentPositionMatch(call, arg, ppos)
)
}
@@ -322,7 +347,7 @@ private module Cached {
or
exists(ArgNode arg |
result.(PostUpdateNode).getPreUpdateNode() = arg and
arg.argumentOf(call, k.(ParamUpdateReturnKind).getPosition())
arg.argumentOf(call, k.(ParamUpdateReturnKind).getAMatchingArgumentPosition())
)
}
@@ -330,7 +355,7 @@ private module Cached {
predicate returnNodeExt(Node n, ReturnKindExt k) {
k = TValueReturn(n.(ReturnNode).getKind())
or
exists(ParamNode p, int pos |
exists(ParamNode p, ParameterPosition pos |
parameterValueFlowsToPreUpdate(p, n) and
p.isParameterOf(_, pos) and
k = TParamUpdate(pos)
@@ -352,11 +377,13 @@ private module Cached {
}
cached
predicate parameterNode(Node p, DataFlowCallable c, int pos) { isParameterNode(p, c, pos) }
predicate parameterNode(Node p, DataFlowCallable c, ParameterPosition pos) {
isParameterNode(p, c, pos)
}
cached
predicate argumentNode(Node n, DataFlowCall call, int pos) {
n.(ArgumentNode).argumentOf(call, pos)
predicate argumentNode(Node n, DataFlowCall call, ArgumentPosition pos) {
isArgumentNode(n, call, pos)
}
/**
@@ -374,12 +401,12 @@ private module Cached {
}
/**
* Holds if `p` is the `i`th parameter of a viable dispatch target of `call`.
* The instance parameter is considered to have index `-1`.
* Holds if `p` is the parameter of a viable dispatch target of `call`,
* and `p` has position `ppos`.
*/
pragma[nomagic]
private predicate viableParam(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallableExt(call), i)
private predicate viableParam(DataFlowCall call, ParameterPosition ppos, ParamNode p) {
p.isParameterOf(viableCallableExt(call), ppos)
}
/**
@@ -388,9 +415,9 @@ private module Cached {
*/
cached
predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(int i |
viableParam(call, i, p) and
arg.argumentOf(call, i) and
exists(ParameterPosition ppos |
viableParam(call, ppos, p) and
argumentPositionMatch(call, arg, ppos) and
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p))
)
}
@@ -862,7 +889,7 @@ private module Cached {
cached
newtype TReturnKindExt =
TValueReturn(ReturnKind kind) or
TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
TParamUpdate(ParameterPosition pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
cached
newtype TBooleanOption =
@@ -1054,9 +1081,9 @@ class ParamNode extends Node {
/**
* Holds if this node is the parameter of callable `c` at the specified
* (zero-based) position.
* position.
*/
predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) }
predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) { parameterNode(this, c, pos) }
}
/** A data-flow node that represents a call argument. */
@@ -1064,7 +1091,9 @@ class ArgNode extends Node {
ArgNode() { argumentNode(this, _, _) }
/** Holds if this argument occurs at the given position in the given call. */
final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) }
final predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
argumentNode(this, call, pos)
}
}
/**
@@ -1110,11 +1139,14 @@ class ValueReturnKind extends ReturnKindExt, TValueReturn {
}
class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
private int pos;
private ParameterPosition pos;
ParamUpdateReturnKind() { this = TParamUpdate(pos) }
int getPosition() { result = pos }
ParameterPosition getPosition() { result = pos }
pragma[nomagic]
ArgumentPosition getAMatchingArgumentPosition() { parameterMatch(pos, result) }
override string toString() { result = "param update " + pos }
}
@@ -1258,7 +1290,7 @@ class DataFlowCallOption extends TDataFlowCallOption {
}
}
/** Content tagged with the type of a containing object. */
/** A `Content` tagged with the type of a containing object. */
class TypedContent extends MkTypedContent {
private Content c;
private DataFlowType t;

View File

@@ -9,6 +9,31 @@ private import tainttracking1.TaintTrackingParameter::Private
private import tainttracking1.TaintTrackingParameter::Public
module Consistency {
private newtype TConsistencyConfiguration = MkConsistencyConfiguration()
/**
 * A class for configuring the consistency queries.
 *
 * Every `...Exclude` predicate below has no results by default, so nothing is
 * excluded. The consistency queries in this module skip a node when the
 * corresponding predicate holds for it on any `ConsistencyConfiguration`.
 */
class ConsistencyConfiguration extends TConsistencyConfiguration {
/** Gets a textual representation of this configuration; this implementation has no result. */
string toString() { none() }
/** Holds if `n` should be excluded from the consistency test `uniqueEnclosingCallable`. */
predicate uniqueEnclosingCallableExclude(Node n) { none() }
/** Holds if `n` should be excluded from the consistency test `uniqueNodeLocation`. */
predicate uniqueNodeLocationExclude(Node n) { none() }
/** Holds if `n` should be excluded from the consistency test `missingLocation`. */
predicate missingLocationExclude(Node n) { none() }
/** Holds if `n` should be excluded from the consistency test `postWithInFlow`. */
predicate postWithInFlowExclude(Node n) { none() }
/** Holds if `n` should be excluded from the consistency test `argHasPostUpdate`. */
predicate argHasPostUpdateExclude(ArgumentNode n) { none() }
/** Holds if `n` should be excluded from the consistency test `reverseRead`. */
predicate reverseReadExclude(Node n) { none() }
}
private class RelevantNode extends Node {
RelevantNode() {
this instanceof ArgumentNode or
@@ -33,6 +58,7 @@ module Consistency {
n instanceof RelevantNode and
c = count(nodeGetEnclosingCallable(n)) and
c != 1 and
not any(ConsistencyConfiguration conf).uniqueEnclosingCallableExclude(n) and
msg = "Node should have one enclosing callable but has " + c + "."
)
}
@@ -53,6 +79,7 @@ module Consistency {
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
) and
c != 1 and
not any(ConsistencyConfiguration conf).uniqueNodeLocationExclude(n) and
msg = "Node should have one location but has " + c + "."
)
}
@@ -63,7 +90,8 @@ module Consistency {
strictcount(Node n |
not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn |
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
)
) and
not any(ConsistencyConfiguration conf).missingLocationExclude(n)
) and
msg = "Nodes without location: " + c
)
@@ -159,12 +187,13 @@ module Consistency {
query predicate reverseRead(Node n, string msg) {
exists(Node n2 | readStep(n, _, n2) and hasPost(n2) and not hasPost(n)) and
not any(ConsistencyConfiguration conf).reverseReadExclude(n) and
msg = "Origin of readStep is missing a PostUpdateNode."
}
query predicate argHasPostUpdate(ArgumentNode n, string msg) {
not hasPost(n) and
not isImmutableOrUnobservable(n) and
not any(ConsistencyConfiguration c).argHasPostUpdateExclude(n) and
msg = "ArgumentNode is missing PostUpdateNode."
}
@@ -177,6 +206,7 @@ module Consistency {
isPostUpdateNode(n) and
not clearsContent(n, _) and
simpleLocalFlowStep(_, n) and
not any(ConsistencyConfiguration c).postWithInFlowExclude(n) and
msg = "PostUpdateNode should not be the target of local flow."
}
}

View File

@@ -2,12 +2,34 @@ private import python
private import DataFlowPublic
import semmle.python.SpecialMethods
private import semmle.python.essa.SsaCompute
private import semmle.python.dataflow.new.internal.ImportStar
/** Gets the callable in which this node occurs. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
/**
 * A parameter position represented by an integer.
 *
 * Only integers at which some `DataFlowCallable` has a parameter are included.
 */
class ParameterPosition extends int {
  ParameterPosition() { exists(DataFlowCallable c | exists(c.getParameter(this))) }
}
/**
 * An argument position represented by an integer.
 *
 * Only integers at which some `DataFlowCall` has an argument are included.
 */
class ArgumentPosition extends int {
  ArgumentPosition() { exists(DataFlowCall c | exists(c.getArg(this))) }
}
/**
 * Holds if arguments at position `apos` match parameters at position `ppos`.
 *
 * Both positions are integers, and they match exactly when they are equal.
 */
pragma[inline]
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
  apos = ppos
}
/** Holds if `p` is a `ParameterNode` of `c` with position `pos`. */
predicate isParameterNode(ParameterNode p, DataFlowCallable c, int pos) { p.isParameterOf(c, pos) }
predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) {
p.isParameterOf(c, pos)
}
/**
 * Holds if `arg` is an `ArgumentNode` of the call `c` with position `pos`.
 *
 * This simply forwards to `ArgumentNode.argumentOf`.
 */
predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos) {
arg.argumentOf(c, pos)
}
//--------
// Data flow graph
@@ -227,6 +249,8 @@ module EssaFlow {
// Flow inside an unpacking assignment
iterableUnpackingFlowStep(nodeFrom, nodeTo)
or
matchFlowStep(nodeFrom, nodeTo)
or
// Overflow keyword argument
exists(CallNode call, CallableValue callable |
call = callable.getACall() and
@@ -363,7 +387,7 @@ private Node update(Node node) {
* ```python
* f(0, 1, 2, a=3)
* ```
* will be modelled as
* will be modeled as
* ```python
* f(0, 1, [*t], [**d])
* ```
@@ -376,7 +400,7 @@ private Node update(Node node) {
* ```python
* f(0, **{"y": 1, "a": 3})
* ```
* no tuple argument is synthesized. It is modelled as
* no tuple argument is synthesized. It is modeled as
* ```python
* f(0, [y=1], [**d])
* ```
@@ -927,7 +951,7 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
private predicate module_export(Module m, string name, CfgNode defn) {
exists(EssaVariable v |
v.getName() = name and
v.getAUse() = m.getANormalExit()
v.getAUse() = ImportStar::getStarImported*(m).getANormalExit()
|
defn.getNode() = v.getDefinition().(AssignmentDefinition).getValue()
or
@@ -960,6 +984,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
posOverflowStoreStep(nodeFrom, c, nodeTo)
or
kwOverflowStoreStep(nodeFrom, c, nodeTo)
or
matchStoreStep(nodeFrom, c, nodeTo)
}
/** Data flows from an element of a list to the list. */
@@ -976,7 +1002,7 @@ predicate listStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo)
}
/** Data flows from an element of a set to the set. */
predicate setStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo) {
predicate setStoreStep(CfgNode nodeFrom, SetElementContent c, CfgNode nodeTo) {
// Set
// `{..., 42, ...}`
// nodeFrom is `42`, cfg node
@@ -1102,6 +1128,8 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
or
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
or
popReadStep(nodeFrom, c, nodeTo)
or
forReadStep(nodeFrom, c, nodeTo)
@@ -1531,6 +1559,318 @@ module IterableUnpacking {
import IterableUnpacking
/**
* There are a number of patterns available for the match statement.
* Each one transfers data and content differently to its parts.
*
* Furthermore, given a successful match, we can infer some data about
* the subject. Consider the example:
* ```python
* match choice:
* case 'Y':
* ...body
* ```
* Inside `body`, we know that `choice` has the value `'Y'`.
*
* A similar thing happens with the "as pattern". Consider the example:
* ```python
* match choice:
* case ('y'|'Y') as c:
* ...body
* ```
* By the binding rules, there is data flow from `choice` to `c`. But we
* can infer the value of `c` to be either `'y'` or `'Y'` if the match succeeds.
*
* We will treat such inferences separately as guards. First we will model the data flow
* stemming from the bindings and the matching of shape. Below, 'subject' is not necessarily the
* top-level subject of the match, but rather the part recursively matched by the current pattern.
* For instance, in the example:
* ```python
* match command:
* case ('quit' as c) | ('go', ('up'|'down') as c):
* ...body
* ```
* `command` is the subject of the first as-pattern, while the second component of `command`
* is the subject of the second as-pattern. As such, 'subject' refers to the pattern under evaluation.
*
* - as pattern: subject flows to alias as well as to the interior pattern
* - or pattern: subject flows to each alternative
* - literal pattern: flow from the literal to the pattern, to add information
* - capture pattern: subject flows to the variable
* - wildcard pattern: no flow
* - value pattern: flow from the value to the pattern, to add information
* - sequence pattern: each element reads from subject at the associated index
* - star pattern: subject flows to the variable, possibly via a conversion
* - mapping pattern: each value reads from subject at the associated key
* - double star pattern: subject flows to the variable, possibly via a conversion
* - key-value pattern: the value reads from the subject at the key (see mapping pattern)
* - class pattern: all keywords read the appropriate attribute from the subject
* - keyword pattern: the appropriate attribute is read from the subject (see class pattern)
*
* Inside the class pattern, we also find positional arguments. They are converted to
* keyword arguments using the `__match_args__` attribute on the class. We do not
* currently model this.
*/
module MatchUnpacking {
/**
* The subject of a match flows to each top-level pattern
* (a pattern directly under a `case` statement).
*
* We could consider a model closer to use-use-flow, where the subject
* only flows to the first top-level pattern and from there to the
* following ones.
*/
predicate matchSubjectFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchStmt match, Expr subject, Pattern target |
subject = match.getSubject() and
target = match.getCase(_).(Case).getPattern()
|
nodeFrom.asExpr() = subject and
nodeTo.asCfgNode().getNode() = target
)
}
/**
* as pattern: subject flows to alias as well as to the interior pattern
* syntax (toplevel): `case pattern as alias:`
*
* The flow is modeled as a chain: as-pattern -> interior pattern -> alias.
*/
predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchAsPattern subject, Name alias | alias = subject.getAlias() |
// We make the subject flow to the alias via the interior pattern.
// That way, information added at the interior pattern (for instance by
// a literal or value pattern) can propagate to the alias.
//
// the subject flows to the interior pattern
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = subject.getPattern()
or
// the interior pattern flows to the alias
nodeFrom.asCfgNode().getNode() = subject.getPattern() and
nodeTo.asVar().getDefinition().(PatternAliasDefinition).getDefiningNode().getNode() = alias
)
}
/**
* or pattern: subject flows to each alternative
* syntax (toplevel): `case alt1 | alt2:`
*/
predicate matchOrFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchOrPattern subject, Pattern pattern | pattern = subject.getAPattern() |
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = pattern
)
}
/**
* literal pattern: flow from the literal to the pattern, to add information
* syntax (toplevel): `case literal:`
*/
predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchLiteralPattern pattern, Expr literal | literal = pattern.getLiteral() |
nodeFrom.asExpr() = literal and
nodeTo.asCfgNode().getNode() = pattern
)
}
/**
* capture pattern: subject flows to the variable
* syntax (toplevel): `case var:`
*
* The target of the step is the SSA variable whose `PatternCaptureDefinition`
* is located at the captured name.
*/
predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchCapturePattern capture, Name var | capture.getVariable() = var |
nodeFrom.asCfgNode().getNode() = capture and
nodeTo.asVar().getDefinition().(PatternCaptureDefinition).getDefiningNode().getNode() = var
)
}
/**
* value pattern: flow from the value to the pattern, to add information
* syntax (toplevel): `case Dotted.value:`
*/
predicate matchValueFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchValuePattern pattern, Expr value | value = pattern.getValue() |
nodeFrom.asExpr() = value and
nodeTo.asCfgNode().getNode() = pattern
)
}
/**
* sequence pattern: each element reads from subject at the associated index
* syntax (toplevel): `case [a, b]:`
*
* For tuple content, the index used is the position of the element pattern
* within the sequence pattern; list content is read regardless of position.
*
* NOTE(review): the position within the pattern is used unadjusted, so for
* elements following a star pattern the tuple index is presumably imprecise.
*/
predicate matchSequenceReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(MatchSequencePattern subject, int index, Pattern element |
element = subject.getPattern(index)
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = element and
(
// tuple content
c.(TupleElementContent).getIndex() = index
or
// list content
c instanceof ListElementContent
// set content is excluded from sequence patterns,
// see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
)
)
}
/**
* star pattern: subject flows to the variable, possibly via a conversion
* syntax (toplevel): `case *var:`
*
* We decompose this flow into a read step and a store step. The read step
* reads both tuple and list content, the store step only stores list content.
* This way, we convert all content to list content.
*
* This is the read step. It targets the synthetic `TStarPatternElementNode`
* of the star pattern. For tuple content, every index at or after the position
* of the star pattern is read; no upper bound is applied for elements that
* are matched by patterns following the star.
*/
predicate matchStarReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(MatchSequencePattern subject, int index, MatchStarPattern star |
star = subject.getPattern(index)
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo = TStarPatternElementNode(star) and
(
// tuple content
c.(TupleElementContent).getIndex() >= index
or
// list content
c instanceof ListElementContent
// set content is excluded from sequence patterns,
// see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
)
)
}
/**
* star pattern: subject flows to the variable, possibly via a conversion
* syntax (toplevel): `case *var:`
*
* We decompose this flow into a read step and a store step. The read step
* reads both tuple and list content, the store step only stores list content.
* This way, we convert all content to list content.
*
* This is the store step. It goes from the synthetic `TStarPatternElementNode`
* of the star pattern to the target of the star pattern.
*/
predicate matchStarStoreStep(Node nodeFrom, Content c, Node nodeTo) {
exists(MatchStarPattern star |
nodeFrom = TStarPatternElementNode(star) and
nodeTo.asCfgNode().getNode() = star.getTarget() and
c instanceof ListElementContent
)
}
/**
* mapping pattern: each value reads from subject at the associated key
* syntax (toplevel): `case {"color": c, "height": x}:`
*
* Only keys that are string literals give rise to a read step.
*/
predicate matchMappingReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(
MatchMappingPattern subject, MatchKeyValuePattern keyValue, MatchLiteralPattern key,
Pattern value
|
keyValue = subject.getAMapping() and
key = keyValue.getKey() and
value = keyValue.getValue()
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = value and
c.(DictionaryElementContent).getKey() = key.getLiteral().(StrConst).getText()
)
}
/**
* double star pattern: subject flows to the variable, possibly via a conversion
* syntax (toplevel): `case {**var}:`
*
* Dictionary content flows to the double star, but all mentioned keys in the
* mapping pattern should be cleared. This predicate only provides the flow
* step; the clearing is expressed by `matchMappingClearStep`.
*/
predicate matchMappingFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchMappingPattern subject, MatchDoubleStarPattern dstar |
dstar = subject.getAMapping()
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = dstar.getTarget()
)
}
/**
* Bindings that are mentioned in a mapping pattern will not be available
* to a double star pattern in the same mapping pattern.
*
* Holds if `n` is the target of a double star pattern and `c` is the dictionary
* content for a string-literal key that is mentioned in the same mapping pattern.
*/
predicate matchMappingClearStep(Node n, Content c) {
exists(
MatchMappingPattern subject, MatchKeyValuePattern keyValue, MatchLiteralPattern key,
MatchDoubleStarPattern dstar
|
keyValue = subject.getAMapping() and
key = keyValue.getKey() and
dstar = subject.getAMapping()
|
n.asCfgNode().getNode() = dstar.getTarget() and
c.(DictionaryElementContent).getKey() = key.getLiteral().(StrConst).getText()
)
}
/**
* class pattern: all keywords read the appropriate attribute from the subject
* syntax (toplevel): `case ClassName(attr = val):`
*
* Only keyword patterns are handled here; no step is produced for
* positional sub-patterns.
*/
predicate matchClassReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(MatchClassPattern subject, MatchKeywordPattern keyword, Name attr, Pattern value |
keyword = subject.getKeyword(_) and
attr = keyword.getAttribute() and
value = keyword.getValue()
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = value and
c.(AttributeContent).getAttribute() = attr.getId()
)
}
/** All flow steps associated with match. */
predicate matchFlowStep(Node nodeFrom, Node nodeTo) {
matchSubjectFlowStep(nodeFrom, nodeTo)
or
matchAsFlowStep(nodeFrom, nodeTo)
or
matchOrFlowStep(nodeFrom, nodeTo)
or
matchLiteralFlowStep(nodeFrom, nodeTo)
or
matchCaptureFlowStep(nodeFrom, nodeTo)
or
matchValueFlowStep(nodeFrom, nodeTo)
or
matchMappingFlowStep(nodeFrom, nodeTo)
}
/** All read steps associated with match. */
predicate matchReadStep(Node nodeFrom, Content c, Node nodeTo) {
matchClassReadStep(nodeFrom, c, nodeTo)
or
matchSequenceReadStep(nodeFrom, c, nodeTo)
or
matchMappingReadStep(nodeFrom, c, nodeTo)
or
matchStarReadStep(nodeFrom, c, nodeTo)
}
/** All store steps associated with match. */
predicate matchStoreStep(Node nodeFrom, Content c, Node nodeTo) {
matchStarStoreStep(nodeFrom, c, nodeTo)
}
/**
* All clear steps associated with match.
*/
predicate matchClearStep(Node n, Content c) { matchMappingClearStep(n, c) }
}
import MatchUnpacking
/** Data flows from a sequence to a call to `pop` on the sequence. */
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// set.pop or list.pop
@@ -1613,11 +1953,10 @@ predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node no
}
/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
* Clear content at key `name` of the synthesized dictionary `TKwOverflowNode(call, callable)`,
* whenever `call` unpacks `name`.
*/
predicate clearsContent(Node n, Content c) {
predicate kwOverflowClearStep(Node n, Content c) {
exists(CallNode call, CallableValue callable, string name |
call_unpacks(call, _, callable, name, _) and
n = TKwOverflowNode(call, callable) and
@@ -1625,6 +1964,17 @@ predicate clearsContent(Node n, Content c) {
)
}
/**
 * Holds if values stored inside content `c` are cleared at node `n`. For example,
 * any value stored inside `f` is cleared at the pre-update node associated with `x`
 * in `x.f = newValue`.
 *
 * This is the union of the clear steps contributed by `match` statements
 * (`matchClearStep`) and by keyword-overflow dictionaries (`kwOverflowClearStep`).
 */
predicate clearsContent(Node n, Content c) {
  matchClearStep(n, c)
  or
  kwOverflowClearStep(n, c)
}
//--------
// Fancy context-sensitive guards
//--------
@@ -1649,18 +1999,6 @@ DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
*/
predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() }
//--------
// Misc
//--------
/**
* Holds if `n` does not require a `PostUpdateNode` as it either cannot be
* modified or its modification cannot be observed, for example if it is a
* freshly created object that is not saved in a variable.
*
* This predicate is only used for consistency checks.
*/
predicate isImmutableOrUnobservable(Node n) { none() }
int accessPathLimit() { result = 5 }
/**

View File

@@ -8,6 +8,7 @@ import semmle.python.dataflow.new.TypeTracker
import Attributes
import LocalSources
private import semmle.python.essa.SsaCompute
private import semmle.python.dataflow.new.internal.ImportStar
/**
* IPA type for data flow nodes.
@@ -24,13 +25,25 @@ newtype TNode =
/** A node corresponding to an SSA variable. */
TEssaNode(EssaVariable var) or
/** A node corresponding to a control flow node. */
TCfgNode(ControlFlowNode node) { isExpressionNode(node) } or
TCfgNode(ControlFlowNode node) {
isExpressionNode(node)
or
node.getNode() instanceof Pattern
} or
/** A synthetic node representing the value of an object before a state change */
TSyntheticPreUpdateNode(NeedsSyntheticPreUpdateNode post) or
/** A synthetic node representing the value of an object after a state change. */
TSyntheticPostUpdateNode(NeedsSyntheticPostUpdateNode pre) or
/** A node representing a global (module-level) variable in a specific module. */
TModuleVariableNode(Module m, GlobalVariable v) { v.getScope() = m and v.escapes() } or
TModuleVariableNode(Module m, GlobalVariable v) {
v.getScope() = m and
(
v.escapes()
or
isAccessedThroughImportStar(m) and
ImportStar::globalNameDefinedInModule(v.getId(), m)
)
} or
/**
* A node representing the overflow positional arguments to a call.
* That is, `call` contains more positional arguments than there are
@@ -70,7 +83,11 @@ newtype TNode =
* A synthetic node representing that there may be an iterable element
* for `consumer` to consume.
*/
TIterableElementNode(UnpackingAssignmentTarget consumer)
TIterableElementNode(UnpackingAssignmentTarget consumer) or
/**
* A synthetic node representing element content in a star pattern.
*/
TStarPatternElementNode(MatchStarPattern target)
/** Helper for `Node::getEnclosingCallable`. */
private DataFlowCallable getCallableScope(Scope s) {
@@ -346,6 +363,8 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
result.asCfgNode() = var.getALoad().getAFlowNode() and
// Ignore reads that happen when the module is imported. These are only executed once.
not result.getScope() = mod
or
this = import_star_read(result)
}
/** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
@@ -358,6 +377,13 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
override Location getLocation() { result = mod.getLocation() }
}
/** Holds if `m` is a module that `ImportStar::getStarImported` yields for some importing module. */
private predicate isAccessedThroughImportStar(Module m) { m = ImportStar::getStarImported(_) }
/**
 * Gets a module variable node that the name access `n` may read via `import *`:
 * the variable has the same name as `n`, and `n` resolves (through
 * `ImportStar::importStarResolvesTo`) to the module of the result.
 */
private ModuleVariableNode import_star_read(Node n) {
  result.getVariable().getId() = n.asCfgNode().(NameNode).getId() and
  ImportStar::importStarResolvesTo(n.asCfgNode(), result.getModule())
}
/**
* The node holding the extra positional arguments to a call. This node is passed as a tuple
* to the starred parameter of the callable.
@@ -458,6 +484,21 @@ class IterableElementNode extends Node, TIterableElementNode {
override Location getLocation() { result = consumer.getLocation() }
}
/**
 * A synthetic data-flow node standing for the element content of a star pattern.
 */
class StarPatternElementNode extends Node, TStarPatternElementNode {
  /** The control-flow node whose underlying AST node is the star pattern of this node. */
  CfgNode consumer;

  StarPatternElementNode() { this = TStarPatternElementNode(consumer.getNode().getNode()) }

  /** Gets the location of this node, which is the location of `consumer`. */
  override Location getLocation() { result = consumer.getLocation() }

  /** Gets the enclosing callable of this node, which is that of `consumer`. */
  override DataFlowCallable getEnclosingCallable() { result = consumer.getEnclosingCallable() }

  override string toString() { result = "StarPatternElement" }
}
/**
* A node that controls whether other nodes are evaluated.
*/

View File

@@ -15,6 +15,7 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
* Holds if data flows from `source` to `sink` in zero or more local
* (intra-procedural) steps.
*/
pragma[inline]
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
/**

View File

@@ -0,0 +1,95 @@
/** Provides predicates for reasoning about uses of `import *` in Python. */
private import python
private import semmle.python.dataflow.new.internal.Builtins
/**
* Predicates for resolving names that may be brought into scope by
* `from ... import *`.
*/
cached
module ImportStar {
/**
* Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
* built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
*
* `s` is the scope of `n` or one of its enclosing scopes, and contains an `import *`.
*/
cached
predicate namePossiblyDefinedInImportStar(NameNode n, string name, Scope s) {
n.isLoad() and
name = n.getId() and
s = n.getScope().getEnclosingScope*() and
exists(potentialImportStarBase(s)) and
// Not already defined locally (see `isDefinedLocally` for what that covers).
not isDefinedLocally(n.getNode())
}
/**
* Holds if `n` refers to a variable that is defined in the module in which it occurs.
*
* This covers local variables of an enclosing scope, built-in names, globals that
* are assigned in the enclosing module, and the names `__name__` and `__package__`.
*/
cached
private predicate isDefinedLocally(Name n) {
// Defined in an enclosing scope
enclosing_scope_defines_name(n.getScope(), n.getId())
or
// Defined as a built-in
n.getId() = Builtins::getBuiltinName()
or
// Defined as a global in this module
globalNameDefinedInModule(n.getId(), n.getEnclosingModule())
or
// A non-built-in that still has file-specific meaning
n.getId() in ["__name__", "__package__"]
}
/**
* Holds if `s`, or one of the scopes enclosing `s`, has a local variable called
* `name`, where `name` is not the name of a built-in.
*/
pragma[nomagic]
private predicate enclosing_scope_defines_name(Scope s, string name) {
exists(LocalVariable v |
v.getId() = name and v.getScope() = s and not name = Builtins::getBuiltinName()
)
or
enclosing_scope_defines_name(s.getEnclosingScope(), name)
}
/**
* Holds if a global variable called `name` is assigned a value in the module `m`.
*
* More precisely: there is a store to `name` in `m` that does not define a local variable.
*/
cached
predicate globalNameDefinedInModule(string name, Module m) {
exists(NameNode n |
not exists(LocalVariable v | n.defines(v)) and
n.isStore() and
name = n.getId() and
m = n.getEnclosingModule()
)
}
/**
* Holds if `n` may refer to a global variable of the same name in the module `m`, accessible
* from the scope of `n` by a chain of `import *` imports.
*
* Names for which `isDefinedLocally` holds are excluded.
*/
cached
predicate importStarResolvesTo(NameNode n, Module m) {
m = getStarImported+(n.getEnclosingModule()) and
globalNameDefinedInModule(n.getId(), m) and
not isDefinedLocally(n.getNode())
}
/**
* Gets a module that the module `m` imports via `import *`, that is, the scope
* of a module value pointed to by an `import *` whose scope is `m`.
*/
cached
Module getStarImported(Module m) {
exists(ImportStar i |
i.getScope() = m and result = i.getModule().pointsTo().(ModuleValue).getScope()
)
}
/**
* Gets the control-flow node for a module imported with `from ... import *` inside the scope `s`.
*
* For example, given
*
* ```python
* from foo.bar import *
* from quux import *
* ```
*
* this would return the control-flow nodes corresponding to `foo.bar` and `quux`.
*/
cached
ControlFlowNode potentialImportStarBase(Scope s) {
result = any(ImportStarNode n | n.getScope() = s).getModule()
}
}

View File

@@ -67,6 +67,6 @@ string prettyNodeForInlineTest(DataFlow::Node node) {
)
or
not exists(node.asExpr()) and
not exists(Expr e | e = node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()) and
not exists(node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()) and
result = node.toString()
}

View File

@@ -10,6 +10,12 @@ private import semmle.python.ApiGraphs
*/
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
/**
* Holds if `guard` should be a sanitizer guard in all global taint flow configurations
* but not in local taint.
*
* No guard is currently a default sanitizer guard: the body is `none()`.
*/
predicate defaultTaintSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
/**
* Holds if default `TaintTracking::Configuration`s should allow implicit reads
* of `c` at sinks and inputs to additional taint steps.
@@ -161,8 +167,25 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
*/
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
// construction by literal
// TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
DataFlowPrivate::storeStep(nodeFrom, _, nodeTo)
//
// TODO: once we have proper flow-summary modeling, we might not need this step any
// longer -- but there needs to be a matching read-step for the store-step, and we
// don't provide that right now.
DataFlowPrivate::listStoreStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::setStoreStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::tupleStoreStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::dictStoreStep(nodeFrom, _, nodeTo)
or
// comprehension, so there is taint-flow from `x` in `[x for x in xs]` to the
// resulting list of the list-comprehension.
//
// TODO: once we have proper flow-summary modeling, we might not need this step any
// longer -- but there needs to be a matching read-step for the store-step, and we
// don't provide that right now.
DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo)
or
// constructor call
exists(DataFlow::CallCfgNode call | call = nodeTo |

View File

@@ -14,12 +14,14 @@ private import semmle.python.Frameworks
* Holds if taint propagates from `source` to `sink` in zero or more local
* (intra-procedural) steps.
*/
pragma[inline]
predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) }
/**
 * Holds if taint can flow from the expression `e1` to the expression `e2` in
 * zero or more local (intra-procedural) steps, as determined by `localTaint`
 * on the data-flow nodes of the two expressions.
 */
pragma[inline]
predicate localExprTaint(Expr e1, Expr e2) {
  exists(DataFlow::Node source, DataFlow::Node sink |
    source = DataFlow::exprNode(e1) and
    sink = DataFlow::exprNode(e2)
  |
    localTaint(source, sink)
  )
}

View File

@@ -3,7 +3,7 @@
private import TypeTrackerSpecific
/**
* Any string that may appear as the name of a piece of content. This will usually include things like:
* A string that may appear as the name of a piece of content. This will usually include things like:
* - Attribute names (in Python)
* - Property names (in JavaScript)
*
@@ -18,7 +18,7 @@ class ContentName extends string {
ContentName() { this = getPossibleContentName() }
}
/** Either a content name, or the empty string (representing no content). */
/** A content name, or the empty string (representing no content). */
class OptionalContentName extends string {
OptionalContentName() { this instanceof ContentName or this = "" }
}
@@ -200,7 +200,7 @@ module StepSummary {
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content)
/**
* Summary of the steps needed to track a value to a given dataflow node.
* A summary of the steps needed to track a value to a given dataflow node.
*
* This can be used to track objects that implement a certain API in order to
* recognize calls to that API. Note that type-tracking does not by itself provide a
@@ -347,7 +347,7 @@ module TypeTracker {
private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalContentName content)
/**
* Summary of the steps needed to back-track a use of a value to a given dataflow node.
* A summary of the steps needed to back-track a use of a value to a given dataflow node.
*
* This can for example be used to track callbacks that are passed to a certain API,
* so we can model specific parameters of that callback as having a certain type.

View File

@@ -61,7 +61,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSource(DataFlow::Node source);
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
@@ -69,7 +69,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSink(DataFlow::Node sink);
override predicate isSink(DataFlow::Node sink) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -93,7 +93,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
}
/**

View File

@@ -61,7 +61,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSource(DataFlow::Node source);
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
@@ -69,7 +69,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSink(DataFlow::Node sink);
override predicate isSink(DataFlow::Node sink) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -93,7 +93,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
}
/**

View File

@@ -61,7 +61,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSource(DataFlow::Node source);
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
@@ -69,7 +69,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSink(DataFlow::Node sink);
override predicate isSink(DataFlow::Node sink) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -93,7 +93,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
}
/**

View File

@@ -61,7 +61,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSource(DataFlow::Node source);
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
@@ -69,7 +69,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSink(DataFlow::Node sink);
override predicate isSink(DataFlow::Node sink) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -93,7 +93,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
}
/**

View File

@@ -503,7 +503,7 @@ class TaintTrackingImplementation extends string {
TaintKind kind, string edgeLabel
) {
exists(PythonFunctionValue init, EssaVariable self, TaintTrackingContext callee |
instantiationCall(node.asCfgNode(), src, init, context, callee) and
this.instantiationCall(node.asCfgNode(), src, init, context, callee) and
this.(EssaTaintTracking).taintedDefinition(_, self.getDefinition(), callee, path, kind) and
self.getSourceVariable().(Variable).isSelf() and
BaseFlow::reaches_exit(self) and
@@ -789,9 +789,9 @@ private class EssaTaintTracking extends string {
TaintTrackingNode src, PyEdgeRefinement defn, TaintTrackingContext context, AttributePath path,
TaintKind kind
) {
taintedPiNodeOneway(src, defn, context, path, kind)
this.taintedPiNodeOneway(src, defn, context, path, kind)
or
taintedPiNodeBothways(src, defn, context, path, kind)
this.taintedPiNodeBothways(src, defn, context, path, kind)
}
pragma[noinline]
@@ -802,7 +802,7 @@ private class EssaTaintTracking extends string {
exists(DataFlow::Node srcnode, ControlFlowNode use |
src = TTaintTrackingNode_(srcnode, context, path, kind, this) and
not this.(TaintTracking::Configuration).isBarrierTest(defn.getTest(), defn.getSense()) and
defn.getSense() = testEvaluates(defn, defn.getTest(), use, src)
defn.getSense() = this.testEvaluates(defn, defn.getTest(), use, src)
)
}
@@ -898,7 +898,7 @@ private class EssaTaintTracking extends string {
)
)
or
result = testEvaluates(defn, not_operand(test), use, src).booleanNot()
result = this.testEvaluates(defn, not_operand(test), use, src).booleanNot()
}
/**
@@ -911,7 +911,7 @@ private class EssaTaintTracking extends string {
use = test
or
exists(ControlFlowNode notuse |
boolean_filter(test, notuse) and
this.boolean_filter(test, notuse) and
use = not_operand(notuse)
)
)

View File

@@ -70,6 +70,10 @@ abstract class SsaSourceVariable extends @py_variable {
SsaSource::exception_capture(this, def)
or
SsaSource::with_definition(this, def)
or
SsaSource::pattern_capture_definition(this, def)
or
SsaSource::pattern_alias_definition(this, def)
}
/**

View File

@@ -8,7 +8,7 @@ import semmle.python.essa.Definitions
/** An (enhanced) SSA variable derived from `SsaSourceVariable`. */
class EssaVariable extends TEssaDefinition {
/** Gets the (unique) definition of this variable. */
/** Gets the (unique) definition of this variable. */
EssaDefinition getDefinition() { this = result }
/**
@@ -545,6 +545,24 @@ class WithDefinition extends EssaNodeDefinition {
override string getRepresentation() { result = "with" }
}
/** An ESSA node definition of a variable that is bound by a capture pattern. */
class PatternCaptureDefinition extends EssaNodeDefinition {
  PatternCaptureDefinition() {
    exists(ControlFlowNode defn | defn = this.getDefiningNode() |
      SsaSource::pattern_capture_definition(this.getSourceVariable(), defn)
    )
  }

  /** Gets the textual representation of this kind of definition. */
  override string getRepresentation() { result = "pattern capture" }
}
/** An ESSA node definition of a variable that is bound as the alias of an as-pattern. */
class PatternAliasDefinition extends EssaNodeDefinition {
  PatternAliasDefinition() {
    exists(ControlFlowNode defn | defn = this.getDefiningNode() |
      SsaSource::pattern_alias_definition(this.getSourceVariable(), defn)
    )
  }

  /** Gets the textual representation of this kind of definition. */
  override string getRepresentation() { result = "pattern alias" }
}
/** A definition of a variable by declaring it as a parameter */
class ParameterDefinition extends EssaNodeDefinition {
ParameterDefinition() {

View File

@@ -478,12 +478,11 @@ private module SsaComputeImpl {
predicate adjacentUseUse(ControlFlowNode use1, ControlFlowNode use2) {
adjacentUseUseSameVar(use1, use2)
or
exists(SsaSourceVariable v, EssaDefinition def, BasicBlock b1, int i1, BasicBlock b2, int i2 |
exists(SsaSourceVariable v, PhiFunction def, BasicBlock b1, int i1, BasicBlock b2, int i2 |
adjacentVarRefs(v, b1, i1, b2, i2) and
variableUse(v, use1, b1, i1) and
definesAt(def, v, b2, i2) and
firstUse(def, use2) and
def instanceof PhiFunction
variableUse(pragma[only_bind_into](v), use1, b1, i1) and
definesAt(def, pragma[only_bind_into](v), b2, i2) and
firstUse(def, use2)
)
}

View File

@@ -40,6 +40,28 @@ module SsaSource {
)
}
/**
 * Holds if `v` is defined by a capture pattern, where `defn` is a control-flow
 * node of the captured name and that name is a store to `v`.
 */
cached
predicate pattern_capture_definition(Variable v, ControlFlowNode defn) {
  exists(Name target |
    target = any(MatchCapturePattern capture).getVariable() and
    target = v.getAStore() and
    defn = target.getAFlowNode()
  )
}
/**
 * Holds if `v` is defined as the alias of an as-pattern, where `defn` is a
 * control-flow node of the alias name and that name is a store to `v`.
 */
cached
predicate pattern_alias_definition(Variable v, ControlFlowNode defn) {
  exists(Name target |
    target = any(MatchAsPattern pattern).getAlias() and
    target = v.getAStore() and
    defn = target.getAFlowNode()
  )
}
/** Holds if `v` is defined by multiple assignment at `defn`. */
cached
predicate multi_assignment_definition(Variable v, ControlFlowNode defn, int n, SequenceNode lhs) {
@@ -127,7 +149,7 @@ module SsaSource {
not test_contains(_, call)
}
/** Holds if an attribute is deleted at `def` and `use` is the use of `v` for that deletion */
/** Holds if an attribute is deleted at `def` and `use` is the use of `v` for that deletion */
cached
predicate attribute_deletion_refinement(Variable v, NameNode use, DeletionNode def) {
use.uses(v) and

View File

@@ -2295,4 +2295,22 @@ module PrivateDjango {
override string getMimetypeDefault() { none() }
}
// ---------------------------------------------------------------------------
// Logging
// ---------------------------------------------------------------------------
/**
 * The `request_logger` member of `django.utils.log`, modeled as a source of
 * standard Python logger instances.
 * see https://github.com/django/django/blob/stable/4.0.x/django/utils/log.py#L11
 */
private class DjangoLogger extends Stdlib::Logger::InstanceSource {
  DjangoLogger() {
    exists(API::Node logModule |
      logModule = API::moduleImport("django").getMember("utils").getMember("log")
    |
      this = logModule.getMember("request_logger").getAnImmediateUse()
    )
  }
}
}

View File

@@ -33,7 +33,7 @@ private module FastApi {
module APIRouter {
/** Gets a reference to an instance of `fastapi.APIRouter`. */
API::Node instance() {
result = API::moduleImport("fastapi").getMember("APIRouter").getReturn()
result = API::moduleImport("fastapi").getMember("APIRouter").getASubclass*().getReturn()
}
}
@@ -163,7 +163,7 @@ private module FastApi {
exists(Class cls, API::Node base |
base = getModeledResponseClass(_).getASubclass*() and
cls.getABase() = base.getAUse().asExpr() and
responseClass.getAnImmediateUse().asExpr().(ClassExpr) = cls.getParent()
responseClass.getAnImmediateUse().asExpr() = cls.getParent()
|
exists(Assign assign | assign = cls.getAStmt() |
assign.getATarget().(Name).getId() = "media_type" and
@@ -226,6 +226,17 @@ private module FastApi {
}
}
/**
 * A direct instantiation of a `FileResponse`, which is also treated as a
 * file system access.
 */
private class FileResponseInstantiation extends ResponseInstantiation, FileSystemAccess::Range {
  FileResponseInstantiation() { getModeledResponseClass("FileResponse") = baseApiNode }

  /** Gets the path argument: the first positional argument or the keyword argument `path`. */
  override DataFlow::Node getAPathArgument() {
    result = this.getArg(0)
    or
    result = this.getArgByName("path")
  }
}
/**
* An implicit response from a return of FastAPI request handler.
*/
@@ -256,7 +267,8 @@ private module FastApi {
* An implicit response from a return of FastAPI request handler, that has
* `response_class` set to a `FileResponse`.
*/
private class FastApiRequestHandlerFileResponseReturn extends FastApiRequestHandlerReturn {
private class FastApiRequestHandlerFileResponseReturn extends FastApiRequestHandlerReturn,
FileSystemAccess::Range {
FastApiRequestHandlerFileResponseReturn() {
exists(API::Node responseClass |
responseClass.getAUse() = routeSetup.getResponseClassArg() and
@@ -265,6 +277,8 @@ private module FastApi {
}
override DataFlow::Node getBody() { none() }
override DataFlow::Node getAPathArgument() { result = this }
}
/**

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.frameworks.Werkzeug
private import semmle.python.frameworks.Stdlib
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.security.dataflow.PathInjectionCustomizations
@@ -569,4 +570,18 @@ module Flask {
result in [this.getArg(0), this.getArgByName("filename_or_fp")]
}
}
// ---------------------------------------------------------------------------
// Logging
// ---------------------------------------------------------------------------
/**
 * An immediate use of the `logger` attribute of a Flask application, modeled as a
 * source of standard-library logger instances.
 *
 * See
 * - https://flask.palletsprojects.com/en/2.0.x/api/#flask.Flask.logger
 * - https://flask.palletsprojects.com/en/2.0.x/logging/
 */
private class FlaskLogger extends Stdlib::Logger::InstanceSource {
  FlaskLogger() {
    exists(API::Node loggerAttr | loggerAttr = FlaskApp::instance().getMember("logger") |
      this = loggerAttr.getAnImmediateUse()
    )
  }
}
}

View File

@@ -0,0 +1,75 @@
/**
* Provides classes modeling security-relevant aspects of the `python-ldap` PyPI package (imported as `ldap`).
* See https://www.python-ldap.org/en/python-ldap-3.3.0/index.html
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
 * Models of the `python-ldap` PyPI package, which is imported under the name `ldap`.
 *
 * See https://www.python-ldap.org/en/python-ldap-3.3.0/index.html
 */
private module Ldap {
  /**
   * A call to one of the `search*` methods on the object returned by `ldap.initialize`,
   * which executes an LDAP query.
   *
   * See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
   */
  private class LdapQueryExecution extends DataFlow::CallCfgNode, LDAP::LdapExecution::Range {
    LdapQueryExecution() {
      exists(API::Node connection |
        connection = API::moduleImport("ldap").getMember("initialize").getReturn()
      |
        this =
          connection
              .getMember(["search", "search_s", "search_st", "search_ext", "search_ext_s"])
              .getACall()
      )
    }

    override DataFlow::Node getFilter() {
      // third positional argument, or the `filterstr` keyword argument
      result = this.getArg(2)
      or
      result = this.getArgByName("filterstr")
    }

    override DataFlow::Node getBaseDn() {
      // first positional argument, or the `base` keyword argument
      result = this.getArg(0)
      or
      result = this.getArgByName("base")
    }
  }

  /**
   * A call to `ldap.dn.escape_dn_chars`, which escapes its input for use in an LDAP DN.
   *
   * See https://github.com/python-ldap/python-ldap/blob/7ce471e238cdd9a4dd8d17baccd1c9e05e6f894a/Lib/ldap/dn.py#L17
   */
  private class LdapEscapeDnCall extends DataFlow::CallCfgNode, Escaping::Range {
    LdapEscapeDnCall() {
      exists(API::Node escapeFunction |
        escapeFunction = API::moduleImport("ldap").getMember("dn").getMember("escape_dn_chars")
      |
        this = escapeFunction.getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("s")
    }

    override DataFlow::Node getOutput() { this = result }

    override string getKind() { Escaping::getLdapDnKind() = result }
  }

  /**
   * A call to `ldap.filter.escape_filter_chars`, which escapes its input for use in an
   * LDAP filter.
   *
   * See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap-filter.html#ldap.filter.escape_filter_chars
   */
  private class LdapEscapeFilterCall extends DataFlow::CallCfgNode, Escaping::Range {
    LdapEscapeFilterCall() {
      exists(API::Node escapeFunction |
        escapeFunction =
          API::moduleImport("ldap").getMember("filter").getMember("escape_filter_chars")
      |
        this = escapeFunction.getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("assertion_value")
    }

    override DataFlow::Node getOutput() { this = result }

    override string getKind() { Escaping::getLdapFilterKind() = result }
  }
}

View File

@@ -0,0 +1,80 @@
/**
* Provides classes modeling security-relevant aspects of the `ldap3` PyPI package
* See https://pypi.org/project/ldap3/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
 * Models of the `ldap3` PyPI package.
 *
 * See https://pypi.org/project/ldap3/
 */
private module Ldap3 {
  /**
   * A call to the `search` method on an instance of `ldap3.Connection`, which executes
   * an LDAP query.
   */
  private class LdapQueryExecution extends DataFlow::CallCfgNode, LDAP::LdapExecution::Range {
    LdapQueryExecution() {
      exists(API::Node connection |
        connection = API::moduleImport("ldap3").getMember("Connection").getReturn()
      |
        this = connection.getMember("search").getACall()
      )
    }

    override DataFlow::Node getFilter() {
      // second positional argument, or the `search_filter` keyword argument
      result = this.getArg(1)
      or
      result = this.getArgByName("search_filter")
    }

    override DataFlow::Node getBaseDn() {
      // first positional argument, or the `search_base` keyword argument
      result = this.getArg(0)
      or
      result = this.getArgByName("search_base")
    }
  }

  /**
   * A call to `ldap3.utils.dn.escape_rdn`, which escapes its input for use in an LDAP DN.
   *
   * See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/dn.py#L390
   */
  private class LdapEscapeDnCall extends DataFlow::CallCfgNode, Escaping::Range {
    LdapEscapeDnCall() {
      exists(API::Node escapeFunction |
        escapeFunction =
          API::moduleImport("ldap3").getMember("utils").getMember("dn").getMember("escape_rdn")
      |
        this = escapeFunction.getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("rdn")
    }

    override DataFlow::Node getOutput() { this = result }

    override string getKind() { Escaping::getLdapDnKind() = result }
  }

  /**
   * A call to `ldap3.utils.conv.escape_filter_chars`, which escapes its input for use in
   * an LDAP filter.
   *
   * See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/conv.py#L91
   */
  private class LdapEscapeFilterCall extends DataFlow::CallCfgNode, Escaping::Range {
    LdapEscapeFilterCall() {
      exists(API::Node escapeFunction |
        escapeFunction =
          API::moduleImport("ldap3")
              .getMember("utils")
              .getMember("conv")
              .getMember("escape_filter_chars")
      |
        this = escapeFunction.getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("text")
    }

    override DataFlow::Node getOutput() { this = result }

    override string getKind() { Escaping::getLdapFilterKind() = result }
  }
}

View File

@@ -0,0 +1,171 @@
/**
* Provides classes modeling security-relevant aspects of the `requests` PyPI package.
*
* See
* - https://pypi.org/project/requests/
* - https://docs.python-requests.org/en/latest/
*/
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.Stdlib
/**
* INTERNAL: Do not use.
*
* Provides models for the `requests` PyPI package.
*
* See
* - https://pypi.org/project/requests/
* - https://docs.python-requests.org/en/latest/
*/
private module Requests {
/**
 * An outgoing HTTP request made with the `requests` library, either through a
 * module-level function (such as `requests.get`) or through the corresponding
 * method on a session object.
 */
private class OutgoingRequestCall extends HTTP::Client::Request::Range, DataFlow::CallCfgNode {
  /** The name of the called function/method: a value of `HTTP::httpVerbLower()` or `request`. */
  string methodName;

  OutgoingRequestCall() {
    (methodName = HTTP::httpVerbLower() or methodName = "request") and
    exists(API::Node requestsModule | requestsModule = API::moduleImport("requests") |
      // module-level functions, such as `requests.get(...)`
      this = requestsModule.getMember(methodName).getACall()
      or
      // methods on a session, which is created through `Session`/`session` exported by
      // either `requests` or `requests.sessions`
      exists(API::Node sessionFactory |
        sessionFactory = requestsModule.getMember(["Session", "session"])
        or
        sessionFactory = requestsModule.getMember("sessions").getMember(["Session", "session"])
      |
        this = sessionFactory.getReturn().getMember(methodName).getACall()
      )
    )
  }

  override DataFlow::Node getAUrlPart() {
    // `request(method, url, ...)` takes the URL as second positional argument, whereas
    // the verb-specific variants take it as first positional argument
    (if methodName = "request" then result = this.getArg(1) else result = this.getArg(0))
    or
    result = this.getArgByName("url")
  }

  /** Gets the `verify` argument to this outgoing requests call. */
  DataFlow::Node getVerifyArg() { this.getArgByName("verify") = result }

  override predicate disablesCertificateValidation(
    DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
  ) {
    // the origin must be an immutable literal whose boolean value is false, but not `None`
    not argumentOrigin.asExpr() instanceof None and
    argumentOrigin.asExpr().(ImmutableLiteral).booleanValue() = false and
    argumentOrigin = verifyArgBacktracker(disablingNode) and
    disablingNode = this.getVerifyArg()
  }

  override string getFramework() { "requests" = result }
}
/**
 * An additional taint step from any URL part of an outgoing requests call to the call
 * itself, so that the response to a user-controlled URL is considered tainted.
 */
private class OutgoingRequestCallTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(OutgoingRequestCall request | request = nodeTo |
      nodeFrom = request.getAUrlPart()
    )
  }
}
/**
 * Gets a back-reference to the verify argument `arg` of an outgoing requests call,
 * where `t` is the type-back-tracking state reached so far.
 */
private DataFlow::TypeTrackingNode verifyArgBacktracker(
  DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
  // recursive step: extend a previously found back-reference
  exists(DataFlow::TypeBackTracker later |
    result = verifyArgBacktracker(later, arg).backtrack(later, t)
  )
  or
  // base case: a local source of the `verify` argument of some outgoing requests call
  t.start() and
  exists(OutgoingRequestCall call | arg = call.getVerifyArg()) and
  result = arg.getALocalSource()
}

/** Gets a back-reference to the verify argument `arg`, once back-tracking has ended. */
private DataFlow::LocalSourceNode verifyArgBacktracker(DataFlow::Node arg) {
  exists(DataFlow::TypeBackTracker done | done = DataFlow::TypeBackTracker::end() |
    result = verifyArgBacktracker(done, arg)
  )
}
// ---------------------------------------------------------------------------
// Response
// ---------------------------------------------------------------------------
/**
 * Models of the `requests.models.Response` class.
 *
 * See https://docs.python-requests.org/en/latest/api/#requests.Response.
 */
module Response {
  /** Gets a reference to the `requests.models.Response` class. */
  private API::Node classRef() {
    exists(API::Node requestsModule | requestsModule = API::moduleImport("requests") |
      // the class is also reachable directly as `requests.Response`
      result = requestsModule.getMember("Response")
      or
      result = requestsModule.getMember("models").getMember("Response")
    )
  }

  /**
   * A source of instances of `requests.models.Response`. Extend this class to model
   * new instances.
   *
   * Such a source can be an instantiation of the class, the return value of a function
   * call, or a special parameter that is set when a function is called by an external
   * library.
   *
   * Use the predicate `Response::instance()` to get references to instances of
   * `requests.models.Response`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** A call that directly instantiates `requests.models.Response`. */
  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
    ClassInstantiation() { classRef().getACall() = this }
  }

  /** The return value from making a request. */
  private class RequestReturnValue extends InstanceSource, DataFlow::Node {
    RequestReturnValue() { this instanceof OutgoingRequestCall }
  }

  /** Gets a reference to an instance of `requests.models.Response`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // recursive step: keep tracking an already found reference
    exists(DataFlow::TypeTracker earlier | result = instance(earlier).track(earlier, t))
    or
    // base case: any modeled instance source
    result instanceof InstanceSource and
    t.start()
  }

  /** Gets a reference to an instance of `requests.models.Response`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end())
    |
      tracked.flowsTo(result)
    )
  }

  /**
   * Taint propagation for `requests.models.Response`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "requests.models.Response" }

    override DataFlow::Node getInstance() { instance() = result }

    override string getAttributeName() {
      result = ["text", "content", "raw", "links", "cookies", "headers"]
    }

    override string getMethodName() { result = ["json", "iter_content", "iter_lines"] }

    override string getAsyncMethodName() { none() }
  }

  /** A read of the `raw` attribute on a response instance, modeled as a file-like instance. */
  private class FileLikeInstances extends Stdlib::FileLikeObject::InstanceSource {
    FileLikeInstances() {
      exists(DataFlow::AttrRead rawRead | rawRead = this |
        rawRead.getAttributeName() = "raw" and
        rawRead.getObject() = instance()
      )
    }
  }
}
}

View File

@@ -211,6 +211,13 @@ module SqlAlchemy {
.getReturn()
.getMember("begin")
.getACall()
or
this =
API::moduleImport("sqlalchemy")
.getMember("orm")
.getMember("scoped_session")
.getReturn()
.getACall()
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -62,6 +62,22 @@ private DataFlow::TypeTrackingNode poorMansFunctionTracker(DataFlow::TypeTracker
exists(DataFlow::TypeTracker t2 | result = poorMansFunctionTracker(t2, func).track(t2, t))
}
/**
* Gets a reference to `func`, found as a read of an attribute named like `func` on
* (a node that has as local source) the first parameter of a method of the class
* that has `func` as a method.
*
* `func` must be defined inside a class. Typically the reference will be inside a
* different method of the same class, but nothing below requires `otherFunc` to differ
* from `func`, so references to `func` from within `func` itself are found as well.
*/
private DataFlow::Node getSimpleMethodReferenceWithinClass(Function func) {
// TODO: Should take MRO into account
exists(Class cls, Function otherFunc, DataFlow::Node selfRefOtherFunc |
// `func` and `otherFunc` are both methods of `cls`. `pragma[only_bind_into]` is a
// binding hint for the optimizer; it does not change which values satisfy the formula.
pragma[only_bind_into](cls).getAMethod() = func and
pragma[only_bind_into](cls).getAMethod() = otherFunc
|
// `selfRefOtherFunc` has the first parameter of `otherFunc` (conventionally `self`)
// as its local source
selfRefOtherFunc.getALocalSource().(DataFlow::ParameterNode).getParameter() =
otherFunc.getArg(0) and
// the result reads the attribute with the same name as `func` from that node
result.(DataFlow::AttrRead).accesses(selfRefOtherFunc, func.getName())
)
}
/**
* INTERNAL: Do not use.
*
@@ -80,7 +96,20 @@ private DataFlow::TypeTrackingNode poorMansFunctionTracker(DataFlow::TypeTracker
* inst = MyClass()
* print(inst.my_method)
* ```
*
* But it is able to handle simple method calls within a class (but does not take MRO into
* account).
* ```py
* class MyClass:
*     def method1(self):
*         pass
*
*     def method2(self):
*         self.method1()
* ```
*/
DataFlow::Node poorMansFunctionTracker(Function func) {
  // simple `self.<name>` references from methods of the class defining `func`
  result = getSimpleMethodReferenceWithinClass(func)
  or
  // references found through type tracking
  exists(DataFlow::TypeTrackingNode tracked |
    tracked = poorMansFunctionTracker(DataFlow::TypeTracker::end(), func)
  |
    tracked.flowsTo(result)
  )
}

View File

@@ -89,7 +89,7 @@ class PythonFunctionObjectInternal extends CallableObjectInternal, TPythonFuncti
origin = CfgOrigin::fromCfgNode(forigin)
)
or
procedureReturnsNone(callee, obj, origin)
this.procedureReturnsNone(callee, obj, origin)
}
private predicate procedureReturnsNone(
@@ -382,7 +382,7 @@ class BuiltinMethodObjectInternal extends CallableObjectInternal, TBuiltinMethod
/**
* Class representing bound-methods.
* Note that built-in methods, such as `[].append` are also represented as bound-methods.
* Although built-in methods and bound-methods are distinct classes in CPython, their behaviour
* Although built-in methods and bound-methods are distinct classes in CPython, their behavior
* is the same and we treat them identically.
*/
class BoundMethodObjectInternal extends CallableObjectInternal, TBoundMethod {

View File

@@ -27,7 +27,8 @@ class PropertyInternal extends ObjectInternal, TProperty {
or
// x = property(getter, setter, deleter)
exists(ControlFlowNode setter_arg |
setter_arg = getCallNode().getArg(1) or setter_arg = getCallNode().getArgByName("fset")
setter_arg = this.getCallNode().getArg(1) or
setter_arg = this.getCallNode().getArgByName("fset")
|
PointsToInternal::pointsTo(setter_arg, this.getContext(), result, _)
)
@@ -43,7 +44,8 @@ class PropertyInternal extends ObjectInternal, TProperty {
or
// x = property(getter, setter, deleter)
exists(ControlFlowNode deleter_arg |
deleter_arg = getCallNode().getArg(2) or deleter_arg = getCallNode().getArgByName("fdel")
deleter_arg = this.getCallNode().getArg(2) or
deleter_arg = this.getCallNode().getArgByName("fdel")
|
PointsToInternal::pointsTo(deleter_arg, this.getContext(), result, _)
)

View File

@@ -138,8 +138,8 @@ class Value extends TObject {
* The result can be `none()`, but never both `true` and `false`.
*/
boolean getDefiniteBooleanValue() {
result = getABooleanValue() and
not (getABooleanValue() = true and getABooleanValue() = false)
result = this.getABooleanValue() and
not (this.getABooleanValue() = true and this.getABooleanValue() = false)
}
}
@@ -197,7 +197,7 @@ class ModuleValue extends Value instanceof ModuleObjectInternal {
/** When used (exclusively) as a script (will not include normal modules that can also be run as a script) */
predicate isUsedAsScript() {
not isUsedAsModule() and
not this.isUsedAsModule() and
(
not this.getPath().getExtension() = "py"
or

View File

@@ -150,8 +150,10 @@ newtype TObject =
TBuiltinTuple(Builtin bltn) { bltn.getClass() = Builtin::special("tuple") } or
/** Represents a tuple in the Python source */
TPythonTuple(TupleNode origin, PointsToContext context) {
origin.isLoad() and
context.appliesTo(origin)
exists(Scope s |
context.appliesToScope(s) and
scope_loads_tuplenode(s, origin)
)
} or
/** Varargs tuple */
TVarargsTuple(CallNode call, PointsToContext context, int offset, int length) {
@@ -175,7 +177,7 @@ newtype TObject =
not count(instantiation.getAnArg()) = 1 and
Types::getMro(metacls).contains(TType())
} or
/** Represents `sys.version_info`. Acts like a tuple with a range of values depending on the version being analysed. */
/** Represents `sys.version_info`. Acts like a tuple with a range of values depending on the version being analyzed. */
TSysVersionInfo() or
/** Represents a module that is inferred to perhaps exist, but is not present in the database. */
TAbsentModule(string name) { missing_imported_module(_, _, name) } or
@@ -201,6 +203,13 @@ newtype TObject =
Expressions::subscriptPartsPointsTo(_, _, generic, index)
}
/**
* Join-order helper for `TPythonTuple`.
*
* Holds if the tuple node `origin` is a load and `s` is the scope of `origin`.
*
* The `pragma[nomagic]` annotation is an optimizer hint (it prevents the magic sets
* transformation for this predicate) and does not change the result.
*/
pragma[nomagic]
private predicate scope_loads_tuplenode(Scope s, TupleNode origin) {
origin.isLoad() and
origin.getScope() = s
}
/** Holds if the object `t` is a type. */
predicate isType(ObjectInternal t) {
t.isClass() = true

View File

@@ -75,9 +75,9 @@ class ClassList extends TClassList {
this = Empty() and result = ""
or
exists(ClassObjectInternal head | head = this.getHead() |
this.getTail() = Empty() and result = className(head)
this.getTail() = Empty() and result = this.className(head)
or
this.getTail() != Empty() and result = className(head) + ", " + this.getTail().contents()
this.getTail() != Empty() and result = this.className(head) + ", " + this.getTail().contents()
)
}
@@ -331,9 +331,9 @@ private class ClassListList extends TClassListList {
ClassObjectInternal bestMergeCandidate(int n) {
exists(ClassObjectInternal head | head = this.getItem(n).getHead() |
legalMergeCandidate(head) and result = head
this.legalMergeCandidate(head) and result = head
or
illegalMergeCandidate(head) and result = this.bestMergeCandidate(n + 1)
this.illegalMergeCandidate(head) and result = this.bestMergeCandidate(n + 1)
)
}

View File

@@ -656,6 +656,7 @@ module PointsToInternal {
builtin_not_in_outer_scope(def, context, value, origin)
}
pragma[nomagic]
private predicate undefined_variable(
ScopeEntryDefinition def, PointsToContext context, ObjectInternal value, ControlFlowNode origin
) {
@@ -674,6 +675,7 @@ module PointsToInternal {
origin = def.getDefiningNode()
}
pragma[nomagic]
private predicate builtin_not_in_outer_scope(
ScopeEntryDefinition def, PointsToContext context, ObjectInternal value, ControlFlowNode origin
) {
@@ -914,7 +916,7 @@ private module InterModulePointsTo {
private predicate exportsSubmodule(Folder folder, string name) {
name.regexpMatch("\\p{L}(\\p{L}|\\d|_)*") and
(
exists(Folder child | child = folder.getChildContainer(name))
folder.getChildContainer(name) instanceof Folder
or
exists(folder.getFile(name + ".py"))
)
@@ -1332,13 +1334,13 @@ module InterProceduralPointsTo {
predicate callsite_points_to(
CallsiteRefinement def, PointsToContext context, ObjectInternal value, CfgOrigin origin
) {
exists(SsaSourceVariable srcvar | srcvar = def.getSourceVariable() |
exists(SsaSourceVariable srcvar | pragma[only_bind_into](srcvar) = def.getSourceVariable() |
if srcvar instanceof EscapingAssignmentGlobalVariable
then
/* If global variable can be reassigned, we need to track it through calls */
exists(EssaVariable var, Function func, PointsToContext callee |
callsite_calls_function(def.getCall(), context, func, callee, _) and
var_at_exit(srcvar, func, var) and
var_at_exit(pragma[only_bind_into](srcvar), func, var) and
PointsToInternal::variablePointsTo(var, callee, value, origin)
)
or

View File

@@ -198,14 +198,11 @@ abstract class RegexString extends Expr {
/** Whether there is a character class, between start (inclusive) and end (exclusive) */
predicate charSet(int start, int end) {
exists(int inner_start, int inner_end |
exists(int inner_start |
this.char_set_start(start, inner_start) and
not this.char_set_start(_, start)
|
end = inner_end + 1 and
inner_end > inner_start and
this.nonEscapedCharAt(inner_end) = "]" and
not exists(int mid | this.nonEscapedCharAt(mid) = "]" | mid > inner_start and mid < inner_end)
end - 1 = min(int i | this.nonEscapedCharAt(i) = "]" and inner_start < i)
)
}
@@ -344,9 +341,7 @@ abstract class RegexString extends Expr {
this.escapingChar(start) and
this.getChar(start + 1) = "N" and
this.getChar(start + 2) = "{" and
this.getChar(end - 1) = "}" and
end > start and
not exists(int i | start + 2 < i and i < end - 1 | this.getChar(i) = "}")
end - 1 = min(int i | start + 2 < i and this.getChar(i) = "}")
}
/**
@@ -375,7 +370,7 @@ abstract class RegexString extends Expr {
// 32-bit hex value \Uhhhhhhhh
this.getChar(start + 1) = "U" and end = start + 10
or
escapedName(start, end)
this.escapedName(start, end)
or
// escape not handled above, update when adding a new case
not this.getChar(start + 1) in ["x", "u", "U", "N"] and
@@ -437,11 +432,18 @@ abstract class RegexString extends Expr {
}
predicate specialCharacter(int start, int end, string char) {
not this.inCharSet(start) and
this.character(start, end) and
end = start + 1 and
char = this.getChar(start) and
(char = "$" or char = "^" or char = ".") and
not this.inCharSet(start)
(
end = start + 1 and
char = this.getChar(start) and
(char = "$" or char = "^" or char = ".")
or
end = start + 2 and
this.escapingChar(start) and
char = this.getText().substring(start, end) and
char = ["\\A", "\\Z", "\\b", "\\B"]
)
}
/** Whether the text in the range start,end is a group */
@@ -454,6 +456,7 @@ abstract class RegexString extends Expr {
/** Gets the number of the group in start,end */
int getGroupNumber(int start, int end) {
this.group(start, end) and
not this.non_capturing_group_start(start, _) and
result =
count(int i | this.group(i, _) and i < start and not this.non_capturing_group_start(i, _)) + 1
}
@@ -900,7 +903,8 @@ abstract class RegexString extends Expr {
exists(int x | this.firstPart(x, end) |
this.emptyMatchAtStartGroup(x, start) or
this.qualifiedItem(x, start, true, _) or
this.specialCharacter(x, start, "^")
// ^ and \A match the start of the string
this.specialCharacter(x, start, ["^", "\\A"])
)
or
exists(int y | this.firstPart(start, y) |
@@ -925,9 +929,8 @@ abstract class RegexString extends Expr {
or
this.qualifiedItem(end, y, true, _)
or
this.specialCharacter(end, y, "$")
or
y = end + 2 and this.escapingChar(end) and this.getChar(end + 1) = "Z"
// $ and \Z match the end of the string.
this.specialCharacter(end, y, ["$", "\\Z"])
)
or
exists(int x |

View File

@@ -0,0 +1,306 @@
/**
* Provides predicates for reasoning about bad tag filter vulnerabilities.
*/
import performance.ReDoSUtil
/**
* A module for determining if a regexp matches a given string,
* and reasoning about which capture groups are filled by a given string.
*/
private module RegexpMatching {
/**
 * A root regular expression term that can be tested against a set of strings.
 *
 * Extend this class and override `test`/`testWithGroups` to configure which strings
 * should be tested for acceptance by this regular expression. The outcome can
 * afterwards be read from the `matches` predicate.
 *
 * Strings in the `testWithGroups` predicate are additionally tested for which capture
 * groups are filled by the given string. That outcome is available in the
 * `fillsCaptureGroup` predicate.
 */
abstract class MatchedRegExp extends RegExpTerm {
  MatchedRegExp() { this.isRootTerm() }

  /**
   * Holds if it should be tested whether this regular expression matches `str`.
   *
   * If `ignorePrefix` is true, then a regexp without a start anchor will be treated as
   * if it had a start anchor.
   * E.g. a regular expression `/foo$/` will match any string that ends with "foo",
   * but if `ignorePrefix` is true, it will only match "foo".
   */
  predicate test(string str, boolean ignorePrefix) {
    none() // may be overridden in subclasses
  }

  /**
   * Same as `test(..)`, but for these strings the `fillsCaptureGroup` predicate
   * afterwards tells which capture groups were filled by the given string.
   */
  predicate testWithGroups(string str, boolean ignorePrefix) {
    none() // may be overridden in subclasses
  }

  /**
   * Holds if this RegExp matches `str`, where `str` is either in the `test` or
   * `testWithGroups` predicate.
   */
  final predicate matches(string str) {
    // some state reached after the last char of `str` can reach an accepting state
    // through epsilon transitions only
    epsilonSucc*(getAState(this, str.length() - 1, str, _)) = Accept(_)
  }

  /**
   * Holds if matching `str` may fill capture group number `g`.
   * Only holds if `str` is in the `testWithGroups` predicate.
   */
  final predicate fillsCaptureGroup(string str, int g) {
    g = group(getAStateThatReachesAccept(this, _, str, _).getRepr())
  }
}
/**
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
* The regular expression is modeled as a non-deterministic finite automaton,
* the regular expression can therefore be in multiple states after matching a character.
*
* `str` is a string from `reg.test(..)` or `reg.testWithGroups(..)`, and `ignorePrefix` is the
* flag that was given for that string. Index `-1` denotes the position before any chars have
* been matched.
*
* It's a forward search to all possible states, and there is thus no guarantee that the state is on a path to an accepting state.
*/
private State getAState(MatchedRegExp reg, int i, string str, boolean ignorePrefix) {
// start state, the -1 position before any chars have been matched
i = -1 and
(
reg.test(str, ignorePrefix)
or
reg.testWithGroups(str, ignorePrefix)
) and
result.getRepr().getRootTerm() = reg and
isStartState(result)
or
// recursive case: any state that is reached by a step ending at index `i`
result = getAStateAfterMatching(reg, _, str, i, _, ignorePrefix)
}
/**
* Gets the next state after the `prev` state from `reg`.
* `prev` is the state after matching `fromIndex` chars in `str`,
* and the result is the state after matching `toIndex` chars in `str`.
*
* A step either consumes one char (`fromIndex = toIndex - 1`, see `getAStateAfterMatchingAux`),
* or it is a step past a word boundary, which consumes no char (`fromIndex = toIndex`).
*
* This predicate is used as a step relation in the forwards search (`getAState`),
* and also as a step relation in the later backwards search (`getAStateThatReachesAccept`).
*/
private State getAStateAfterMatching(
MatchedRegExp reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
// the basic recursive case - outlined into a noopt helper to make performance work out.
result = getAStateAfterMatchingAux(reg, prev, str, toIndex, fromIndex, ignorePrefix)
or
// we can skip past word boundaries if the next char is a non-word char.
// this step does not consume a char, so the index stays the same.
fromIndex = toIndex and
prev.getRepr() instanceof RegExpWordBoundary and
prev = getAState(reg, toIndex, str, ignorePrefix) and
after(prev.getRepr()) = result and
str.charAt(toIndex + 1).regexpMatch("\\W") // \W matches any non-word char.
}
/**
* Helper for `getAStateAfterMatching`, holding the case where one char is consumed.
*
* Gets a state that is reached from `prev` by matching the char at index `toIndex` of `str`,
* where `prev` is a state `reg` can be in at index `fromIndex = toIndex - 1`.
* Steps that are rejected by `discardedPrefixStep` are excluded.
*
* The `pragma[noopt]` annotation keeps the optimizer from rearranging this predicate;
* it was outlined from `getAStateAfterMatching` for performance reasons.
*/
pragma[noopt]
private State getAStateAfterMatchingAux(
MatchedRegExp reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
prev = getAState(reg, fromIndex, str, ignorePrefix) and
fromIndex = toIndex - 1 and
exists(string char | char = str.charAt(toIndex) | specializedDeltaClosed(prev, char, result)) and
not discardedPrefixStep(prev, result, ignorePrefix)
}
/**
 * Holds if a step from `prev` to `next` should be discarded when the `ignorePrefix` flag is set.
 *
 * This is the case for the step from the state `mkMatch(r)` of some regexp root `r`
 * back to that same state.
 */
private predicate discardedPrefixStep(State prev, State next, boolean ignorePrefix) {
  ignorePrefix = true and
  exists(RegExpRoot root | prev = mkMatch(root)) and
  next = prev
}
/**
 * Holds if `deltaClosed` has a step from `prev` to `next` for an input symbol matching `char`.
 * This is the `deltaClosed` relation specialized to the chars that exist in strings
 * tested by a `MatchedRegExp`.
 */
private predicate specializedDeltaClosed(State prev, string char, State next) {
  exists(InputSymbol symbol | symbol = specializedGetAnInputSymbolMatching(char) |
    deltaClosed(prev, symbol, next)
  )
}
/**
 * Gets an input symbol matching `char`.
 * This is the `getAnInputSymbolMatching` relation specialized to the chars that exist in
 * strings tested by a `MatchedRegExp`.
 */
pragma[noinline]
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
  result = getAnInputSymbolMatching(char) and
  // only consider chars occurring in a string that some `MatchedRegExp` is tested against
  exists(MatchedRegExp tested, string candidate |
    (
      tested.test(candidate, _)
      or
      tested.testWithGroups(candidate, _)
    ) and
    char = candidate.charAt(_)
  )
}
/**
* Gets the `i`th state on a path to the accepting state when `reg` matches `str`.
* Starts with an accepting state as found by `getAState` and searches backwards
* to the start state through the reachable states (as found by `getAState`).
*
* This predicate holds the invariant that the result state can be reached with `i` steps from a start state,
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
* The result state is therefore always on a valid path where `reg` accepts `str`.
*
* This predicate is only used to find which capture groups a regular expression has filled,
* and thus the search is only performed for the strings in the `testWithGroups(..)` predicate.
*/
private State getAStateThatReachesAccept(
MatchedRegExp reg, int i, string str, boolean ignorePrefix
) {
// base case, reaches an accepting state from the last state in `getAState(..)`
// (an accepting state must be reachable through epsilon transitions only)
reg.testWithGroups(str, ignorePrefix) and
i = str.length() - 1 and
result = getAState(reg, i, str, ignorePrefix) and
epsilonSucc*(result) = Accept(_)
or
// recursive case. `next` is the next state to be matched after matching `prev`.
// this predicate is doing a backwards search, so `prev` is the result we are looking for.
exists(State next, State prev, int fromIndex, int toIndex |
// `next` is already known to be on a path to an accepting state ...
next = getAStateThatReachesAccept(reg, toIndex, str, ignorePrefix) and
// ... and `prev` steps to `next` in the forwards search
next = getAStateAfterMatching(reg, prev, str, toIndex, fromIndex, ignorePrefix) and
i = fromIndex and
result = prev
)
}
/**
 * Gets the number of a capture group that `term` belongs to, that is, a group that is
 * `term` itself or one of its (transitive) parents.
 */
private int group(RegExpTerm term) {
  exists(RegExpGroup enclosing | term.getParent*() = enclosing |
    result = enclosing.getNumber()
  )
}
}
/**
 * A regular expression that explicitly mentions both "<" and ">", and which is
 * therefore tested against a fixed collection of HTML tags and comments.
 */
class HTMLMatchingRegExp extends RegexpMatching::MatchedRegExp {
  HTMLMatchingRegExp() {
    // the regexp must mention "<" and ">" explicitly.
    forall(string angleBracket | angleBracket = ["<", ">"] |
      exists(RegExpConstant term |
        term.getValue().matches("%" + angleBracket + "%") and
        term.getRootTerm() = this
      )
    )
  }

  override predicate testWithGroups(string str, boolean ignorePrefix) {
    str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"] and
    ignorePrefix = true
  }

  override predicate test(string str, boolean ignorePrefix) {
    str =
      [
        "<!-- foo -->", //
        "<!- foo ->", //
        "<!-- foo --!>", //
        "<!-- foo\n -->", //
        "<script>foo</script>", //
        "<script \n>foo</script>", //
        "<script >foo\n</script>", //
        "<foo ></foo>", //
        "<foo>", //
        "<foo src=\"foo\"></foo>", //
        "<script>", //
        "<script src=\"foo\"></script>", //
        "<script src='foo'></script>", //
        "<SCRIPT>foo</SCRIPT>", //
        "<script\tsrc=\"foo\"/>", //
        "<script\tsrc='foo'></script>", //
        "<sCrIpT>foo</ScRiPt>", //
        "<script src=\"foo\">foo</script >", //
        "<script src=\"foo\">foo</script foo=\"bar\">", //
        "<script src=\"foo\">foo</script\t\n bar>"
      ] and
    ignorePrefix = true
  }
}
/**
* Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
*
* When adding a new case to this predicate, make sure the test string used in `matches(..)` calls are present in `HTMLMatchingRegExp::test` / `HTMLMatchingRegExp::testWithGroups`.
*/
predicate isBadRegexpFilter(HTMLMatchingRegExp regexp, string msg) {
// CVE-2021-33829 - matching both "<!-- foo -->" and "<!-- foo --!>", but in different capture groups
regexp.matches("<!-- foo -->") and
regexp.matches("<!-- foo --!>") and
exists(int a, int b | a != b |
regexp.fillsCaptureGroup("<!-- foo -->", a) and
// <!-- foo --> might be ambiguously parsed (matching both capture groups), and that is ok here.
regexp.fillsCaptureGroup("<!-- foo --!>", b) and
not regexp.fillsCaptureGroup("<!-- foo --!>", a) and
msg =
"Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group "
+ a + " and comments ending with --!> are matched with capture group " +
strictconcat(int i | regexp.fillsCaptureGroup("<!-- foo --!>", i) | i.toString(), ", ") +
"."
)
or
// CVE-2020-17480 - matching "<!-- foo -->" and other tags, but not "<!-- foo --!>".
exists(int group, int other |
group != other and
regexp.fillsCaptureGroup("<!-- foo -->", group) and
regexp.fillsCaptureGroup("<foo>", other) and
not regexp.matches("<!-- foo --!>") and
not regexp.fillsCaptureGroup("<!-- foo -->", any(int i | i != group)) and
not regexp.fillsCaptureGroup("<!- foo ->", group) and
not regexp.fillsCaptureGroup("<foo>", group) and
not regexp.fillsCaptureGroup("<script>", group) and
msg =
"This regular expression only parses --> (capture group " + group +
") and not --!> as a HTML comment end tag."
)
or
regexp.matches("<!-- foo -->") and
not regexp.matches("<!-- foo\n -->") and
not regexp.matches("<!- foo ->") and
not regexp.matches("<foo>") and
not regexp.matches("<script>") and
msg = "This regular expression does not match comments containing newlines."
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<script \n>foo</script>") and
msg = "This regular expression matches <script></script>, but not <script \\n></script>"
or
not regexp.matches("<script >foo\n</script>") and
msg = "This regular expression matches <script>...</script>, but not <script >...\\n</script>"
)
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<script src='foo'></script>") and
not regexp.matches("<foo>") and
msg = "This regular expression does not match script tags where the attribute uses single-quotes."
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src='foo'></script>") and
not regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo>") and
msg = "This regular expression does not match script tags where the attribute uses double-quotes."
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src='foo'></script>") and
not regexp.matches("<script\tsrc='foo'></script>") and
not regexp.matches("<foo>") and
not regexp.matches("<foo src=\"foo\"></foo>") and
msg = "This regular expression does not match script tags where tabs are used between attributes."
or
regexp.matches("<script>foo</script>") and
not RegExpFlags::isIgnoreCase(regexp) and
not regexp.matches("<foo>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<SCRIPT>foo</SCRIPT>") and
msg = "This regular expression does not match upper case <SCRIPT> tags."
or
not regexp.matches("<sCrIpT>foo</ScRiPt>") and
regexp.matches("<SCRIPT>foo</SCRIPT>") and
msg = "This regular expression does not match mixed case <sCrIpT> tags."
)
or
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<script src=\"foo\">foo</script >") and
msg = "This regular expression does not match script end tags like </script >."
or
not regexp.matches("<script src=\"foo\">foo</script foo=\"bar\">") and
msg = "This regular expression does not match script end tags like </script foo=\"bar\">."
or
not regexp.matches("<script src=\"foo\">foo</script\t\n bar>") and
msg = "This regular expression does not match script end tags like </script\\t\\n bar>."
)
}

View File

@@ -4,7 +4,7 @@ import semmle.python.security.SensitiveData
import semmle.python.dataflow.Files
import semmle.python.web.Http
module ClearTextStorage {
deprecated module ClearTextStorage {
abstract class Sink extends TaintSink {
override predicate sinks(TaintKind kind) { kind instanceof SensitiveData }
}
@@ -26,7 +26,7 @@ module ClearTextStorage {
}
}
module ClearTextLogging {
deprecated module ClearTextLogging {
abstract class Sink extends TaintSink {
override predicate sinks(TaintKind kind) { kind instanceof SensitiveData }
}

View File

@@ -3,12 +3,12 @@ import semmle.python.dataflow.TaintTracking
private import semmle.python.security.SensitiveData
private import semmle.crypto.Crypto as CryptoLib
abstract class WeakCryptoSink extends TaintSink {
abstract deprecated class WeakCryptoSink extends TaintSink {
override predicate sinks(TaintKind taint) { taint instanceof SensitiveData }
}
/** Modeling the 'pycrypto' package https://github.com/dlitz/pycrypto (latest release 2013) */
module Pycrypto {
deprecated module Pycrypto {
ModuleValue cipher(string name) { result = Module::named("Crypto.Cipher").attr(name) }
class CipherInstance extends TaintKind {
@@ -58,7 +58,7 @@ module Pycrypto {
}
}
module Cryptography {
deprecated module Cryptography {
ModuleValue ciphers() {
result = Module::named("cryptography.hazmat.primitives.ciphers") and
result.isPackage()
@@ -128,7 +128,7 @@ module Cryptography {
}
}
private class CipherConfig extends TaintTracking::Configuration {
deprecated private class CipherConfig extends TaintTracking::Configuration {
CipherConfig() { this = "Crypto cipher config" }
override predicate isSource(TaintTracking::Source source) {

View File

@@ -7,13 +7,15 @@ import python
import semmle.python.dataflow.TaintTracking
import semmle.python.security.strings.Basic
private Value traceback_function(string name) { result = Module::named("traceback").attr(name) }
deprecated private Value traceback_function(string name) {
result = Module::named("traceback").attr(name)
}
/**
* This represents information relating to an exception, for instance the
* message, arguments or parts of the exception traceback.
*/
class ExceptionInfo extends StringKind {
deprecated class ExceptionInfo extends StringKind {
ExceptionInfo() { this = "exception.info" }
override string repr() { result = "exception info" }
@@ -23,12 +25,12 @@ class ExceptionInfo extends StringKind {
* A class representing sources of information about
* execution state exposed in tracebacks and the like.
*/
abstract class ErrorInfoSource extends TaintSource { }
abstract deprecated class ErrorInfoSource extends TaintSource { }
/**
* This kind represents exceptions themselves.
*/
class ExceptionKind extends TaintKind {
deprecated class ExceptionKind extends TaintKind {
ExceptionKind() { this = "exception.kind" }
override string repr() { result = "exception" }
@@ -44,7 +46,7 @@ class ExceptionKind extends TaintKind {
* A source of exception objects, either explicitly created, or captured by an
* `except` statement.
*/
class ExceptionSource extends ErrorInfoSource {
deprecated class ExceptionSource extends ErrorInfoSource {
ExceptionSource() {
exists(ClassValue cls |
cls.getASuperType() = ClassValue::baseException() and
@@ -63,7 +65,7 @@ class ExceptionSource extends ErrorInfoSource {
* Represents a sequence of pieces of information relating to an exception,
* for instance the contents of the `args` attribute, or the stack trace.
*/
class ExceptionInfoSequence extends SequenceKind {
deprecated class ExceptionInfoSequence extends SequenceKind {
ExceptionInfoSequence() { this.getItem() instanceof ExceptionInfo }
}
@@ -71,7 +73,7 @@ class ExceptionInfoSequence extends SequenceKind {
* Represents calls to functions in the `traceback` module that return
* sequences of exception information.
*/
class CallToTracebackFunction extends ErrorInfoSource {
deprecated class CallToTracebackFunction extends ErrorInfoSource {
CallToTracebackFunction() {
exists(string name |
name in [
@@ -92,7 +94,7 @@ class CallToTracebackFunction extends ErrorInfoSource {
* Represents calls to functions in the `traceback` module that return a single
* string of information about an exception.
*/
class FormattedTracebackSource extends ErrorInfoSource {
deprecated class FormattedTracebackSource extends ErrorInfoSource {
FormattedTracebackSource() { this = traceback_function("format_exc").getACall() }
override string toString() { result = "exception.info.source" }

View File

@@ -1,6 +1,6 @@
import semmle.python.dataflow.Implementation
module TaintTrackingPaths {
deprecated module TaintTrackingPaths {
predicate edge(TaintTrackingNode src, TaintTrackingNode dest, string label) {
exists(TaintTrackingNode source, TaintTrackingNode sink |
source.getConfiguration().hasFlowPath(source, sink) and
@@ -11,6 +11,6 @@ module TaintTrackingPaths {
}
}
query predicate edges(TaintTrackingNode fromnode, TaintTrackingNode tonode) {
deprecated query predicate edges(TaintTrackingNode fromnode, TaintTrackingNode tonode) {
TaintTrackingPaths::edge(fromnode, tonode, _)
}

View File

@@ -15,7 +15,7 @@ import semmle.python.web.HttpRequest
import semmle.python.security.internal.SensitiveDataHeuristics
private import HeuristicNames
abstract class SensitiveData extends TaintKind {
abstract deprecated class SensitiveData extends TaintKind {
bindingset[this]
SensitiveData() { this = this }
@@ -23,7 +23,7 @@ abstract class SensitiveData extends TaintKind {
abstract SensitiveDataClassification getClassification();
}
module SensitiveData {
deprecated module SensitiveData {
class Secret extends SensitiveData {
Secret() { this = "sensitive.data.secret" }
@@ -115,4 +115,4 @@ module SensitiveData {
}
//Backwards compatibility
class SensitiveDataSource = SensitiveData::Source;
deprecated class SensitiveDataSource = SensitiveData::Source;

View File

@@ -1,4 +1,6 @@
/**
* DEPRECATED -- use flow state instead
*
* This defines a `PathGraph` where sinks from `TaintTracking::Configuration`s are identified with
* sources from `TaintTracking2::Configuration`s if they represent the same `ControlFlowNode`.
*
@@ -28,9 +30,11 @@ private newtype TCustomPathNode =
CrossoverNode(DataFlow::Node node) { crossoverNode(node) }
/**
* DEPRECATED: Use flow state instead
*
* A class representing the set of all the path nodes in either config.
*/
class CustomPathNode extends TCustomPathNode {
deprecated class CustomPathNode extends TCustomPathNode {
/** Gets the PathNode if it is in Config1. */
DataFlow::PathNode asNode1() {
this = Config1Node(result) or this = CrossoverNode(result.getNode())
@@ -64,8 +68,12 @@ class CustomPathNode extends TCustomPathNode {
}
}
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
query predicate edges(CustomPathNode a, CustomPathNode b) {
/**
* DEPRECATED: Use flow state instead
*
* Holds if `(a,b)` is an edge in the graph of data flow path explanations.
*/
deprecated query predicate edges(CustomPathNode a, CustomPathNode b) {
// Edge is in Config1 graph
DataFlow::PathGraph::edges(a.asNode1(), b.asNode1())
or
@@ -73,8 +81,12 @@ query predicate edges(CustomPathNode a, CustomPathNode b) {
DataFlow2::PathGraph::edges(a.asNode2(), b.asNode2())
}
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(CustomPathNode n, string key, string val) {
/**
* DEPRECATED: Use flow state instead
*
* Holds if `n` is a node in the graph of data flow path explanations.
*/
deprecated query predicate nodes(CustomPathNode n, string key, string val) {
// Node is in Config1 graph
DataFlow::PathGraph::nodes(n.asNode1(), key, val)
or

View File

@@ -40,6 +40,10 @@ module CleartextLogging {
* A source of sensitive data, considered as a flow source.
*/
class SensitiveDataSourceAsSource extends Source, SensitiveDataSource {
SensitiveDataSourceAsSource() {
not SensitiveDataSource.super.getClassification() = SensitiveDataClassification::id()
}
override SensitiveDataClassification getClassification() {
result = SensitiveDataSource.super.getClassification()
}

View File

@@ -39,6 +39,10 @@ module CleartextStorage {
* A source of sensitive data, considered as a flow source.
*/
class SensitiveDataSourceAsSource extends Source, SensitiveDataSource {
SensitiveDataSourceAsSource() {
not SensitiveDataSource.super.getClassification() = SensitiveDataClassification::id()
}
override SensitiveDataClassification getClassification() {
result = SensitiveDataSource.super.getClassification()
}

View File

@@ -0,0 +1,60 @@
/**
* Provides taint-tracking configurations for detecting LDAP injection vulnerabilities
*
* Note, for performance reasons: only import this file if
* `LdapInjection::Configuration` is needed, otherwise
* `LdapInjectionCustomizations` should be imported instead.
*/
import python
import semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
/**
* Provides taint-tracking configurations for detecting LDAP injection vulnerabilities.
*
* Two configurations are provided. One is for detecting LDAP injection
* via the distinguished name (DN). The other is for detecting LDAP injection
* via the filter. These require different escapings.
*/
module LdapInjection {
import LdapInjectionCustomizations::LdapInjection
/**
* A taint-tracking configuration for detecting LDAP injection vulnerabilities
* via the distinguished name (DN) parameter of an LDAP search.
*/
class DnConfiguration extends TaintTracking::Configuration {
DnConfiguration() { this = "LdapDnInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof DnSink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof DnSanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof DnSanitizerGuard
}
}
/**
* A taint-tracking configuration for detecting LDAP injection vulnerabilities
* via the filter parameter of an LDAP search.
*/
class FilterConfiguration extends TaintTracking::Configuration {
FilterConfiguration() { this = "LdapFilterInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof FilterSink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof FilterSanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof FilterSanitizerGuard
}
}
}

View File

@@ -0,0 +1,97 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "ldap injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
 * Provides default sources, sinks and sanitizers for detecting
 * "ldap injection"
 * vulnerabilities, as well as extension points for adding your own.
 *
 * Sinks, sanitizers and sanitizer guards come in two flavors: one for the
 * distinguished name (DN) and one for the filter of an LDAP operation.
 */
module LdapInjection {
  /**
   * A data flow source for "ldap injection" vulnerabilities.
   */
  abstract class Source extends DataFlow::Node { }

  /**
   * A data flow sink for "ldap injection" vulnerabilities via the distinguished name (DN).
   */
  abstract class DnSink extends DataFlow::Node { }

  /**
   * A data flow sink for "ldap injection" vulnerabilities via the filter.
   */
  abstract class FilterSink extends DataFlow::Node { }

  /**
   * A sanitizer for "ldap injection" vulnerabilities via the distinguished name (DN).
   */
  abstract class DnSanitizer extends DataFlow::Node { }

  /**
   * A sanitizer for "ldap injection" vulnerabilities via the filter.
   */
  abstract class FilterSanitizer extends DataFlow::Node { }

  /**
   * A sanitizer guard for "ldap injection" vulnerabilities via the distinguished name (DN).
   */
  abstract class DnSanitizerGuard extends DataFlow::BarrierGuard { }

  /**
   * A sanitizer guard for "ldap injection" vulnerabilities via the filter.
   */
  abstract class FilterSanitizerGuard extends DataFlow::BarrierGuard { }

  /**
   * A source of remote user input, considered as a flow source.
   */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /**
   * The base DN of an LDAP execution, considered as a flow sink.
   */
  class LdapExecutionAsDnSink extends DnSink {
    LdapExecutionAsDnSink() { this = any(LDAP::LdapExecution ldap).getBaseDn() }
  }

  /**
   * The filter of an LDAP execution, considered as a flow sink.
   */
  class LdapExecutionAsFilterSink extends FilterSink {
    LdapExecutionAsFilterSink() { this = any(LDAP::LdapExecution ldap).getFilter() }
  }

  /**
   * A comparison with a constant string, considered as a sanitizer-guard for the DN.
   */
  class StringConstCompareAsDnSanitizerGuard extends DnSanitizerGuard, StringConstCompare { }

  /**
   * A comparison with a constant string, considered as a sanitizer-guard for the filter.
   */
  class StringConstCompareAsFilterSanitizerGuard extends FilterSanitizerGuard, StringConstCompare {
  }

  /**
   * The output of an LDAP DN escaping call, considered as a sanitizer for the DN.
   */
  class LdapDnEscapingSanitizer extends DnSanitizer, DataFlow::CallCfgNode {
    LdapDnEscapingSanitizer() { this = any(LdapDnEscaping ldapDnEsc).getOutput() }
  }

  /**
   * The output of an LDAP filter escaping call, considered as a sanitizer for the filter.
   */
  class LdapFilterEscapingSanitizer extends FilterSanitizer, DataFlow::CallCfgNode {
    LdapFilterEscapingSanitizer() { this = any(LdapFilterEscaping ldapFilterEsc).getOutput() }
  }
}

View File

@@ -0,0 +1,35 @@
/**
* Provides a taint-tracking configuration for tracking untrusted user input used in log entries.
*
* Note, for performance reasons: only import this file if
* `LogInjection::Configuration` is needed, otherwise
* `LogInjectionCustomizations` should be imported instead.
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
 * Taint-tracking support for finding untrusted user input that is used in log entries.
 */
module LogInjection {
  import LogInjectionCustomizations::LogInjection

  /**
   * The taint-tracking configuration that follows untrusted user input into log entries.
   * Sources, sinks, sanitizers and sanitizer guards are the ones made available by the
   * import above.
   */
  class Configuration extends TaintTracking::Configuration {
    Configuration() { this = "LogInjection" }

    override predicate isSource(DataFlow::Node source) { source = any(Source src) }

    override predicate isSink(DataFlow::Node sink) { sink = any(Sink snk) }

    override predicate isSanitizer(DataFlow::Node node) { node = any(Sanitizer san) }

    override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
      guard = any(SanitizerGuard sanGuard)
    }
  }
}

View File

@@ -0,0 +1,73 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "log injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
 * Default sources, sinks and sanitizers for the "log injection" analysis,
 * together with the abstract classes to extend when adding your own.
 */
module LogInjection {
  /** A data flow source for "log injection" vulnerabilities. */
  abstract class Source extends DataFlow::Node { }

  /** A data flow sink for "log injection" vulnerabilities. */
  abstract class Sink extends DataFlow::Node { }

  /** A sanitizer for "log injection" vulnerabilities. */
  abstract class Sanitizer extends DataFlow::Node { }

  /** A sanitizer guard for "log injection" vulnerabilities. */
  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

  /** Remote user input, treated as a flow source. */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /** An input to a logging operation, treated as a flow sink. */
  class LoggingAsSink extends Sink {
    LoggingAsSink() { exists(Logging write | this = write.getAnInput()) }
  }

  /** A comparison with a constant string, treated as a sanitizer-guard. */
  class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }

  /**
   * A `replace` call whose first argument is a line-break string constant,
   * treated as a sanitizer.
   */
  class ReplaceLineBreaksSanitizer extends Sanitizer, DataFlow::CallCfgNode {
    // Known imprecision of this sanitizer:
    // - it is not verified that every kind of line break gets replaced
    // - it is not verified that a line break is not simply replaced by another line break
    //
    // There is no simple way to do better, and without this sanitizer the
    // query would likely be too noisy.
    //
    // TODO: Consider rewriting using flow states.
    ReplaceLineBreaksSanitizer() {
      exists(DataFlow::AttrRead callee |
        callee = this.getFunction() and
        callee.getAttributeName() = "replace"
      ) and
      exists(StrConst needle |
        needle = this.getArg(0).asExpr() and
        needle.getText() = ["\r\n", "\n"]
      )
    }
  }
}

View File

@@ -2,24 +2,102 @@
* Provides taint-tracking configurations for detecting "path injection" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* the Configurations or the `pathInjection` predicate are needed, otherwise
* `PathInjection::Configuration` is needed, otherwise
* `PathInjectionCustomizations` should be imported instead.
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
* Provides a taint-tracking configuration for detecting "path injection" vulnerabilities.
*/
module PathInjection {
import PathInjectionCustomizations::PathInjection
/**
* A taint-tracking configuration for detecting "path injection" vulnerabilities.
*
* This configuration uses two flow states, `NotNormalized` and `NormalizedUnchecked`,
* to track the requirement that a file path must be first normalized and then checked
* before it is safe to use.
*
* At sources, paths are assumed not normalized. At normalization points, they change
* state to `NormalizedUnchecked` after which they can be made safe by an appropriate
* check of the prefix.
*
* Such checks are ineffective in the `NotNormalized` state.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "PathInjection" }
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
source instanceof Source and state instanceof NotNormalized
}
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
// Reaching a sink is a problem in either state: the path was never normalized,
// or it was normalized but not checked afterwards.
sink instanceof Sink and
(
state instanceof NotNormalized or
state instanceof NormalizedUnchecked
)
}
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
// Block `NotNormalized` paths here, since they change state to `NormalizedUnchecked`
node instanceof Path::PathNormalization and
state instanceof NotNormalized
or
// A node guarded by a safe-access check blocks flow in the `NormalizedUnchecked` state only.
node = any(Path::SafeAccessCheck c).getAGuardedNode() and
state instanceof NormalizedUnchecked
}
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
override predicate isAdditionalFlowStep(
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
DataFlow::FlowState stateTo
) {
// Step from the path argument of a normalization to the normalization itself,
// switching state from `NotNormalized` to `NormalizedUnchecked`.
nodeFrom = nodeTo.(Path::PathNormalization).getPathArg() and
stateFrom instanceof NotNormalized and
stateTo instanceof NormalizedUnchecked
}
}
/** A state signifying that the file path has not been normalized. */
class NotNormalized extends DataFlow::FlowState {
NotNormalized() { this = "NotNormalized" }
}
/** A state signifying that the file path has been normalized, but not checked. */
class NormalizedUnchecked extends DataFlow::FlowState {
NormalizedUnchecked() { this = "NormalizedUnchecked" }
}
}
// ---------------------------------------------------------------------------
// Old, deprecated code
// ---------------------------------------------------------------------------
private import semmle.python.dataflow.new.DataFlow2
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.TaintTracking2
import ChainedConfigs12
private import ChainedConfigs12
import PathInjectionCustomizations::PathInjection
// ---------------------------------------------------------------------------
// Case 1. The path is never normalized.
// ---------------------------------------------------------------------------
/** Configuration to find paths from sources to sinks that contain no normalization. */
class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
/**
* DEPRECATED: Use `PathInjection::Configuration` instead
*
* Configuration to find paths from sources to sinks that contain no normalization.
*/
deprecated class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
PathNotNormalizedConfiguration() { this = "PathNotNormalizedConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -38,18 +116,24 @@ class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
}
/**
* DEPRECATED: Use `PathInjection::Configuration` instead
*
* Holds if there is a path injection from source to sink, where the (python) path is
* not normalized.
*/
predicate pathNotNormalized(CustomPathNode source, CustomPathNode sink) {
deprecated predicate pathNotNormalized(CustomPathNode source, CustomPathNode sink) {
any(PathNotNormalizedConfiguration config).hasFlowPath(source.asNode1(), sink.asNode1())
}
// ---------------------------------------------------------------------------
// Case 2. The path is normalized at least once, but never checked afterwards.
// ---------------------------------------------------------------------------
/** Configuration to find paths from sources to normalizations that contain no prior normalizations. */
class FirstNormalizationConfiguration extends TaintTracking::Configuration {
/**
* DEPRECATED: Use `PathInjection::Configuration` instead
*
* Configuration to find paths from sources to normalizations that contain no prior normalizations.
*/
deprecated class FirstNormalizationConfiguration extends TaintTracking::Configuration {
FirstNormalizationConfiguration() { this = "FirstNormalizationConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
@@ -65,8 +149,12 @@ class FirstNormalizationConfiguration extends TaintTracking::Configuration {
}
}
/** Configuration to find paths from normalizations to sinks that do not go through a check. */
class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuration {
/**
* DEPRECATED: Use `PathInjection::Configuration` instead
*
* Configuration to find paths from normalizations to sinks that do not go through a check.
*/
deprecated class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuration {
NormalizedPathNotCheckedConfiguration() { this = "NormalizedPathNotCheckedConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof Path::PathNormalization }
@@ -83,10 +171,12 @@ class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuratio
}
/**
* DEPRECATED: Use `PathInjection::Configuration` instead
*
* Holds if there is a path injection from source to sink, where the (python) path is
* normalized at least once, but never checked afterwards.
*/
predicate pathNotCheckedAfterNormalization(CustomPathNode source, CustomPathNode sink) {
deprecated predicate pathNotCheckedAfterNormalization(CustomPathNode source, CustomPathNode sink) {
exists(
FirstNormalizationConfiguration config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
NormalizedPathNotCheckedConfiguration config2
@@ -100,8 +190,12 @@ predicate pathNotCheckedAfterNormalization(CustomPathNode source, CustomPathNode
// ---------------------------------------------------------------------------
// Query: Either case 1 or case 2.
// ---------------------------------------------------------------------------
/** Holds if there is a path injection from source to sink */
predicate pathInjection(CustomPathNode source, CustomPathNode sink) {
/**
* DEPRECATED: Use `PathInjection::Configuration` instead
*
* Holds if there is a path injection from source to sink
*/
deprecated predicate pathInjection(CustomPathNode source, CustomPathNode sink) {
pathNotNormalized(source, sink)
or
pathNotCheckedAfterNormalization(source, sink)

View File

@@ -0,0 +1,37 @@
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `RegexInjection::Configuration` is needed, otherwise
* `RegexInjectionCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*/
module RegexInjection {
import RegexInjectionCustomizations::RegexInjection
/**
* A taint-tracking configuration for detecting "regular expression injection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "RegexInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}

View File

@@ -0,0 +1,62 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "regular expression injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
/**
* Provides default sources, sinks and sanitizers for detecting
* "regular expression injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module RegexInjection {
/**
* A data flow source for "regular expression injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A sink for "regular expression injection" vulnerabilities is the execution of a regular expression.
* If you have a custom way to execute regular expressions, you can extend `RegexExecution::Range`.
*/
class Sink extends DataFlow::Node {
// The regex execution whose regex (as given by `getRegex()`) is this node.
RegexExecution regexExecution;
Sink() { this = regexExecution.getRegex() }
/** Gets the call that executes the regular expression marked by this sink. */
RegexExecution getRegexExecution() { result = regexExecution }
}
/**
* A sanitizer for "regular expression injection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for "regular expression injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* The output of a regex escaping, considered as a sanitizer.
*/
class RegexEscapingAsSanitizer extends Sanitizer {
RegexEscapingAsSanitizer() {
// Due to use-use flow, we want the output rather than an input
// (so the input can still flow to other sinks).
this = any(RegexEscaping esc).getOutput()
}
}
}

View File

@@ -0,0 +1,84 @@
/**
* Provides a taint-tracking configuration for detecting "Server-side request forgery" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `ServerSideRequestForgery::Configuration` is needed, otherwise
* `ServerSideRequestForgeryCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
/**
* Provides a taint-tracking configuration for detecting "Server-side request forgery" vulnerabilities.
*
* This configuration has a sanitizer to limit results to cases where the attacker has full control of the URL.
* See `PartialServerSideRequestForgery` for a variant without this requirement.
*
* You should use the `fullyControlledRequest` predicate to only select results where all
* URL parts are fully controlled.
*/
module FullServerSideRequestForgery {
import ServerSideRequestForgeryCustomizations::ServerSideRequestForgery
/**
* A taint-tracking configuration for detecting "Server-side request forgery" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "FullServerSideRequestForgery" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) {
node instanceof Sanitizer
or
// Additional sanitizer that distinguishes this configuration from `PartialServerSideRequestForgery`.
node instanceof FullUrlControlSanitizer
}
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}
/**
 * Holds if all URL parts of `request` are fully user controlled, that is, there is
 * no URL part of `request` that `FullServerSideRequestForgery::Configuration` finds
 * no flow to.
 */
predicate fullyControlledRequest(HTTP::Client::Request request) {
  exists(FullServerSideRequestForgery::Configuration cfg |
    not exists(DataFlow::Node part |
      part = request.getAUrlPart() and
      not cfg.hasFlow(_, part)
    )
  )
}
/**
 * Provides a taint-tracking configuration for detecting "Server-side request forgery" vulnerabilities.
 *
 * This configuration has results even when the attacker does not have full control over the URL.
 * See `FullServerSideRequestForgery` for a variant that has this requirement.
 */
module PartialServerSideRequestForgery {
  import ServerSideRequestForgeryCustomizations::ServerSideRequestForgery

  /**
   * A taint-tracking configuration for detecting "Server-side request forgery"
   * vulnerabilities, including those where only part of the URL is attacker-controlled.
   */
  class Configuration extends TaintTracking::Configuration {
    Configuration() { this = "PartialServerSideRequestForgery" }

    override predicate isSink(DataFlow::Node sink) { sink = any(Sink s) }

    override predicate isSource(DataFlow::Node source) { source = any(Source s) }

    override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
      guard = any(SanitizerGuard g)
    }

    override predicate isSanitizer(DataFlow::Node node) { node = any(Sanitizer s) }
  }
}

View File

@@ -0,0 +1,143 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "Server-side request forgery"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides default sources, sinks and sanitizers for detecting
* "Server-side request forgery"
* vulnerabilities, as well as extension points for adding your own.
*/
module ServerSideRequestForgery {
/**
 * A data flow source for "Server-side request forgery" vulnerabilities.
 *
 * Abstract extension point: extend this class to add your own sources.
 */
abstract class Source extends DataFlow::Node { }
/**
 * A data flow sink for "Server-side request forgery" vulnerabilities.
 *
 * Abstract extension point: extend this class to add your own sinks. Every
 * concrete subclass must implement `getRequest`.
 */
abstract class Sink extends DataFlow::Node {
/**
 * Gets the request this sink belongs to.
 */
abstract HTTP::Client::Request getRequest();
}
/**
 * A sanitizer for "Server-side request forgery" vulnerabilities.
 *
 * Abstract extension point: extend this class to add your own sanitizers.
 */
abstract class Sanitizer extends DataFlow::Node { }
/**
 * A sanitizer for "Server-side request forgery" vulnerabilities that ensures the
 * attacker does not have full control of the URL (that is, the attacker might
 * still be able to control the path or query parameters).
 *
 * Abstract extension point: extend this class to add your own sanitizers of this kind.
 */
abstract class FullUrlControlSanitizer extends DataFlow::Node { }
/**
 * A sanitizer guard for "Server-side request forgery" vulnerabilities.
 *
 * Abstract extension point on top of `DataFlow::BarrierGuard`: extend this class
 * to add your own sanitizer guards.
 */
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
 * A source of remote user input, considered as a flow source.
 *
 * The body is empty: extending both `Source` and `RemoteFlowSource` is what makes
 * every `RemoteFlowSource` a `Source` here.
 */
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/** A URL part of an HTTP client request, considered as a sink. */
class HttpRequestUrlAsSink extends Sink {
  HTTP::Client::Request req;

  HttpRequestUrlAsSink() {
    this = req.getAUrlPart() and
    // When the stdlib code for HTTPConnection is extracted, requests made inside the
    // HTTPConnection implementation itself are found as well -- for example, the
    // `request` method calls the `_send_request` method internally. Without the
    // exclusion below we would raise alerts within that implementation, which is
    // just annoying.
    //
    // The exclusion is based on the location of the request, not of the URL part,
    // since in the scenario above the URL part would be in user code.
    //
    // See comment for command injection sinks for more details.
    not exists(string moduleName |
      moduleName = req.getScope().getEnclosingModule().getName() and
      moduleName = ["http.client", "httplib"]
    )
  }

  override HTTP::Client::Request getRequest() { result = req }
}
/**
 * A comparison with a constant string, considered as a sanitizer-guard.
 *
 * The body is empty: extending both classes makes every `StringConstCompare` a
 * `SanitizerGuard`. `StringConstCompare` is not defined in this file; presumably it
 * comes from the `semmle.python.dataflow.new.BarrierGuards` import -- TODO confirm.
 */
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
/**
 * A string construction (concatenation, `%`-formatting, `.format(...)` call, f-string)
 * where the left side is not user-controlled.
 *
 * For all of these cases, we try to allow `http://` or `https://` on the left side
 * since that will still allow full URL control.
 */
class StringConstructionAsFullUrlControlSanitizer extends FullUrlControlSanitizer {
  StringConstructionAsFullUrlControlSanitizer() {
    // string concat: the right operand of `+` is sanitized, unless the left operand is
    // the string constant `http://` or `https://` (compared case-insensitively).
    exists(BinaryExprNode add |
      add.getOp() instanceof Add and
      add.getRight() = this.asCfgNode() and
      not add.getLeft().getNode().(StrConst).getText().toLowerCase() in ["http://", "https://"]
    )
    or
    // % formatting: the right operand of `%` is sanitized, unless the left operand is a
    // string constant consisting of the scheme, a `%s`, and no further `%`.
    exists(BinaryExprNode fmt |
      fmt.getOp() instanceof Mod and
      fmt.getRight() = this.asCfgNode() and
      // detecting %-formatting is not super easy, so we simplify it to only handle
      // when there is a **single** substitution going on.
      not fmt.getLeft().getNode().(StrConst).getText().regexpMatch("^(?i)https?://%s[^%]*$")
    )
    or
    // arguments to a format call
    exists(DataFlow::MethodCallNode call, string httpPrefixRe |
      // The capture groups describe the replacement field that directly follows the scheme:
      //   group 1: `{}`         (auto-numbered field)
      //   group 2: `{<digits>}` (explicitly numbered field)
      //   group 3: `{<name>}`   (named field)
      // Group 3 must not contain braces. A greedy `.*` would run on to the last `}` of
      // the template (capturing `host}/{path` from `http://{host}/{path}`), so the
      // captured text would never equal the name of the first field, and the argument
      // that controls the host would wrongly be sanitized.
      httpPrefixRe = "^(?i)https?://(?:(\\{\\})|\\{([0-9]+)\\}|\\{([^0-9{}][^{}]*)\\}).*$"
    |
      call.getMethodName() = "format" and
      (
        if call.getObject().asExpr().(StrConst).getText().regexpMatch(httpPrefixRe)
        then
          // The template starts with the scheme followed by a replacement field: every
          // argument except the one that fills that first field is sanitized.
          exists(string text | text = call.getObject().asExpr().(StrConst).getText() |
            // `http://{}...`: positional argument 0 fills the first field
            exists(text.regexpCapture(httpPrefixRe, 1)) and
            this in [call.getArg(any(int i | i >= 1)), call.getArgByName(_)]
            or
            // `http://{123}...`: the positional argument with that index fills the first field
            exists(int safeArgIndex | safeArgIndex = text.regexpCapture(httpPrefixRe, 2).toInt() |
              this in [call.getArg(any(int i | i != safeArgIndex)), call.getArgByName(_)]
            )
            or
            // `http://{abc}...`: the keyword argument with that name fills the first field
            exists(string safeArgName | safeArgName = text.regexpCapture(httpPrefixRe, 3) |
              this in [call.getArg(_), call.getArgByName(any(string s | s != safeArgName))]
            )
          )
        else
          // Any other receiver of `.format(...)`: all arguments are sanitized.
          this in [call.getArg(_), call.getArgByName(_)]
      )
    )
    or
    // f-string: all values after the first are sanitized. If the first value is the
    // constant `http://` or `https://`, the value right after it is left alone as well.
    exists(Fstring fstring |
      if fstring.getValue(0).(StrConst).getText().toLowerCase() in ["http://", "https://"]
      then fstring.getValue(any(int i | i >= 2)) = this.asExpr()
      else fstring.getValue(any(int i | i >= 1)) = this.asExpr()
    )
  }
}
}

View File

@@ -2,7 +2,7 @@ import python
import semmle.python.security.strings.Basic
/** Assume that taint flows from argument to result for *any* call */
class AnyCallStringFlow extends DataFlowExtension::DataFlowNode {
deprecated class AnyCallStringFlow extends DataFlowExtension::DataFlowNode {
AnyCallStringFlow() { any(CallNode call).getAnArg() = this }
override ControlFlowNode getASuccessorNode() { result.(CallNode).getAnArg() = this }

View File

@@ -11,18 +11,18 @@ import semmle.python.dataflow.TaintTracking
import semmle.python.security.strings.Untrusted
/** Abstract taint sink that is potentially vulnerable to malicious shell commands. */
abstract class CommandSink extends TaintSink { }
abstract deprecated class CommandSink extends TaintSink { }
private ModuleObject osOrPopenModule() { result.getName() = ["os", "popen2"] }
deprecated private ModuleObject osOrPopenModule() { result.getName() = ["os", "popen2"] }
private Object makeOsCall() {
deprecated private Object makeOsCall() {
exists(string name | result = ModuleObject::named("subprocess").attr(name) |
name = ["Popen", "call", "check_call", "check_output", "run"]
)
}
/**Special case for first element in sequence. */
class FirstElementKind extends TaintKind {
deprecated class FirstElementKind extends TaintKind {
FirstElementKind() { this = "sequence[" + any(ExternalStringKind key) + "][0]" }
override string repr() { result = "first item in sequence of " + this.getItem().repr() }
@@ -31,7 +31,7 @@ class FirstElementKind extends TaintKind {
ExternalStringKind getItem() { this = "sequence[" + result + "][0]" }
}
class FirstElementFlow extends DataFlowExtension::DataFlowNode {
deprecated class FirstElementFlow extends DataFlowExtension::DataFlowNode {
FirstElementFlow() { this = any(SequenceNode s).getElement(0) }
override ControlFlowNode getASuccessorNode(TaintKind fromkind, TaintKind tokind) {
@@ -43,7 +43,7 @@ class FirstElementFlow extends DataFlowExtension::DataFlowNode {
* A taint sink that is potentially vulnerable to malicious shell commands.
* The `vuln` in `subprocess.call(shell=vuln)` and similar calls.
*/
class ShellCommand extends CommandSink {
deprecated class ShellCommand extends CommandSink {
override string toString() { result = "shell command" }
ShellCommand() {
@@ -81,7 +81,7 @@ class ShellCommand extends CommandSink {
* A taint sink that is potentially vulnerable to malicious shell commands.
* The `vuln` in `subprocess.call(vuln, ...)` and similar calls.
*/
class OsCommandFirstArgument extends CommandSink {
deprecated class OsCommandFirstArgument extends CommandSink {
override string toString() { result = "OS command first argument" }
OsCommandFirstArgument() {
@@ -111,7 +111,7 @@ class OsCommandFirstArgument extends CommandSink {
* A taint sink that is potentially vulnerable to malicious shell commands.
* The `vuln` in `invoke.run(vuln, ...)` and similar calls.
*/
class InvokeRun extends CommandSink {
deprecated class InvokeRun extends CommandSink {
InvokeRun() {
this = Value::named("invoke.run").(FunctionValue).getArgumentForCall(_, 0)
or
@@ -127,12 +127,12 @@ class InvokeRun extends CommandSink {
* Internal TaintKind to track the invoke.Context instance passed to functions
* marked with @invoke.task
*/
private class InvokeContextArg extends TaintKind {
deprecated private class InvokeContextArg extends TaintKind {
InvokeContextArg() { this = "InvokeContextArg" }
}
/** Internal TaintSource to track the context passed to functions marked with @invoke.task */
private class InvokeContextArgSource extends TaintSource {
deprecated private class InvokeContextArgSource extends TaintSource {
InvokeContextArgSource() {
exists(Function f, Expr decorator |
count(f.getADecorator()) = 1 and
@@ -158,7 +158,7 @@ private class InvokeContextArgSource extends TaintSource {
* A taint sink that is potentially vulnerable to malicious shell commands.
* The `vuln` in `invoke.Context().run(vuln, ...)` and similar calls.
*/
class InvokeContextRun extends CommandSink {
deprecated class InvokeContextRun extends CommandSink {
InvokeContextRun() {
exists(CallNode call |
any(InvokeContextArg k).taints(call.getFunction().(AttrNode).getObject("run"))
@@ -187,7 +187,7 @@ class InvokeContextRun extends CommandSink {
* A taint sink that is potentially vulnerable to malicious shell commands.
* The `vuln` in `fabric.Group().run(vuln, ...)` and similar calls.
*/
class FabricGroupRun extends CommandSink {
deprecated class FabricGroupRun extends CommandSink {
FabricGroupRun() {
exists(ClassValue cls |
cls.getASuperType() = Value::named("fabric.Group") and
@@ -203,7 +203,7 @@ class FabricGroupRun extends CommandSink {
// -------------------------------------------------------------------------- //
// Modeling of the 'invoke' package and 'fabric' package (v 1.x)
// -------------------------------------------------------------------------- //
class FabricV1Commands extends CommandSink {
deprecated class FabricV1Commands extends CommandSink {
FabricV1Commands() {
// since `run` and `sudo` are decorated, we can't use FunctionValue's :(
exists(CallNode call |
@@ -228,7 +228,7 @@ class FabricV1Commands extends CommandSink {
* An extension that propagates taint from the arguments of `fabric.api.execute(func, arg0, arg1, ...)`
* to the parameters of `func`, since this will call `func(arg0, arg1, ...)`.
*/
class FabricExecuteExtension extends DataFlowExtension::DataFlowNode {
deprecated class FabricExecuteExtension extends DataFlowExtension::DataFlowNode {
CallNode call;
FabricExecuteExtension() {

View File

@@ -2,7 +2,7 @@ import python
import semmle.python.dataflow.TaintTracking
/** `pickle.loads(untrusted)` vulnerability. */
abstract class DeserializationSink extends TaintSink {
abstract deprecated class DeserializationSink extends TaintSink {
bindingset[this]
DeserializationSink() { this = this }
}

View File

@@ -14,7 +14,7 @@ import semmle.python.security.strings.Untrusted
* A taint sink that represents an argument to exec or eval that is vulnerable to malicious input.
* The `vuln` in `exec(vuln)` or similar.
*/
class StringEvaluationNode extends TaintSink {
deprecated class StringEvaluationNode extends TaintSink {
override string toString() { result = "exec or eval" }
StringEvaluationNode() {

View File

@@ -11,13 +11,15 @@ import semmle.python.dataflow.TaintTracking
import semmle.python.security.strings.Untrusted
import semmle.python.security.injection.Deserialization
private FunctionObject marshalLoads() { result = ModuleObject::named("marshal").attr("loads") }
deprecated private FunctionObject marshalLoads() {
result = ModuleObject::named("marshal").attr("loads")
}
/**
* A taint sink that is potentially vulnerable to malicious marshaled objects.
* The `vuln` in `marshal.loads(vuln)`.
*/
class UnmarshalingNode extends DeserializationSink {
deprecated class UnmarshalingNode extends DeserializationSink {
override string toString() { result = "unmarshaling vulnerability" }
UnmarshalingNode() {

View File

@@ -6,7 +6,7 @@ import semmle.python.security.strings.Untrusted
* Prevents taint flowing through ntpath.normpath()
* NormalizedPath below handles that case.
*/
class PathSanitizer extends Sanitizer {
deprecated class PathSanitizer extends Sanitizer {
PathSanitizer() { this = "path.sanitizer" }
override predicate sanitizingNode(TaintKind taint, ControlFlowNode node) {
@@ -15,7 +15,7 @@ class PathSanitizer extends Sanitizer {
}
}
private FunctionObject abspath() {
deprecated private FunctionObject abspath() {
exists(ModuleObject os_path | ModuleObject::named("os").attr("path") = os_path |
os_path.attr("abspath") = result
or
@@ -24,18 +24,18 @@ private FunctionObject abspath() {
}
/** A path that has been normalized, but not verified to be safe */
class NormalizedPath extends TaintKind {
deprecated class NormalizedPath extends TaintKind {
NormalizedPath() { this = "normalized.path.injection" }
override string repr() { result = "normalized path" }
}
private predicate abspath_call(CallNode call, ControlFlowNode arg) {
deprecated private predicate abspath_call(CallNode call, ControlFlowNode arg) {
call.getFunction().refersTo(abspath()) and
arg = call.getArg(0)
}
class AbsPath extends DataFlowExtension::DataFlowNode {
deprecated class AbsPath extends DataFlowExtension::DataFlowNode {
AbsPath() { abspath_call(_, this) }
override ControlFlowNode getASuccessorNode(TaintKind fromkind, TaintKind tokind) {
@@ -45,7 +45,7 @@ class AbsPath extends DataFlowExtension::DataFlowNode {
}
}
class NormalizedPathSanitizer extends Sanitizer {
deprecated class NormalizedPathSanitizer extends Sanitizer {
NormalizedPathSanitizer() { this = "normalized.path.sanitizer" }
override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
@@ -59,7 +59,7 @@ class NormalizedPathSanitizer extends Sanitizer {
* A taint sink that is vulnerable to malicious paths.
* The `vuln` in `open(vuln)` and similar.
*/
class OpenNode extends TaintSink {
deprecated class OpenNode extends TaintSink {
override string toString() { result = "argument to open()" }
OpenNode() {

View File

@@ -11,7 +11,7 @@ import semmle.python.dataflow.TaintTracking
import semmle.python.security.strings.Untrusted
import semmle.python.security.injection.Deserialization
private ModuleObject pickleModule() {
deprecated private ModuleObject pickleModule() {
result.getName() = "pickle"
or
result.getName() = "cPickle"
@@ -19,10 +19,10 @@ private ModuleObject pickleModule() {
result.getName() = "dill"
}
private FunctionObject pickleLoads() { result = pickleModule().attr("loads") }
deprecated private FunctionObject pickleLoads() { result = pickleModule().attr("loads") }
/** `pickle.loads(untrusted)` vulnerability. */
class UnpicklingNode extends DeserializationSink {
deprecated class UnpicklingNode extends DeserializationSink {
override string toString() { result = "unpickling untrusted data" }
UnpicklingNode() {

View File

@@ -1,37 +1,6 @@
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `RegexInjection::Configuration` is needed, otherwise
* `RegexInjectionCustomizations` should be imported instead.
*/
/** DEPRECATED: use semmle.python.security.dataflow.RegexInjection instead. */
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
private import semmle.python.security.dataflow.RegexInjection as New
/**
 * Provides a taint-tracking configuration for detecting regular expression injection
 * vulnerabilities.
 */
module RegexInjection {
import RegexInjectionCustomizations::RegexInjection
/**
 * A taint-tracking configuration for detecting "regular expression injection" vulnerabilities.
 *
 * Sources, sinks, sanitizers and sanitizer guards are the `Source`, `Sink`,
 * `Sanitizer` and `SanitizerGuard` classes brought in by the import above.
 */
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "RegexInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}
/**
 * DEPRECATED: use semmle.python.security.dataflow.RegexInjection instead.
 *
 * This is an alias for `New::RegexInjection`, where `New` is this file's private
 * import of `semmle.python.security.dataflow.RegexInjection`.
 */
deprecated module RegexInjection = New::RegexInjection;

View File

@@ -1,62 +1,6 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "regular expression injection"
* vulnerabilities, as well as extension points for adding your own.
*/
/** DEPRECATED: use semmle.python.security.dataflow.RegexInjectionCustomizations instead. */
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.security.dataflow.RegexInjectionCustomizations as New
/**
 * Provides default sources, sinks and sanitizers for detecting
 * "regular expression injection"
 * vulnerabilities, as well as extension points for adding your own.
 */
module RegexInjection {
/**
 * A data flow source for "regular expression injection" vulnerabilities.
 *
 * Abstract extension point: extend this class to add your own sources.
 */
abstract class Source extends DataFlow::Node { }
/**
 * A sink for "regular expression injection" vulnerabilities: the regular expression
 * (`getRegex()`) of some execution of a regular expression.
 * If you have a custom way to execute regular expressions, you can extend `RegexExecution::Range`.
 */
class Sink extends DataFlow::Node {
// The execution whose regular expression this sink is.
RegexExecution regexExecution;
Sink() { this = regexExecution.getRegex() }
/** Gets the call that executes the regular expression marked by this sink. */
RegexExecution getRegexExecution() { result = regexExecution }
}
/**
 * A sanitizer for "regular expression injection" vulnerabilities.
 *
 * Abstract extension point: extend this class to add your own sanitizers.
 */
abstract class Sanitizer extends DataFlow::Node { }
/**
 * A sanitizer guard for "regular expression injection" vulnerabilities.
 *
 * Abstract extension point on top of `DataFlow::BarrierGuard`: extend this class
 * to add your own sanitizer guards.
 */
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
 * A source of remote user input, considered as a flow source.
 *
 * The body is empty: extending both `Source` and `RemoteFlowSource` is what makes
 * every `RemoteFlowSource` a `Source` here.
 */
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
 * The output of a regex escaping, considered as a sanitizer.
 */
class RegexEscapingAsSanitizer extends Sanitizer {
RegexEscapingAsSanitizer() {
// Due to use-use flow, we want the output rather than an input
// (so the input can still flow to other sinks).
this = any(RegexEscaping esc).getOutput()
}
}
}
/**
 * DEPRECATED: use semmle.python.security.dataflow.RegexInjectionCustomizations instead.
 *
 * This is an alias for `New::RegexInjection`, where `New` is this file's private
 * import of `semmle.python.security.dataflow.RegexInjectionCustomizations`.
 */
deprecated module RegexInjection = New::RegexInjection;

View File

@@ -11,7 +11,7 @@ import semmle.python.dataflow.TaintTracking
import semmle.python.security.strings.Untrusted
import semmle.python.security.SQL
private StringObject first_part(ControlFlowNode command) {
deprecated private StringObject first_part(ControlFlowNode command) {
command.(BinaryExprNode).getOp() instanceof Add and
command.(BinaryExprNode).getLeft().refersTo(result)
or
@@ -26,7 +26,7 @@ private StringObject first_part(ControlFlowNode command) {
}
/** Holds if `command` appears to be a SQL command string of which `inject` is a part. */
predicate probable_sql_command(ControlFlowNode command, ControlFlowNode inject) {
deprecated predicate probable_sql_command(ControlFlowNode command, ControlFlowNode inject) {
exists(string prefix |
inject = command.getAChild*() and
first_part(command).getText().regexpMatch(" *" + prefix + ".*")
@@ -39,7 +39,7 @@ predicate probable_sql_command(ControlFlowNode command, ControlFlowNode inject)
* A taint kind representing a DB cursor.
* This will be overridden to provide specific kinds of DB cursor.
*/
abstract class DbCursor extends TaintKind {
abstract deprecated class DbCursor extends TaintKind {
bindingset[this]
DbCursor() { any() }
@@ -50,7 +50,7 @@ abstract class DbCursor extends TaintKind {
* A part of a string that appears to be a SQL command and is thus
* vulnerable to malicious input.
*/
class SimpleSqlStringInjection extends SqlInjectionSink {
deprecated class SimpleSqlStringInjection extends SqlInjectionSink {
override string toString() { result = "simple SQL string injection" }
SimpleSqlStringInjection() { probable_sql_command(_, this) }
@@ -62,13 +62,13 @@ class SimpleSqlStringInjection extends SqlInjectionSink {
* A taint source representing sources of DB connections.
* This will be overridden to provide specific kinds of DB connection sources.
*/
abstract class DbConnectionSource extends TaintSource { }
abstract deprecated class DbConnectionSource extends TaintSource { }
/**
* A taint sink that is vulnerable to malicious SQL queries.
* The `vuln` in `db.connection.execute(vuln)` and similar.
*/
class DbConnectionExecuteArgument extends SqlInjectionSink {
deprecated class DbConnectionExecuteArgument extends SqlInjectionSink {
override string toString() { result = "db.connection.execute" }
DbConnectionExecuteArgument() {

Some files were not shown because too many files have changed in this diff Show More