Merge branch 'main' into python-make-annotated-assignment-a-definitionnode

This commit is contained in:
Taus Brock-Nannestad
2021-09-07 15:01:01 +02:00
3293 changed files with 94592 additions and 24185 deletions

View File

@@ -152,4 +152,102 @@
]
},
"Type tracking class": {
"scope": "ql",
"prefix": "type tracking class",
"body": [
"/**",
" * Provides models for the `${TM_SELECTED_TEXT}` class",
" *",
" * See ${1:https://apiref (TODO)}.",
" */",
"module ${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/} {",
" /** Gets a reference to the `${TM_SELECTED_TEXT}` class. */",
" private API::Node classRef() {",
" result = API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")",
" }",
"",
" /**",
" * A source of instances of `${TM_SELECTED_TEXT}`, extend this class to model new instances.",
" *",
" * This can include instantiations of the class, return values from function",
" * calls, or a special parameter that will be set when functions are called by an external",
" * library.",
" *",
" * Use the predicate `${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/}::instance()` to get references to instances of `${TM_SELECTED_TEXT}`.",
" */",
" abstract class InstanceSource extends DataFlow::LocalSourceNode { }",
"",
" /** A direct instantiation of `${TM_SELECTED_TEXT}`. */",
" private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {",
" ClassInstantiation() { this = classRef().getACall() }",
" }",
"",
" /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
" private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {",
" t.start() and",
" result instanceof InstanceSource",
" or",
" exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))",
" }",
"",
" /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
" DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }",
"",
" /**",
" * Taint propagation for `${TM_SELECTED_TEXT}`.",
" */",
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
" InstanceTaintSteps() { this = \"${TM_SELECTED_TEXT}\" }",
" ",
" override DataFlow::Node getInstance() { result = instance() }",
" ",
" override string getAttributeName() { none() }",
" ",
" override string getMethodName() { none() }",
" ",
" override string getAsyncMethodName() { none() }",
" }",
"",
" /**",
" * Extra taint propagation for `${TM_SELECTED_TEXT}`, not covered by `InstanceTaintSteps`.",
" */",
" private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {",
" override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {",
" // TODO",
" none()",
" }",
" }",
"}",
],
"description": "Type tracking class (select full class path before inserting)",
},
"foo": {
"scope": "ql",
"prefix": "foo",
"body": [
" /**",
" * Taint propagation for `$1`.",
" */",
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
" InstanceTaintSteps() { this = \"$1\" }",
"",
" override DataFlow::Node getInstance() { result = instance() }",
"",
" override string getAttributeName() { none() }",
"",
" override string getMethodName() { none() }",
"",
" override string getAsyncMethodName() { none() }",
" }",
],
},
"API graph .getMember chain": {
"scope": "ql",
"prefix": "api graph .getMember chain",
"body": [
"API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")"
],
"description": "API graph .getMember chain (select full path before inserting)",
},
}

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of raw SQL execution from the PyPI package `peewee`.

View File

@@ -1,3 +1,2 @@
lgtm,codescanning
* The `track` and `backtrack` methods on `LocalSourceNode` have been deprecated. When writing
type trackers, the corresponding methods on `TypeTrackingNode` should be used instead.
* The `track` and `backtrack` methods on `LocalSourceNode` are in the process of being deprecated. When using type trackers, the corresponding methods on `TypeTrackingNode` should be used instead.

View File

@@ -1,3 +1,3 @@
lgtm,codescanning
* Added _Inefficient regular expression_ (`py/redos`) query, which is already available in JavaScript.
* Added _Polynomial regular expression used on uncontrolled data_ (`py/polynomial-redos`), which is already available in JavaScript.
* Added an experimental _Inefficient regular expression_ (`py/redos`) query, which is already available in JavaScript.
* Added an experimental _Polynomial regular expression used on uncontrolled data_ (`py/polynomial-redos`), which is already available in JavaScript.

View File

@@ -0,0 +1,4 @@
---
dependencies: {}
compiled: false
lockVersion: 1.0.0

View File

@@ -1,3 +1,4 @@
name: codeql-python-examples
version: 0.0.0
libraryPathDependencies: codeql-python
name: codeql/python-examples
version: 0.0.2
dependencies:
codeql/python-all: "*"

View File

@@ -0,0 +1,4 @@
---
dependencies: {}
compiled: false
lockVersion: 1.0.0

7
python/ql/lib/qlpack.yml Normal file
View File

@@ -0,0 +1,7 @@
name: codeql/python-all
version: 0.0.2
dbscheme: semmlecode.python.dbscheme
extractor: python
library: true
dependencies:
codeql/python-upgrades: 0.0.2

View File

@@ -49,12 +49,15 @@ abstract class AstNode extends AstNode_ {
/** Whether this contains `inner` syntactically */
predicate contains(AstNode inner) { this.getAChildNode+() = inner }
/** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
predicate containsInScope(AstNode inner) {
pragma[noinline]
private predicate containsInScope(AstNode inner, Scope scope) {
this.contains(inner) and
this.getScope() = inner.getScope() and
not inner instanceof Scope
not inner instanceof Scope and
scope = this.getScope()
}
/** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
predicate containsInScope(AstNode inner) { this.containsInScope(inner, inner.getScope()) }
}
/* Parents */

View File

@@ -26,6 +26,7 @@ private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Twisted
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Yaml

View File

@@ -7,6 +7,10 @@ private import semmle.python.regex
* An element containing a regular expression term, that is, either
* a string literal (parsed as a regular expression)
* or another regular expression term.
*
* For sequences and alternations, we require at least one child.
* Otherwise, we wish to represent the term differently.
* This avoids multiple representations of the same term.
*/
newtype TRegExpParent =
/** A string literal used as a regular expression */
@@ -14,9 +18,18 @@ newtype TRegExpParent =
/** A quantified term */
TRegExpQuantifier(Regex re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
/** A sequence term */
TRegExpSequence(Regex re, int start, int end) { re.sequence(start, end) } or
/** An alternatio term */
TRegExpAlt(Regex re, int start, int end) { re.alternation(start, end) } or
TRegExpSequence(Regex re, int start, int end) {
re.sequence(start, end) and
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
} or
/** An alternation term */
TRegExpAlt(Regex re, int start, int end) {
re.alternation(start, end) and
exists(int part_end |
re.alternationOption(start, end, start, part_end) and
part_end < end
) // if an alternation does not have more than one element, it should be treated as that element instead.
} or
/** A character class term */
TRegExpCharacterClass(Regex re, int start, int end) { re.charSet(start, end) } or
/** A character range term */
@@ -61,6 +74,10 @@ class RegExpLiteral extends TRegExpLiteral, RegExpParent {
predicate isDotAll() { re.getAMode() = "DOTALL" }
predicate isIgnoreCase() { re.getAMode() = "IGNORECASE" }
string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }
override Regex getRegex() { result = re }
string getPrimaryQLClass() { result = "RegExpLiteral" }
@@ -89,8 +106,7 @@ class RegExpTerm extends RegExpParent {
or
this = TRegExpQuantifier(re, start, end)
or
this = TRegExpSequence(re, start, end) and
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
this = TRegExpSequence(re, start, end)
or
this = TRegExpSpecialChar(re, start, end)
}
@@ -337,10 +353,7 @@ class RegExpRange extends RegExpQuantifier {
* This is a sequence with the elements `(ECMA|Java)` and `Script`.
*/
class RegExpSequence extends RegExpTerm, TRegExpSequence {
RegExpSequence() {
this = TRegExpSequence(re, start, end) and
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
}
RegExpSequence() { this = TRegExpSequence(re, start, end) }
override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
@@ -836,6 +849,15 @@ class RegExpZeroWidthMatch extends RegExpGroup {
*/
class RegExpSubPattern extends RegExpZeroWidthMatch {
RegExpSubPattern() { not re.emptyGroup(start, end) }
/**
* Gets the term enclosed by this group, that is, the term of the same regex whose
* start and end offsets coincide with the contents of this group.
*
* NOTE(review): the previous comment called this "the lookahead term"; presumably
* lookbehind groups are covered as well, since this class extends
* `RegExpZeroWidthMatch` - confirm.
*/
RegExpTerm getOperand() {
// `in_start`/`in_end` delimit the contents of the group spanning `start`..`end`
exists(int in_start, int in_end | re.groupContents(start, end, in_start, in_end) |
result.getRegex() = re and
result.getStart() = in_start and
result.getEnd() = in_end
)
}
}
/**

View File

@@ -104,26 +104,20 @@ class LocalSourceNode extends Node {
}
/**
* DEPRECATED. Use `TypeTrackingNode::track` instead.
*
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
* See `TypeTracker` for more details about how to use this.
*/
pragma[inline]
deprecated LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
/**
* DEPRECATED. Use `TypeTrackingNode::backtrack` instead.
*
* Gets a node that may flow into this one using one heap and/or interprocedural step.
*
* See `TypeBackTracker` for more details about how to use this.
*/
pragma[inline]
deprecated LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) {
t2 = t.step(result, this)
}
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
}
/**
@@ -131,40 +125,46 @@ class LocalSourceNode extends Node {
*
* All steps made during type tracking should be between instances of this class.
*/
class TypeTrackingNode extends Node {
TypeTrackingNode() {
this instanceof LocalSourceNode
or
this instanceof ModuleVariableNode
class TypeTrackingNode = LocalSourceNode;
/** Temporary holding ground for the `TypeTrackingNode` class. */
private module FutureWork {
class FutureTypeTrackingNode extends Node {
FutureTypeTrackingNode() {
this instanceof LocalSourceNode
or
this instanceof ModuleVariableNode
}
/**
* Holds if this node can flow to `node` in one or more local flow steps.
*
* For `ModuleVariableNode`s, the only "local" step is to the node itself.
* For `LocalSourceNode`s, this is the usual notion of local flow.
*/
pragma[inline]
predicate flowsTo(Node node) {
this instanceof ModuleVariableNode and this = node
or
this.(LocalSourceNode).flowsTo(node)
}
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
* See `TypeTracker` for more details about how to use this.
*/
pragma[inline]
TypeTrackingNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
/**
* Gets a node that may flow into this one using one heap and/or interprocedural step.
*
* See `TypeBackTracker` for more details about how to use this.
*/
pragma[inline]
TypeTrackingNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
}
/**
* Holds if this node can flow to `nodeTo` in one or more local flow steps.
*
* For `ModuleVariableNode`s, the only "local" step is to the node itself.
* For `LocalSourceNode`s, this is the usual notion of local flow.
*/
predicate flowsTo(Node node) {
this instanceof ModuleVariableNode and this = node
or
this.(LocalSourceNode).flowsTo(node)
}
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
* See `TypeTracker` for more details about how to use this.
*/
pragma[inline]
TypeTrackingNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
/**
* Gets a node that may flow into this one using one heap and/or interprocedural step.
*
* See `TypeBackTracker` for more details about how to use this.
*/
pragma[inline]
TypeTrackingNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
}
cached
@@ -179,11 +179,21 @@ private module Cached {
source = sink
or
exists(Node second |
simpleLocalFlowStep(source, second) and
simpleLocalFlowStep*(second, sink)
localSourceFlowStep(source, second) and
localSourceFlowStep*(second, sink)
)
}
/**
* Helper predicate for `hasLocalSource`. Removes any steps that go to module variable reads, as
* these are already local source nodes in their own right.
*/
cached
private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo) and
// exclude steps whose target is a read of some module variable
not nodeTo = any(ModuleVariableNode v).getARead()
}
/**
* Holds if `base` flows to the base of `ref` and `ref` has attribute name `attr`.
*/

View File

@@ -1,6 +1,20 @@
import python
import semmle.python.dataflow.new.DataFlow
/**
* INTERNAL: Do not use.
*
* Provides helper predicates for pretty-printing `DataFlow::Node`s.
*
* Since these have not been performance optimized, please only use them for
* debug-queries or in tests.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
/**
* INTERNAL: Do not use.
*
* Gets the pretty-printed version of the Expr `e`.
*/
string prettyExpr(Expr e) {
not e instanceof Num and
not e instanceof StrConst and
@@ -27,7 +41,9 @@ string prettyExpr(Expr e) {
}
/**
* Gets pretty-printed version of the DataFlow::Node `node`
* INTERNAL: Do not use.
*
* Gets the pretty-printed version of the DataFlow::Node `node`
*/
bindingset[node]
string prettyNode(DataFlow::Node node) {
@@ -35,7 +51,9 @@ string prettyNode(DataFlow::Node node) {
}
/**
* Gets pretty-printed version of the DataFlow::Node `node`, that is suitable for use
* INTERNAL: Do not use.
*
* Gets the pretty-printed version of the DataFlow::Node `node`, that is suitable for use
* with `TestUtilities.InlineExpectationsTest` (that is, no spaces unless required).
*/
bindingset[node]

View File

@@ -46,9 +46,13 @@ private module Cached {
or
copyStep(nodeFrom, nodeTo)
or
forStep(nodeFrom, nodeTo)
DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
or
unpackingAssignmentStep(nodeFrom, nodeTo)
DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
or
awaitStep(nodeFrom, nodeTo)
}
}
@@ -201,26 +205,9 @@ predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
* for example `for x in xs`, or `for x,y in points`.
* Holds if taint can flow from `nodeFrom` to `nodeTo` with an `await`-step,
* such that the whole expression `await x` is tainted if `x` is tainted.
*/
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
exists(EssaNodeDefinition defn, For for |
for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = for.getIter()
)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
* Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
*/
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
// `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
exists(MultiAssignmentDefinition defn, Assign assign |
assign.getATarget().contains(defn.getDefiningNode().getNode()) and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = assign.getValue()
)
predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// `nodeTo` is an `await` expression and `nodeFrom` is the expression being awaited
nodeTo.asExpr().(Await).getValue() = nodeFrom.asExpr()
}

Some files were not shown because too many files have changed in this diff Show More