Merge branch 'main' of github.com:github/codeql into SharedDataflow_NestedComprehensions

This commit is contained in:
Rasmus Lerchedahl Petersen
2020-09-10 10:20:55 +02:00
1108 changed files with 45281 additions and 16261 deletions

View File

@@ -0,0 +1,12 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
This metric counts the number of lines of commented-out code in each file. Large amounts of
commented-out code often indicate poorly maintained code.
</p>
</overview>
</qhelp>

View File

@@ -0,0 +1,25 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Commented-out code is distracting and confusing for developers who read the surrounding code,
and its significance is often unclear. It will not get compiled or tested when the code around
it changes, so it's likely to break over time. For these reasons, commented-out code should be
avoided.
</p>
</overview>
<recommendation>
<p>
Remove or reinstate the commented-out code. If you want to include a snippet of example code
in a comment, consider enclosing it in quotes or marking it up as appropriate for the source
language.
</p>
</recommendation>
</qhelp>

View File

@@ -0,0 +1,12 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<references>
<li>Mark Needham: <a href="http://www.markhneedham.com/blog/2009/01/17/the-danger-of-commenting-out-code/">The danger of commenting out code</a>.</li>
<li>Los Techies: <a href="http://lostechies.com/rodpaddock/2010/12/29/commented-code-technical-debt">Commented Code == Technical Debt</a>.</li>
<li>High Integrity C++ Coding Standard: <a href="http://www.codingstandard.com/rule/2-3-2-do-not-comment-out-code/">2.3.2 Do not comment out code</a>.</li>
</references>
</qhelp>

View File

@@ -12,6 +12,5 @@ a poorly designed or hastily written code base, which typically suffers from oth
problems as well.
</p>
</overview>
</qhelp>

View File

@@ -0,0 +1,35 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
This metric measures the number of lines in a file that are contained within a block that is duplicated elsewhere. These lines may include code, comments and whitespace, and the duplicate block may be in this file or in another file.
</p>
<p>
A file that contains many lines that are duplicated within the code base is problematic
for a number of reasons.
</p>
</overview>
<include src="DuplicationProblems.qhelp" />
<recommendation>
<p>
Refactor files with lots of duplicated code to extract the common code into
a shared library or module.
</p>
</recommendation>
<references>
<li>Wikipedia: <a href="http://en.wikipedia.org/wiki/Duplicate_code">Duplicate code</a>.</li>
<li>M. Fowler, <em>Refactoring</em>. Addison-Wesley, 1999.</li>
</references>
</qhelp>

View File

@@ -6,7 +6,7 @@
<overview>
<p>
Sanitizing untrusted URLs is an important technique for
Sanitizing untrusted URLs is a common technique for
preventing attacks such as request forgeries and malicious
redirections. Often, this is done by checking that the host of a URL
is in a set of allowed hosts.

View File

@@ -6,7 +6,7 @@
<overview>
<p>
Sanitizing untrusted URLs is an important technique for
Sanitizing untrusted URLs is a common technique for
preventing attacks such as request forgeries and malicious
redirections. Usually, this is done by checking that the host of a URL
is in a set of allowed hosts.

View File

@@ -0,0 +1,20 @@
/**
* Contains customizations to the standard library.
*
* This module is imported by `python.qll`, so any customizations defined here automatically
* apply to all queries.
*
* Typical examples of customizations include adding new subclasses of abstract classes such as
* the `RemoteFlowSource::Range` and `AdditionalTaintStep` classes associated with the security
* queries to model frameworks that are not covered by the standard library.
*/
import python
/* General import that is useful */
// import experimental.dataflow.DataFlow
//
/* for extending `TaintTracking::AdditionalTaintStep` */
// import experimental.dataflow.TaintTracking
//
/* for extending `RemoteFlowSource::Range` */
// import experimental.dataflow.RemoteFlowSources

View File

@@ -15,7 +15,7 @@
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
*/
import python
private import python
/**
* Provides classes for performing local (intra-procedural) and

View File

@@ -15,7 +15,7 @@
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
*/
import python
private import python
/**
* Provides classes for performing local (intra-procedural) and

View File

@@ -0,0 +1,33 @@
private import python
private import experimental.dataflow.DataFlow
// Need to import since frameworks can extend `RemoteFlowSource::Range`
private import experimental.semmle.python.Frameworks
/**
* A data flow source of remote user input.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RemoteFlowSource::Range` instead.
*/
class RemoteFlowSource extends DataFlow::Node {
RemoteFlowSource::Range self;
RemoteFlowSource() { this = self }
/** Gets a string that describes the type of this remote flow source. */
string getSourceType() { result = self.getSourceType() }
}
/** Provides a class for modeling new sources of remote user input. */
module RemoteFlowSource {
/**
* A data flow source of remote user input.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RemoteFlowSource` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets a string that describes the type of this remote flow source. */
abstract string getSourceType();
}
}

View File

@@ -8,7 +8,7 @@
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
*/
import python
private import python
/**
* Provides classes for performing local (intra-procedural) and

View File

@@ -0,0 +1,289 @@
/** Step Summaries and Type Tracking */
private import python
private import internal.DataFlowPublic
private import internal.DataFlowPrivate
/** Any string that may appear as the name of an attribute or access path. */
class AttributeName extends string {
AttributeName() { this = any(Attribute a).getName() }
}
/** Either an attribute name, or the empty string (representing no attribute). */
class OptionalAttributeName extends string {
OptionalAttributeName() { this instanceof AttributeName or this = "" }
}
/**
* A description of a step on an inter-procedural data flow path.
*/
private newtype TStepSummary =
LevelStep() or
CallStep() or
ReturnStep() or
StoreStep(AttributeName attr) or
LoadStep(AttributeName attr)
/**
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
*
* A description of a step on an inter-procedural data flow path.
*/
class StepSummary extends TStepSummary {
/** Gets a textual representation of this step summary. */
string toString() {
this instanceof LevelStep and result = "level"
or
this instanceof CallStep and result = "call"
or
this instanceof ReturnStep and result = "return"
or
exists(string attr | this = StoreStep(attr) | result = "store " + attr)
or
exists(string attr | this = LoadStep(attr) | result = "load " + attr)
}
}
module StepSummary {
cached
predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
exists(Node mid | EssaFlow::essaFlowStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
}
predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
summary = LevelStep()
or
callStep(nodeFrom, nodeTo) and summary = CallStep()
or
returnStep(nodeFrom, nodeTo) and
summary = ReturnStep()
or
exists(string attr |
basicStoreStep(nodeFrom, nodeTo, attr) and
summary = StoreStep(attr)
or
basicLoadStep(nodeFrom, nodeTo, attr) and summary = LoadStep(attr)
)
}
}
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
predicate callStep(ArgumentNode nodeFrom, ParameterNode nodeTo) {
// TODO: Support special methods?
exists(DataFlowCall call, int i |
nodeFrom.argumentOf(call, i) and nodeTo.isParameterOf(call.getCallable(), i)
)
}
/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
exists(DataFlowCall call |
nodeFrom.getEnclosingCallable() = call.getCallable() and nodeTo.asCfgNode() = call.getNode()
)
}
/**
* Holds if `nodeFrom` is being written to the `attr` attribute of the object in `nodeTo`.
*
* Note that the choice of `nodeTo` does not have to make sense "chronologically".
* All we care about is whether the `attr` attribute of `nodeTo` can have a specific type,
* and the assumption is that if a specific type appears here, then any access of that
* particular attribute can yield something of that particular type.
*
* Thus, in an example such as
*
* ```python
* def foo(y):
* x = Foo()
* bar(x)
* x.attr = y
* baz(x)
*
* def bar(x):
* z = x.attr
* ```
* for the attribute write `x.attr = y`, we will have `attr` being the literal string `"attr"`,
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
*/
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
exists(AttributeAssignment a, Node var |
a.getName() = attr and
EssaFlow::essaFlowStep*(nodeTo, var) and
var.asVar() = a.getInput() and
nodeFrom.asCfgNode() = a.getValue()
)
}
/**
* Holds if `nodeTo` is the result of accessing the `attr` attribute of `nodeFrom`.
*/
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
exists(AttrNode s | nodeTo.asCfgNode() = s and s.getObject(attr) = nodeFrom.asCfgNode())
}
/**
* A utility class that is equivalent to `boolean` but does not require type joining.
*/
private class Boolean extends boolean {
Boolean() { this = true or this = false }
}
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeName attr)
/**
* Summary of the steps needed to track a value to a given dataflow node.
*
* This can be used to track objects that implement a certain API in order to
* recognize calls to that API. Note that type-tracking does not by itself provide a
* source/sink relation, that is, it may determine that a node has a given type,
* but it won't determine where that type came from.
*
* It is recommended that all uses of this type are written in the following form,
* for tracking some type `myType`:
* ```
* DataFlow::Node myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
* exists (TypeTracker t2 |
* result = myType(t2).track(t2, t)
* )
* }
*
* DataFlow::Node myType() { result = myType(DataFlow::TypeTracker::end()) }
* ```
*
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
*/
class TypeTracker extends TTypeTracker {
Boolean hasCall;
OptionalAttributeName attr;
TypeTracker() { this = MkTypeTracker(hasCall, attr) }
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
cached
TypeTracker append(StepSummary step) {
step = LevelStep() and result = this
or
step = CallStep() and result = MkTypeTracker(true, attr)
or
step = ReturnStep() and hasCall = false and result = this
or
step = LoadStep(attr) and result = MkTypeTracker(hasCall, "")
or
exists(string p | step = StoreStep(p) and attr = "" and result = MkTypeTracker(hasCall, p))
}
/** Gets a textual representation of this summary. */
string toString() {
exists(string withCall, string withAttr |
(if hasCall = true then withCall = "with" else withCall = "without") and
(if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
result = "type tracker " + withCall + " call steps" + withAttr
)
}
/**
* Holds if this is the starting point of type tracking.
*/
predicate start() { hasCall = false and attr = "" }
/**
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
* The type tracking only ends after the attribute has been loaded.
*/
predicate startInAttr(AttributeName attrName) { hasCall = false and attr = attrName }
/**
* Holds if this is the starting point of type tracking
* when tracking a parameter into a call, but not out of it.
*/
predicate call() { hasCall = true and attr = "" }
/**
* Holds if this is the end point of type tracking.
*/
predicate end() { attr = "" }
/**
* INTERNAL. DO NOT USE.
*
* Holds if this type has been tracked into a call.
*/
boolean hasCall() { result = hasCall }
/**
* INTERNAL. DO NOT USE.
*
* Gets the attribute associated with this type tracker.
*/
string getAttr() { result = attr }
/**
* Gets a type tracker that starts where this one has left off to allow continued
* tracking.
*
* This predicate is only defined if the type has not been tracked into an attribute.
*/
TypeTracker continue() { attr = "" and result = this }
/**
* Gets the summary that corresponds to having taken a forwards
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
*/
pragma[inline]
TypeTracker step(Node nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::step(nodeFrom, nodeTo, summary) and
result = this.append(summary)
)
}
/**
* Gets the summary that corresponds to having taken a forwards
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
*
* Unlike `TypeTracker::step`, this predicate exposes all edges
* in the flow graph, and not just the edges between `Node`s.
* It may therefore be less performant.
*
* Type tracking predicates using small steps typically take the following form:
* ```ql
* DataFlow::Node myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
* exists (DataFlow::TypeTracker t2 |
* t = t2.smallstep(myType(t2), result)
* )
* }
*
* DataFlow::Node myType() {
* result = myType(DataFlow::TypeTracker::end())
* }
* ```
*/
pragma[inline]
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
result = this.append(summary)
)
or
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
result = this
}
}
module TypeTracker {
/**
* Gets a valid end point of type tracking.
*/
TypeTracker end() { result.end() }
}

View File

@@ -123,8 +123,18 @@ module Consistency {
n.getEnclosingCallable() != call.getEnclosingCallable()
}
// This predicate helps the compiler forget that in some languages
// it is impossible for a result of `getPreUpdateNode` to be an
// instance of `PostUpdateNode`.
private Node getPre(PostUpdateNode n) {
result = n.getPreUpdateNode()
or
none()
}
query predicate postIsNotPre(PostUpdateNode n, string msg) {
n.getPreUpdateNode() = n and msg = "PostUpdateNode should not equal its pre-update node."
getPre(n) = n and
msg = "PostUpdateNode should not equal its pre-update node."
}
query predicate postHasUniquePre(PostUpdateNode n, string msg) {
@@ -152,12 +162,6 @@ module Consistency {
msg = "Origin of readStep is missing a PostUpdateNode."
}
query predicate storeIsPostUpdate(Node n, string msg) {
storeStep(_, _, n) and
not n instanceof PostUpdateNode and
msg = "Store targets should be PostUpdateNodes."
}
query predicate argHasPostUpdate(ArgumentNode n, string msg) {
not hasPost(n) and
not isImmutableOrUnobservable(n) and

View File

@@ -1,5 +1,6 @@
private import python
private import DataFlowPublic
import semmle.python.SpecialMethods
//--------
// Data flow graph
@@ -7,6 +8,39 @@ private import DataFlowPublic
//--------
// Nodes
//--------
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
/** A control flow node which is also a dataflow node */
class DataFlowCfgNode extends ControlFlowNode {
DataFlowCfgNode() { isExpressionNode(this) }
}
/** A data flow node which should have an associated post-update node. */
abstract class PreUpdateNode extends Node { }
/** An argument might have its value changed as a result of a call. */
class ArgumentPreUpdateNode extends PreUpdateNode, ArgumentNode { }
/** An object might have its value changed after a store. */
class StorePreUpdateNode extends PreUpdateNode, CfgNode {
StorePreUpdateNode() {
exists(Attribute a |
node = a.getObject().getAFlowNode() and
a.getCtx() instanceof Store
)
}
}
/** A node marking the state change of an object after a read */
class ReadPreUpdateNode extends PreUpdateNode, CfgNode {
ReadPreUpdateNode() {
exists(Attribute a |
node = a.getObject().getAFlowNode() and
a.getCtx() instanceof Load
)
}
}
/**
* A node associated with an object after an operation that might have
* changed its state.
@@ -16,12 +50,21 @@ private import DataFlowPublic
* an update to the field.
*
* Nodes corresponding to AST elements, for example `ExprNode`, usually refer
* to the value before the update with the exception of `ObjectCreation`,
* which represents the value after the constructor has run.
* to the value before the update.
*/
abstract class PostUpdateNode extends Node {
class PostUpdateNode extends Node, TPostUpdateNode {
PreUpdateNode pre;
PostUpdateNode() { this = TPostUpdateNode(pre) }
/** Gets the node before the state update. */
abstract Node getPreUpdateNode();
Node getPreUpdateNode() { result = pre }
override string toString() { result = "[post] " + pre.toString() }
override Scope getScope() { result = pre.getScope() }
override Location getLocation() { result = pre.getLocation() }
}
class DataFlowExpr = Expr;
@@ -59,7 +102,7 @@ module EssaFlow {
// `x = f(y)`
// nodeFrom is `y` on first line, essa var
// nodeTo is `y` on second line, cfg node
nodeFrom.(EssaNode).getVar().getAUse() = nodeTo.(CfgNode).getNode()
nodeFrom.(EssaNode).getVar().getASourceUse() = nodeTo.(CfgNode).getNode()
or
// Refinements
exists(EssaEdgeRefinement r |
@@ -90,7 +133,17 @@ module EssaFlow {
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
not nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable and
not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
EssaFlow::essaFlowStep(update(nodeFrom), nodeTo)
}
private Node update(Node node) {
exists(PostUpdateNode pun |
node = pun.getPreUpdateNode() and
result = pun
)
or
not exists(PostUpdateNode pun | node = pun.getPreUpdateNode()) and
result = node
}
// TODO: Make modules for these headings
@@ -157,17 +210,67 @@ class DataFlowClassValue extends DataFlowCallable, TClassValue {
override string getName() { result = c.getName() }
}
/** Represents a call to a callable */
class DataFlowCall extends CallNode {
DataFlowCallable callable;
newtype TDataFlowCall =
TCallNode(CallNode call) or
TSpecialCall(SpecialMethodCallNode special)
DataFlowCall() { this = callable.getACall() }
abstract class DataFlowCall extends TDataFlowCall {
/** Gets a textual representation of this element. */
abstract string toString();
/** Get the callable to which this call goes. */
DataFlowCallable getCallable() { result = callable }
abstract DataFlowCallable getCallable();
/** Get the specified argument to this call. */
abstract ControlFlowNode getArg(int n);
/** Get the control flow node representing this call. */
abstract ControlFlowNode getNode();
/** Gets the enclosing callable of this call. */
DataFlowCallable getEnclosingCallable() { result.getScope() = this.getNode().getScope() }
abstract DataFlowCallable getEnclosingCallable();
}
/** Represents a call to a callable. */
class CallNodeCall extends DataFlowCall, TCallNode {
CallNode call;
DataFlowCallable callable;
CallNodeCall() {
this = TCallNode(call) and
call = callable.getACall()
}
override string toString() { result = call.toString() }
override ControlFlowNode getArg(int n) { result = call.getArg(n) }
override ControlFlowNode getNode() { result = call }
override DataFlowCallable getCallable() { result = callable }
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getNode().getScope() }
}
/** Represents a call to a special method. */
class SpecialCall extends DataFlowCall, TSpecialCall {
SpecialMethodCallNode special;
SpecialCall() { this = TSpecialCall(special) }
override string toString() { result = special.toString() }
override ControlFlowNode getArg(int n) { result = special.(SpecialMethod::Potential).getArg(n) }
override ControlFlowNode getNode() { result = special }
override DataFlowCallable getCallable() {
result = TCallableValue(special.getResolvedSpecialMethod())
}
override DataFlowCallable getEnclosingCallable() {
result.getScope() = special.getNode().getScope()
}
}
/** A data flow node that represents a call argument. */
@@ -220,7 +323,7 @@ class OutNode extends CfgNode {
* `kind`.
*/
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
call = result.getNode() and
call.getNode() = result.getNode() and
kind = TNormalReturnKind()
}

View File

@@ -2,8 +2,9 @@
* Provides Python-specific definitions for use in the data flow library.
*/
import python
private import python
private import DataFlowPrivate
import experimental.dataflow.TypeTracker
/**
* IPA type for data flow nodes.
@@ -20,7 +21,9 @@ newtype TNode =
/** A node corresponding to an SSA variable. */
TEssaNode(EssaVariable var) or
/** A node corresponding to a control flow node. */
TCfgNode(ControlFlowNode node)
TCfgNode(DataFlowCfgNode node) or
/** A node representing the value of an object after a state change */
TPostUpdateNode(PreUpdateNode pre)
/**
* An element, viewed as a node in a data flow graph. Either an SSA variable
@@ -58,6 +61,23 @@ class Node extends TNode {
) {
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Convenience method for casting to EssaNode and calling getVar. */
EssaVariable asVar() { none() }
/** Convenience method for casting to CfgNode and calling getNode. */
ControlFlowNode asCfgNode() { none() }
/** Convenience method for casting to ExprNode and calling getNode and getNode again. */
Expr asExpr() { none() }
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
* See `TypeTracker` for more details about how to use this.
*/
pragma[inline]
Node track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
}
class EssaNode extends Node, TEssaNode {
@@ -67,6 +87,8 @@ class EssaNode extends Node, TEssaNode {
EssaVariable getVar() { result = var }
override EssaVariable asVar() { result = var }
/** Gets a textual representation of this element. */
override string toString() { result = var.toString() }
@@ -76,12 +98,14 @@ class EssaNode extends Node, TEssaNode {
}
class CfgNode extends Node, TCfgNode {
ControlFlowNode node;
DataFlowCfgNode node;
CfgNode() { this = TCfgNode(node) }
ControlFlowNode getNode() { result = node }
override ControlFlowNode asCfgNode() { result = node }
/** Gets a textual representation of this element. */
override string toString() { result = node.toString() }
@@ -97,10 +121,14 @@ class CfgNode extends Node, TCfgNode {
* to multiple `ExprNode`s, just like it may correspond to multiple
* `ControlFlow::Node`s.
*/
class ExprNode extends Node { }
class ExprNode extends CfgNode {
ExprNode() { isExpressionNode(node) }
override Expr asExpr() { result = node.getNode() }
}
/** Gets a node corresponding to expression `e`. */
ExprNode exprNode(DataFlowExpr e) { none() }
ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
/**
* The value of a parameter at function entry, viewed as a node in a data

View File

@@ -4,10 +4,10 @@ private import experimental.dataflow.internal.DataFlowPrivate
private import experimental.dataflow.internal.TaintTrackingPublic
/**
* Holds if `node` should be a barrier in all global taint flow configurations
* Holds if `node` should be a sanitizer in all global taint flow configurations
* but not in local taint.
*/
predicate defaultTaintBarrier(DataFlow::Node node) { none() }
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
/**
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
@@ -30,14 +30,24 @@ predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeT
subscriptStep(nodeFrom, nodeTo)
or
stringManipulation(nodeFrom, nodeTo)
or
jsonStep(nodeFrom, nodeTo)
or
containerStep(nodeFrom, nodeTo)
or
copyStep(nodeFrom, nodeTo)
or
forStep(nodeFrom, nodeTo)
or
unpackingAssignmentStep(nodeFrom, nodeTo)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to concatenation.
*
* Note that since we cannot easily distinguish interesting types (like string, list, tuple),
* we consider any `+` operation to propagate taint. After consulting with the JS team, this
* doesn't sound like it is a big problem in practice.
* we consider any `+` operation to propagate taint. This is what is done in the JS libraries,
* and isn't a big problem in practice.
*/
predicate concatStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(BinaryExprNode add | add = nodeTo.getNode() |
@@ -118,8 +128,101 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
)
or
// f-strings
nodeTo.getNode().getNode().(Fstring).getAValue() = nodeFrom.getNode().getNode()
nodeTo.asExpr().(Fstring).getAValue() = nodeFrom.asExpr()
// TODO: Handle encode/decode from base64/quopri
// TODO: Handle os.path.join
// TODO: Handle functions in https://docs.python.org/3/library/binascii.html
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to JSON encoding/decoding.
*/
predicate jsonStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(CallNode call | call = nodeTo.getNode() |
call.getFunction().(AttrNode).getObject(["load", "loads", "dumps"]).(NameNode).getId() = "json" and
call.getArg(0) = nodeFrom.getNode()
)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to containers
* (lists/sets/dictionaries): literals, constructor invocation, methods. Note that this
* is currently very imprecise, as an example, since we model `dict.get`, we treat any
* `<tainted object>.get(<arg>)` will be tainted, whether it's true or not.
*/
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
// construction by literal
// TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
storeStep(nodeFrom, _, nodeTo)
or
// constructor call
exists(CallNode call | call = nodeTo.asCfgNode() |
call.getFunction().(NameNode).getId() in ["list", "set", "frozenset", "dict", "defaultdict",
"tuple"] and
call.getArg(0) = nodeFrom.getNode()
)
or
// functions operating on collections
exists(CallNode call | call = nodeTo.asCfgNode() |
call.getFunction().(NameNode).getId() in ["sorted", "reversed", "iter", "next"] and
call.getArg(0) = nodeFrom.getNode()
)
or
// methods
exists(CallNode call, string name | call = nodeTo.asCfgNode() |
name in ["copy",
// general
"pop",
// dict
"values", "items", "get", "popitem"] and
call.getFunction().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
)
or
// list.append, set.add
exists(CallNode call, string name |
name in ["append", "add"] and
call.getFunction().(AttrNode).getObject(name) =
nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() and
call.getArg(0) = nodeFrom.getNode()
)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying.
*/
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(CallNode call | call = nodeTo.getNode() |
// Fully qualified: copy.copy, copy.deepcopy
(
call.getFunction().(NameNode).getId() in ["copy", "deepcopy"]
or
call.getFunction().(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy"
) and
call.getArg(0) = nodeFrom.getNode()
)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
* for example `for x in xs`, or `for x,y in points`.
*/
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
exists(EssaNodeDefinition defn, For for |
for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = for.getIter()
)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
* Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
*/
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
// `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
exists(MultiAssignmentDefinition defn, Assign assign |
assign.getATarget().contains(defn.getDefiningNode().getNode()) and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = assign.getValue()
)
}

View File

@@ -6,6 +6,8 @@
private import python
private import TaintTrackingPrivate
private import experimental.dataflow.DataFlow
// Need to import since frameworks can extend `AdditionalTaintStep`
private import experimental.semmle.python.Frameworks
// Local taint flow and helpers
/**

View File

@@ -76,20 +76,20 @@ abstract class Configuration extends DataFlow::Configuration {
final override predicate isBarrier(DataFlow::Node node) {
isSanitizer(node) or
defaultTaintBarrier(node)
defaultTaintSanitizer(node)
}
/** Holds if data flow into `node` is prohibited. */
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
/** Holds if data flow out of `node` is prohibited. */
/** Holds if taint propagation out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
/** Holds if data flow through nodes guarded by `guard` is prohibited. */
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }

View File

@@ -0,0 +1,40 @@
/**
* Provides abstract classes representing generic concepts such as file system
* access or system command execution, for which individual framework libraries
* provide concrete subclasses.
*/
import python
private import experimental.dataflow.DataFlow
private import experimental.semmle.python.Frameworks
/**
* A data-flow node that executes an operating system command,
* for instance by spawning a new process.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SystemCommandExecution::Range` instead.
*/
class SystemCommandExecution extends DataFlow::Node {
SystemCommandExecution::Range self;
SystemCommandExecution() { this = self }
/** Gets the argument that specifies the command to be executed. */
DataFlow::Node getCommand() { result = self.getCommand() }
}
/** Provides a class for modeling new system-command execution APIs. */
module SystemCommandExecution {
/**
* A data-flow node that executes an operating system command,
* for instance by spawning a new process.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SystemCommandExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the command to be executed. */
abstract DataFlow::Node getCommand();
}
}

View File

@@ -0,0 +1,7 @@
/**
* Helper file that imports all framework modeling.
*/
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Stdlib

View File

@@ -0,0 +1,10 @@
/**
* Provides classes modeling security-relevant aspects of the `django` package.
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private module Django { }

View File

@@ -0,0 +1,10 @@
/**
* Provides classes modeling security-relevant aspects of the `flask` package.
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private module Flask { }

View File

@@ -0,0 +1,9 @@
/**
* Provides classes modeling security-relevant aspects of the standard libraries.
* Note: some modeling is done internally in the dataflow/taint tracking implementation.
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts

View File

@@ -0,0 +1,117 @@
/**
* Provides support for special methods.
* This is done in two steps:
* - A subset of `ControlFlowNode`s are labelled as potentially corresponding to
* a special method call (by being an instance of `SpecialMethod::Potential`).
* - A subset of the potential special method calls are labelled as being actual
* special method calls (`SpecialMethodCallNode`) if the appropriate method is defined.
* Extend `SpecialMethod::Potential` to capture more cases.
*/
import python
/** A control flow node which might correspond to a special method call. */
class PotentialSpecialMethodCallNode extends ControlFlowNode {
PotentialSpecialMethodCallNode() { this instanceof SpecialMethod::Potential }
}
/**
* Machinery for detecting special method calls.
* Extend `SpecialMethod::Potential` to capture more cases.
*/
module SpecialMethod {
/** A control flow node which might correspond to a special method call. */
abstract class Potential extends ControlFlowNode {
/** Gets the name of the method that would be called. */
abstract string getSpecialMethodName();
/** Gets the control flow node that would be passed as the specified argument. */
abstract ControlFlowNode getArg(int n);
/**
* Gets the control flow node corresponding to the instance
* that would define the special method.
*/
ControlFlowNode getSelf() { result = this.getArg(0) }
}
/** A binary expression node that might correspond to a special method call. */
class SpecialBinOp extends Potential, BinaryExprNode {
Operator operator;
SpecialBinOp() { this.getOp() = operator }
override string getSpecialMethodName() { result = operator.getSpecialMethodName() }
override ControlFlowNode getArg(int n) {
n = 0 and result = this.getLeft()
or
n = 1 and result = this.getRight()
}
}
/** A subscript expression node that might correspond to a special method call. */
abstract class SpecialSubscript extends Potential, SubscriptNode {
override ControlFlowNode getArg(int n) {
n = 0 and result = this.getObject()
or
n = 1 and result = this.getIndex()
}
}
/** A subscript expression node that might correspond to a call to __getitem__. */
class SpecialGetItem extends SpecialSubscript {
SpecialGetItem() { this.isLoad() }
override string getSpecialMethodName() { result = "__getitem__" }
}
/** A subscript expression node that might correspond to a call to __setitem__. */
class SpecialSetItem extends SpecialSubscript {
SpecialSetItem() { this.isStore() }
override string getSpecialMethodName() { result = "__setitem__" }
override ControlFlowNode getArg(int n) {
n = 0 and result = this.getObject()
or
n = 1 and result = this.getIndex()
or
n = 2 and result = this.getValueNode()
}
private ControlFlowNode getValueNode() {
exists(AssignStmt a |
a.getATarget() = this.getNode() and
result.getNode() = a.getValue()
)
or
exists(AugAssign a |
a.getTarget() = this.getNode() and
result.getNode() = a.getValue()
)
}
}
/** A subscript expression node that might correspond to a call to __delitem__. */
class SpecialDelItem extends SpecialSubscript {
SpecialDelItem() { this.isDelete() }
override string getSpecialMethodName() { result = "__delitem__" }
}
}
/** A control flow node corresponding to a special method call. */
class SpecialMethodCallNode extends PotentialSpecialMethodCallNode {
Value resolvedSpecialMethod;
SpecialMethodCallNode() {
exists(SpecialMethod::Potential pot |
this.(SpecialMethod::Potential) = pot and
pot.getSelf().pointsTo().getClass().lookup(pot.getSpecialMethodName()) = resolvedSpecialMethod
)
}
/** The method that is called. */
Value getResolvedSpecialMethod() { result = resolvedSpecialMethod }
}

View File

@@ -27,7 +27,8 @@ abstract class StringKind extends TaintKind {
os_path_join(fromnode, tonode) or
str_format(fromnode, tonode) or
encode_decode(fromnode, tonode) or
to_str(fromnode, tonode)
to_str(fromnode, tonode) or
f_string(fromnode, tonode)
)
or
result = this and copy_call(fromnode, tonode)
@@ -61,13 +62,13 @@ private class StringEqualitySanitizer extends Sanitizer {
}
}
/* tonode = ....format(fromnode) */
/** tonode = ....format(fromnode) */
private predicate str_format(ControlFlowNode fromnode, CallNode tonode) {
tonode.getFunction().(AttrNode).getName() = "format" and
tonode.getAnArg() = fromnode
}
/* tonode = codec.[en|de]code(fromnode)*/
/** tonode = codec.[en|de]code(fromnode) */
private predicate encode_decode(ControlFlowNode fromnode, CallNode tonode) {
exists(FunctionObject func, string name |
not func.getFunction().isMethod() and
@@ -81,7 +82,7 @@ private predicate encode_decode(ControlFlowNode fromnode, CallNode tonode) {
)
}
/* tonode = str(fromnode)*/
/** tonode = str(fromnode) */
private predicate to_str(ControlFlowNode fromnode, CallNode tonode) {
tonode.getAnArg() = fromnode and
(
@@ -91,7 +92,7 @@ private predicate to_str(ControlFlowNode fromnode, CallNode tonode) {
)
}
/* tonode = fromnode[:] */
/** tonode = fromnode[:] */
private predicate slice(ControlFlowNode fromnode, SubscriptNode tonode) {
exists(Slice all |
all = tonode.getIndex().getNode() and
@@ -101,12 +102,17 @@ private predicate slice(ControlFlowNode fromnode, SubscriptNode tonode) {
)
}
/* tonode = os.path.join(..., fromnode, ...) */
/** tonode = os.path.join(..., fromnode, ...) */
private predicate os_path_join(ControlFlowNode fromnode, CallNode tonode) {
tonode = Value::named("os.path.join").getACall() and
tonode.getAnArg() = fromnode
}
/** tonode = f"... {fromnode} ..." */
private predicate f_string(ControlFlowNode fromnode, ControlFlowNode tonode) {
tonode.getNode().(Fstring).getAValue() = fromnode.getNode()
}
/**
* A kind of "taint", representing a dictionary mapping str->"taint"
*