Merge branch 'main' into python-more-complete-dataflow-tests

This commit is contained in:
Rasmus Wriedt Larsen
2020-09-03 14:58:20 +02:00
458 changed files with 11018 additions and 1637 deletions

View File

@@ -0,0 +1,12 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
This metric counts the number of lines of commented-out code in each file. Large amounts of
commented-out code often indicate poorly maintained code.
</p>
</overview>
</qhelp>

View File

@@ -0,0 +1,25 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Commented-out code is distracting and confusing for developers who read the surrounding code,
and its significance is often unclear. It will not get compiled or tested when the code around
it changes, so it's likely to break over time. For these reasons, commented-out code should be
avoided.
</p>
</overview>
<recommendation>
<p>
Remove or reinstate the commented-out code. If you want to include a snippet of example code
in a comment, consider enclosing it in quotes or marking it up as appropriate for the source
language.
</p>
</recommendation>
</qhelp>

View File

@@ -0,0 +1,12 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<references>
<li>Mark Needham: <a href="http://www.markhneedham.com/blog/2009/01/17/the-danger-of-commenting-out-code/">The danger of commenting out code</a>.</li>
<li>Los Techies: <a href="http://lostechies.com/rodpaddock/2010/12/29/commented-code-technical-debt">Commented Code == Technical Debt</a>.</li>
<li>High Integrity C++ Coding Standard: <a href="http://www.codingstandard.com/rule/2-3-2-do-not-comment-out-code/">2.3.2 Do not comment out code</a>.</li>
</references>
</qhelp>

View File

@@ -12,6 +12,5 @@ a poorly designed or hastily written code base, which typically suffers from oth
problems as well.
</p>
</overview>
</qhelp>

View File

@@ -0,0 +1,35 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
This metric measures the number of lines in a file that are contained within a block that is duplicated elsewhere. These lines may include code, comments and whitespace, and the duplicate block may be in this file or in another file.
</p>
<p>
A file that contains many lines that are duplicated within the code base is problematic
for a number of reasons.
</p>
</overview>
<include src="DuplicationProblems.qhelp" />
<recommendation>
<p>
Refactor files with lots of duplicated code to extract the common code into
a shared library or module.
</p>
</recommendation>
<references>
<li>Wikipedia: <a href="http://en.wikipedia.org/wiki/Duplicate_code">Duplicate code</a>.</li>
<li>M. Fowler, <em>Refactoring</em>. Addison-Wesley, 1999.</li>
</references>
</qhelp>

View File

@@ -15,7 +15,7 @@
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
*/
import python
private import python
/**
* Provides classes for performing local (intra-procedural) and

View File

@@ -15,7 +15,7 @@
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
*/
import python
private import python
/**
* Provides classes for performing local (intra-procedural) and

View File

@@ -8,7 +8,7 @@
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
*/
import python
private import python
/**
* Provides classes for performing local (intra-procedural) and

View File

@@ -0,0 +1,282 @@
/** Step Summaries and Type Tracking */
import python
import internal.DataFlowPublic
import internal.DataFlowPrivate
/** Any string that may appear as the name of an attribute or access path. */
class AttributeName extends string {
AttributeName() { this = any(Attribute a).getName() }
}
/** Either an attribute name, or the empty string (representing no attribute). */
class OptionalAttributeName extends string {
OptionalAttributeName() { this instanceof AttributeName or this = "" }
}
/**
* A description of a step on an inter-procedural data flow path.
*/
private newtype TStepSummary =
LevelStep() or
CallStep() or
ReturnStep() or
StoreStep(AttributeName attr) or
LoadStep(AttributeName attr)
/**
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
*
* A description of a step on an inter-procedural data flow path.
*/
class StepSummary extends TStepSummary {
/** Gets a textual representation of this step summary. */
string toString() {
this instanceof LevelStep and result = "level"
or
this instanceof CallStep and result = "call"
or
this instanceof ReturnStep and result = "return"
or
exists(string attr | this = StoreStep(attr) | result = "store " + attr)
or
exists(string attr | this = LoadStep(attr) | result = "load " + attr)
}
}
module StepSummary {
/**
 * Holds if `nodeFrom` can reach `nodeTo` in one inter-procedural/heap step
 * described by `summary`, possibly preceded by any number of local (ESSA)
 * flow steps (note the `essaFlowStep*` closure below).
 */
cached
predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
exists(Node mid | EssaFlow::essaFlowStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
}
/**
 * Holds if `nodeFrom` can reach `nodeTo` in exactly one step, described by
 * `summary`. Unlike `step`, no intervening local flow steps are allowed;
 * a single local flow step is itself reported as a `LevelStep`.
 */
predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
summary = LevelStep()
or
callStep(nodeFrom, nodeTo) and summary = CallStep()
or
returnStep(nodeFrom, nodeTo) and
summary = ReturnStep()
or
exists(string attr |
basicStoreStep(nodeFrom, nodeTo, attr) and
summary = StoreStep(attr)
or
basicLoadStep(nodeFrom, nodeTo, attr) and summary = LoadStep(attr)
)
}
}
/**
 * Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
 *
 * That is, `nodeFrom` is the `i`th argument of some call, and `nodeTo` is the
 * `i`th parameter of the callable targeted by that call.
 */
predicate callStep(ArgumentNode nodeFrom, ParameterNode nodeTo) {
// TODO: Support special methods?
exists(DataFlowCall call, int i |
nodeFrom.argumentOf(call, i) and nodeTo.isParameterOf(call.getCallable(), i)
)
}
/**
 * Holds if `nodeFrom` steps to `nodeTo` by being returned from a call.
 *
 * That is, `nodeFrom` is a return node inside the callable targeted by some
 * call, and `nodeTo` is the control-flow node of that call expression itself.
 */
predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
exists(DataFlowCall call |
nodeFrom.getEnclosingCallable() = call.getCallable() and nodeTo.asCfgNode() = call.getNode()
)
}
/**
 * Holds if `nodeFrom` is being written to the `attr` attribute of the object in `nodeTo`.
 *
 * Note that the choice of `nodeTo` does not have to make sense "chronologically".
 * All we care about is whether the `attr` attribute of `nodeTo` can have a specific type,
 * and the assumption is that if a specific type appears here, then any access of that
 * particular attribute can yield something of that particular type.
 *
 * Thus, in an example such as
 *
 * ```python
 * def foo(y):
 * x = Foo()
 * bar(x)
 * x.attr = y
 * baz(x)
 *
 * def bar(x):
 * z = x.attr
 * ```
 * for the attribute write `x.attr = y`, we will have `attr` being the literal string `"attr"`,
 * `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
 * function. This means we will track the fact that `x.attr` can have the type of `y` into the
 * assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
 */
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
exists(AttributeAssignment a, Node var |
a.getName() = attr and
// Follow local flow forwards from `nodeTo` to the variable that the
// attribute is actually written through (see the QLDoc above for why
// `nodeTo` may be "earlier" than the write itself).
EssaFlow::essaFlowStep*(nodeTo, var) and
var.asVar() = a.getInput() and
nodeFrom.asCfgNode() = a.getValue()
)
}
/**
 * Holds if `nodeTo` is the result of accessing the `attr` attribute of `nodeFrom`.
 * That is, `nodeTo` is an expression of the form `x.attr`, and `nodeFrom` is `x`.
 */
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
exists(AttrNode s | nodeTo.asCfgNode() = s and s.getObject(attr) = nodeFrom.asCfgNode())
}
/**
* A utility class that is equivalent to `boolean` but does not require type joining.
*/
private class Boolean extends boolean {
Boolean() { this = true or this = false }
}
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeName attr)
/**
* Summary of the steps needed to track a value to a given dataflow node.
*
* This can be used to track objects that implement a certain API in order to
* recognize calls to that API. Note that type-tracking does not by itself provide a
* source/sink relation, that is, it may determine that a node has a given type,
* but it won't determine where that type came from.
*
* It is recommended that all uses of this type are written in the following form,
* for tracking some type `myType`:
* ```
* Node myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
* exists (TypeTracker t2 |
* result = myType(t2).track(t2, t)
* )
* }
*
* DataFlow::SourceNode myType() { result = myType(DataFlow::TypeTracker::end()) }
* ```
*
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
*/
class TypeTracker extends TTypeTracker {
// Whether a call step has been taken on the path summarized so far.
Boolean hasCall;
// The attribute currently being tracked, or "" when tracking the value itself.
OptionalAttributeName attr;
TypeTracker() { this = MkTypeTracker(hasCall, attr) }
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
cached
TypeTracker append(StepSummary step) {
// A level step leaves the summary unchanged.
step = LevelStep() and result = this
or
// Record that a call step was taken, so later return steps can be rejected.
step = CallStep() and result = MkTypeTracker(true, attr)
or
// Only step out of a call if we have not stepped into one, to avoid
// mismatched call/return pairs.
step = ReturnStep() and hasCall = false and result = this
or
// Loading the tracked attribute means we are now tracking the value itself.
step = LoadStep(attr) and result = MkTypeTracker(hasCall, "")
or
// A store into attribute `p` is only appended when no attribute is being
// tracked yet; afterwards we track the `p` attribute.
exists(string p | step = StoreStep(p) and attr = "" and result = MkTypeTracker(hasCall, p))
}
/** Gets a textual representation of this summary. */
string toString() {
exists(string withCall, string withAttr |
(if hasCall = true then withCall = "with" else withCall = "without") and
(if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
result = "type tracker " + withCall + " call steps" + withAttr
)
}
/**
 * Holds if this is the starting point of type tracking.
 */
predicate start() { hasCall = false and attr = "" }
/**
 * Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
 * The type tracking only ends after the attribute has been loaded.
 */
predicate startInAttr(AttributeName attrName) { hasCall = false and attr = attrName }
/**
 * Holds if this is the starting point of type tracking
 * when tracking a parameter into a call, but not out of it.
 */
predicate call() { hasCall = true and attr = "" }
/**
 * Holds if this is the end point of type tracking.
 */
predicate end() { attr = "" }
/**
 * INTERNAL. DO NOT USE.
 *
 * Holds if this type has been tracked into a call.
 */
boolean hasCall() { result = hasCall }
/**
 * INTERNAL. DO NOT USE.
 *
 * Gets the attribute associated with this type tracker.
 */
string getAttr() { result = attr }
/**
 * Gets a type tracker that starts where this one has left off to allow continued
 * tracking.
 *
 * This predicate is only defined if the type has not been tracked into an attribute.
 */
TypeTracker continue() { attr = "" and result = this }
/**
 * Gets the summary that corresponds to having taken a forwards
 * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
 */
pragma[inline]
TypeTracker step(Node nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::step(nodeFrom, nodeTo, summary) and
result = this.append(summary)
)
}
/**
 * Gets the summary that corresponds to having taken a forwards
 * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
 *
 * Unlike `TypeTracker::step`, this predicate exposes all edges
 * in the flow graph, and not just the edges between `Node`s.
 * It may therefore be less performant.
 *
 * Type tracking predicates using small steps typically take the following form:
 * ```ql
 * DataFlow::Node myType(DataFlow::TypeTracker t) {
 * t.start() and
 * result = < source of myType >
 * or
 * exists (DataFlow::TypeTracker t2 |
 * t = t2.smallstep(myType(t2), result)
 * )
 * }
 *
 * DataFlow::Node myType() {
 * result = myType(DataFlow::TypeTracker::end())
 * }
 * ```
 */
pragma[inline]
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
result = this.append(summary)
)
or
// A plain local flow step leaves the summary unchanged.
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
result = this
}
}

View File

@@ -123,8 +123,18 @@ module Consistency {
n.getEnclosingCallable() != call.getEnclosingCallable()
}
// This predicate helps the compiler forget that in some languages
// it is impossible for a result of `getPreUpdateNode` to be an
// instance of `PostUpdateNode`.
private Node getPre(PostUpdateNode n) {
result = n.getPreUpdateNode()
or
none()
}
query predicate postIsNotPre(PostUpdateNode n, string msg) {
n.getPreUpdateNode() = n and msg = "PostUpdateNode should not equal its pre-update node."
getPre(n) = n and
msg = "PostUpdateNode should not equal its pre-update node."
}
query predicate postHasUniquePre(PostUpdateNode n, string msg) {
@@ -152,12 +162,6 @@ module Consistency {
msg = "Origin of readStep is missing a PostUpdateNode."
}
query predicate storeIsPostUpdate(Node n, string msg) {
storeStep(_, _, n) and
not n instanceof PostUpdateNode and
msg = "Store targets should be PostUpdateNodes."
}
query predicate argHasPostUpdate(ArgumentNode n, string msg) {
not hasPost(n) and
not isImmutableOrUnobservable(n) and

View File

@@ -15,6 +15,32 @@ class DataFlowCfgNode extends ControlFlowNode {
DataFlowCfgNode() { isExpressionNode(this) }
}
/** A data flow node which should have an associated post-update node. */
abstract class PreUpdateNode extends Node { }
/** An argument might have its value changed as a result of a call. */
class ArgumentPreUpdateNode extends PreUpdateNode, ArgumentNode { }
/** An object might have its value changed after a store. */
class StorePreUpdateNode extends PreUpdateNode, CfgNode {
StorePreUpdateNode() {
exists(Attribute a |
node = a.getObject().getAFlowNode() and
a.getCtx() instanceof Store
)
}
}
/** A node marking the state change of an object after a read */
class ReadPreUpdateNode extends PreUpdateNode, CfgNode {
ReadPreUpdateNode() {
exists(Attribute a |
node = a.getObject().getAFlowNode() and
a.getCtx() instanceof Load
)
}
}
/**
* A node associated with an object after an operation that might have
* changed its state.
@@ -24,12 +50,21 @@ class DataFlowCfgNode extends ControlFlowNode {
* an update to the field.
*
* Nodes corresponding to AST elements, for example `ExprNode`, usually refer
* to the value before the update with the exception of `ObjectCreation`,
* which represents the value after the constructor has run.
* to the value before the update.
*/
abstract class PostUpdateNode extends Node {
class PostUpdateNode extends Node, TPostUpdateNode {
PreUpdateNode pre;
PostUpdateNode() { this = TPostUpdateNode(pre) }
/** Gets the node before the state update. */
abstract Node getPreUpdateNode();
Node getPreUpdateNode() { result = pre }
override string toString() { result = "[post] " + pre.toString() }
override Scope getScope() { result = pre.getScope() }
override Location getLocation() { result = pre.getLocation() }
}
class DataFlowExpr = Expr;
@@ -98,7 +133,17 @@ module EssaFlow {
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
not nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable and
not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
EssaFlow::essaFlowStep(update(nodeFrom), nodeTo)
}
private Node update(Node node) {
exists(PostUpdateNode pun |
node = pun.getPreUpdateNode() and
result = pun
)
or
not exists(PostUpdateNode pun | node = pun.getPreUpdateNode()) and
result = node
}
// TODO: Make modules for these headings

View File

@@ -2,8 +2,9 @@
* Provides Python-specific definitions for use in the data flow library.
*/
import python
private import python
private import DataFlowPrivate
import experimental.dataflow.TypeTracker
/**
* IPA type for data flow nodes.
@@ -20,7 +21,9 @@ newtype TNode =
/** A node corresponding to an SSA variable. */
TEssaNode(EssaVariable var) or
/** A node corresponding to a control flow node. */
TCfgNode(DataFlowCfgNode node)
TCfgNode(DataFlowCfgNode node) or
/** A node representing the value of an object after a state change */
TPostUpdateNode(PreUpdateNode pre)
/**
* An element, viewed as a node in a data flow graph. Either an SSA variable
@@ -67,6 +70,14 @@ class Node extends TNode {
/** Convenience method for casting to ExprNode and calling getNode and getNode again. */
Expr asExpr() { none() }
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
* See `TypeTracker` for more details about how to use this.
*/
pragma[inline]
Node track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
}
class EssaNode extends Node, TEssaNode {

View File

@@ -30,14 +30,24 @@ predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeT
subscriptStep(nodeFrom, nodeTo)
or
stringManipulation(nodeFrom, nodeTo)
or
jsonStep(nodeFrom, nodeTo)
or
containerStep(nodeFrom, nodeTo)
or
copyStep(nodeFrom, nodeTo)
or
forStep(nodeFrom, nodeTo)
or
unpackingAssignmentStep(nodeFrom, nodeTo)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to concatenation.
*
* Note that since we cannot easily distinguish interesting types (like string, list, tuple),
* we consider any `+` operation to propagate taint. After consulting with the JS team, this
* doesn't sound like it is a big problem in practice.
* we consider any `+` operation to propagate taint. This is what is done in the JS libraries,
* and isn't a big problem in practice.
*/
predicate concatStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(BinaryExprNode add | add = nodeTo.getNode() |
@@ -118,8 +128,101 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
)
or
// f-strings
nodeTo.getNode().getNode().(Fstring).getAValue() = nodeFrom.getNode().getNode()
nodeTo.asExpr().(Fstring).getAValue() = nodeFrom.asExpr()
// TODO: Handle encode/decode from base64/quopri
// TODO: Handle os.path.join
// TODO: Handle functions in https://docs.python.org/3/library/binascii.html
}
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to JSON encoding/decoding.
 *
 * Only recognizes calls of the form `json.load(..)`, `json.loads(..)`, or `json.dumps(..)`
 * where the receiver is literally named `json` (a renamed import would not match).
 */
predicate jsonStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(CallNode call | call = nodeTo.getNode() |
call.getFunction().(AttrNode).getObject(["load", "loads", "dumps"]).(NameNode).getId() = "json" and
call.getArg(0) = nodeFrom.getNode()
)
}
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to containers
 * (lists/sets/dictionaries): literals, constructor invocation, methods. Note that this
 * is currently very imprecise; as an example, since we model `dict.get`, we treat any
 * `<tainted object>.get(<arg>)` as tainted, whether that is accurate or not.
 */
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
// construction by literal
// TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
storeStep(nodeFrom, _, nodeTo)
or
// constructor call
exists(CallNode call | call = nodeTo.asCfgNode() |
call.getFunction().(NameNode).getId() in ["list", "set", "frozenset", "dict", "defaultdict",
"tuple"] and
call.getArg(0) = nodeFrom.getNode()
)
or
// functions operating on collections
exists(CallNode call | call = nodeTo.asCfgNode() |
call.getFunction().(NameNode).getId() in ["sorted", "reversed", "iter", "next"] and
call.getArg(0) = nodeFrom.getNode()
)
or
// methods
exists(CallNode call, string name | call = nodeTo.asCfgNode() |
name in ["copy",
// general
"pop",
// dict
"values", "items", "get", "popitem"] and
call.getFunction().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
)
or
// list.append, set.add
// Here taint flows into the mutated receiver, so the target is the
// post-update node of the object the method is called on.
exists(CallNode call, string name |
name in ["append", "add"] and
call.getFunction().(AttrNode).getObject(name) =
nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() and
call.getArg(0) = nodeFrom.getNode()
)
}
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying.
 *
 * Recognizes both bare calls `copy(..)`/`deepcopy(..)` and qualified calls
 * `copy.copy(..)`/`copy.deepcopy(..)`.
 */
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(CallNode call | call = nodeTo.getNode() |
// Fully qualified: copy.copy, copy.deepcopy
(
call.getFunction().(NameNode).getId() in ["copy", "deepcopy"]
or
call.getFunction().(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy"
) and
call.getArg(0) = nodeFrom.getNode()
)
}
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
 * for example `for x in xs`, or `for x,y in points`.
 *
 * Nested targets are handled as well, since any (transitive) child node of the
 * `for` target is considered.
 */
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
exists(EssaNodeDefinition defn, For for |
for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = for.getIter()
)
}
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
 * Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
 */
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
// `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
// `contains` is used so that nested targets such as `[[x, *xs], ys]` also match.
exists(MultiAssignmentDefinition defn, Assign assign |
assign.getATarget().contains(defn.getDefiningNode().getNode()) and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = assign.getValue()
)
}

View File

@@ -28,11 +28,11 @@
* }
*
* override predicate hasActualResult(
* Location location, string element, string tag, string valuesasas
* Location location, string element, string tag, string value
* ) {
* exists(Expr e |
* tag = "const" and // The tag for this test.
* valuesasas = e.getValue() and // The expected value. Will only hold for constant expressions.
* value = e.getValue() and // The expected value. Will only hold for constant expressions.
* location = e.getLocation() and // The location of the result to be reported.
* element = e.toString() // The display text for the result.
* )

View File

@@ -1 +0,0 @@
experimental/CWE-074/TemplateInjection.ql

View File

@@ -1 +0,0 @@
semmle-extractor-options: --max-import-depth=3 -p ../../query-tests/Security/lib/

View File

@@ -1 +0,0 @@
experimental/CWE-091/Xslt.ql

View File

@@ -1 +0,0 @@
semmle-extractor-options: -p ../../query-tests/Security/lib/ --max-import-depth=3

View File

@@ -1 +0,0 @@
semmle-extractor-options: --max-import-depth=3 -p ../../query-tests/Security/lib/

View File

@@ -1 +0,0 @@
experimental/CWE-643/xpath.ql

View File

@@ -38,3 +38,4 @@
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:19:7:19 | [post] ControlFlowNode for a | test.py:7:19:7:19 | [post] ControlFlowNode for a |

View File

@@ -23,3 +23,4 @@
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:19:7:19 | [post] ControlFlowNode for a |

View File

@@ -23,3 +23,4 @@
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:19:7:19 | [post] ControlFlowNode for a |

View File

@@ -1,3 +1,4 @@
private import python
import experimental.dataflow.DataFlow
/**

View File

@@ -127,27 +127,4 @@ postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
storeIsPostUpdate
| test.py:172:9:172:16 | ControlFlowNode for List | Store targets should be PostUpdateNodes. |
| test.py:173:9:173:24 | ControlFlowNode for Dict | Store targets should be PostUpdateNodes. |
| test.py:212:11:212:18 | ControlFlowNode for List | Store targets should be PostUpdateNodes. |
argHasPostUpdate
| test.py:25:10:25:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:29:10:29:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:48:19:48:21 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
| test.py:51:10:51:12 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
| test.py:55:14:55:16 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
| test.py:59:11:59:11 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:67:11:67:14 | ControlFlowNode for cond | ArgumentNode is missing PostUpdateNode. |
| test.py:67:17:67:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:74:11:74:14 | ControlFlowNode for cond | ArgumentNode is missing PostUpdateNode. |
| test.py:74:17:74:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:81:13:81:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:86:13:86:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:120:13:120:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:121:12:121:12 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:125:13:125:29 | ControlFlowNode for TAINT_FROM_ARG() | ArgumentNode is missing PostUpdateNode. |
| test.py:178:15:178:15 | ControlFlowNode for l | ArgumentNode is missing PostUpdateNode. |
| test.py:179:15:179:15 | ControlFlowNode for d | ArgumentNode is missing PostUpdateNode. |
| test.py:200:19:200:19 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:200:22:200:24 | ControlFlowNode for str | ArgumentNode is missing PostUpdateNode. |

View File

@@ -1,3 +1,4 @@
import python
import experimental.dataflow.DataFlow
/**

View File

@@ -1,3 +1,4 @@
import python
import experimental.dataflow.DataFlow
/**

View File

@@ -1,3 +1,4 @@
import python
import experimental.dataflow.DataFlow
/**

View File

@@ -1,3 +1,4 @@
import python
import experimental.dataflow.DataFlow
/**

View File

@@ -93,7 +93,7 @@ class With_str:
def __str__(self):
SINK1(self) # Flow not found
OK() # Call not found # Call not found
OK() # Call not found
return "Awesome"
@@ -108,7 +108,7 @@ class With_bytes:
def __bytes__(self):
SINK1(self) # Flow not found
OK() # Call not found # Call not found
OK() # Call not found
return b"Awesome"
@@ -124,7 +124,7 @@ class With_format:
def __format__(self, format_spec):
SINK2(format_spec) # Flow not found
SINK1(self) # Flow not found
OK() # Call not found # Call not found
OK() # Call not found
return "Awesome"
@@ -151,7 +151,7 @@ class With_lt:
def __lt__(self, other):
SINK2(other) # Flow not found
SINK1(self) # Flow not found
OK() # Call not found # Call not found
OK() # Call not found
return ""

View File

@@ -2,6 +2,7 @@
* @kind path-problem
*/
import python
import experimental.dataflow.testConfig
import DataFlow::PathGraph

View File

@@ -5,6 +5,7 @@
* hope to remove the false positive.
*/
import python
import experimental.dataflow.testConfig
from DataFlow::Node source, DataFlow::Node sink

View File

@@ -6,7 +6,8 @@ class TestTaintTrackingConfiguration extends TaintTracking::Configuration {
TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" }
override predicate isSource(DataFlow::Node source) {
source.(DataFlow::CfgNode).getNode().(NameNode).getId() in ["TAINTED_STRING", "TAINTED_BYTES"]
source.(DataFlow::CfgNode).getNode().(NameNode).getId() in ["TAINTED_STRING", "TAINTED_BYTES",
"TAINTED_LIST", "TAINTED_DICT"]
}
override predicate isSink(DataFlow::Node sink) {
@@ -44,7 +45,8 @@ private string repr(Expr e) {
query predicate test_taint(string arg_location, string test_res, string function_name, string repr) {
exists(Call call, Expr arg, boolean expected_taint, boolean has_taint |
call.getLocation().getFile().getShortName() = "test.py" and
// only consider files that are extracted as part of the test
exists(call.getLocation().getFile().getRelativePath()) and
(
call.getFunc().(Name).getId() = "ensure_tainted" and
expected_taint = true

View File

@@ -0,0 +1,22 @@
| test_collections.py:16 | ok | test_access | tainted_list.copy() |
| test_collections.py:24 | ok | list_clear | tainted_list |
| test_collections.py:27 | fail | list_clear | tainted_list |
| test_string.py:17 | ok | str_methods | ts.casefold() |
| test_string.py:19 | ok | str_methods | ts.format_map(..) |
| test_string.py:20 | ok | str_methods | "{unsafe}".format_map(..) |
| test_string.py:31 | fail | binary_decode_encode | base64.a85encode(..) |
| test_string.py:32 | fail | binary_decode_encode | base64.a85decode(..) |
| test_string.py:35 | fail | binary_decode_encode | base64.b85encode(..) |
| test_string.py:36 | fail | binary_decode_encode | base64.b85decode(..) |
| test_string.py:39 | fail | binary_decode_encode | base64.encodebytes(..) |
| test_string.py:40 | fail | binary_decode_encode | base64.decodebytes(..) |
| test_string.py:48 | ok | f_strings | Fstring |
| test_unpacking.py:18 | ok | extended_unpacking | first |
| test_unpacking.py:18 | ok | extended_unpacking | last |
| test_unpacking.py:18 | ok | extended_unpacking | rest |
| test_unpacking.py:23 | ok | also_allowed | a |
| test_unpacking.py:31 | ok | also_allowed | b |
| test_unpacking.py:31 | ok | also_allowed | c |
| test_unpacking.py:39 | ok | nested | x |
| test_unpacking.py:39 | ok | nested | xs |
| test_unpacking.py:39 | ok | nested | ys |

View File

@@ -0,0 +1,32 @@
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *

# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from ..taintlib import *


# Actual tests

def test_access():
    # A copy of a tainted list should itself be tainted.
    tainted_list = TAINTED_LIST
    ensure_tainted(
        tainted_list.copy(),
    )


def list_clear():
    # After `clear()` the list no longer contains the tainted element,
    # so it should not be reported as tainted.
    tainted_string = TAINTED_STRING
    tainted_list = [tainted_string]
    ensure_tainted(tainted_list)
    tainted_list.clear()
    ensure_not_tainted(tainted_list)


# Make tests runable
test_access()
list_clear()

View File

@@ -1,20 +1,11 @@
# Python 3 specific taint tracking for string
TAINTED_STRING = "TAINTED_STRING"
TAINTED_BYTES = b"TAINTED_BYTES"
def ensure_tainted(*args):
print("- ensure_tainted")
for i, arg in enumerate(args):
print("arg {}: {!r}".format(i, arg))
def ensure_not_tainted(*args):
print("- ensure_not_tainted")
for i, arg in enumerate(args):
print("arg {}: {!r}".format(i, arg))
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests

View File

@@ -0,0 +1,46 @@
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *

# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from ..taintlib import *


# Actual tests

# Extended Iterable Unpacking -- PEP 3132
# https://www.python.org/dev/peps/pep-3132/

def extended_unpacking():
    # Starred assignment should propagate taint to every target.
    first, *rest, last = TAINTED_LIST
    ensure_tainted(first, rest, last)


def also_allowed():
    # A lone starred target (with trailing comma) is valid syntax per PEP 3132.
    *a, = TAINTED_LIST
    ensure_tainted(a)

    # for b, *c in [(1, 2, 3), (4, 5, 6, 7)]:
    #     print(c)
    # i=0; c=[2,3]
    # i=1; c=[5,6,7]
    for b, *c in [TAINTED_LIST, TAINTED_LIST]:
        ensure_tainted(b, c)


def nested():
    # Nested unpacking targets should also receive taint.
    l = TAINTED_LIST
    ll = [l,l]
    [[x, *xs], ys] = ll
    ensure_tainted(x, xs, ys)


# Make tests runable
extended_unpacking()
also_allowed()
nested()

View File

@@ -0,0 +1,172 @@
| test_collections.py:23 | ok | test_construction | tainted_string |
| test_collections.py:24 | ok | test_construction | tainted_list |
| test_collections.py:25 | ok | test_construction | tainted_tuple |
| test_collections.py:26 | ok | test_construction | tainted_set |
| test_collections.py:27 | ok | test_construction | tainted_dict |
| test_collections.py:31 | ok | test_construction | list(..) |
| test_collections.py:32 | ok | test_construction | list(..) |
| test_collections.py:33 | ok | test_construction | list(..) |
| test_collections.py:34 | ok | test_construction | list(..) |
| test_collections.py:35 | ok | test_construction | list(..) |
| test_collections.py:37 | ok | test_construction | tuple(..) |
| test_collections.py:38 | ok | test_construction | set(..) |
| test_collections.py:39 | ok | test_construction | frozenset(..) |
| test_collections.py:47 | ok | test_access | tainted_list[0] |
| test_collections.py:48 | ok | test_access | tainted_list[x] |
| test_collections.py:49 | ok | test_access | tainted_list[Slice] |
| test_collections.py:51 | ok | test_access | sorted(..) |
| test_collections.py:52 | ok | test_access | reversed(..) |
| test_collections.py:53 | ok | test_access | iter(..) |
| test_collections.py:54 | ok | test_access | next(..) |
| test_collections.py:58 | ok | test_access | a |
| test_collections.py:58 | ok | test_access | b |
| test_collections.py:58 | ok | test_access | c |
| test_collections.py:61 | ok | test_access | h |
| test_collections.py:63 | ok | test_access | i |
| test_collections.py:70 | ok | test_dict_access | tainted_dict["name"] |
| test_collections.py:71 | ok | test_dict_access | tainted_dict.get(..) |
| test_collections.py:72 | ok | test_dict_access | tainted_dict[x] |
| test_collections.py:73 | ok | test_dict_access | tainted_dict.copy() |
| test_collections.py:77 | ok | test_dict_access | v |
| test_collections.py:79 | ok | test_dict_access | v |
| test_collections.py:87 | fail | test_named_tuple | point[0] |
| test_collections.py:88 | fail | test_named_tuple | point.x |
| test_collections.py:92 | ok | test_named_tuple | point[1] |
| test_collections.py:93 | ok | test_named_tuple | point.y |
| test_collections.py:97 | fail | test_named_tuple | a |
| test_collections.py:98 | ok | test_named_tuple | b |
| test_collections.py:106 | fail | test_defaultdict | tainted_default_dict["name"] |
| test_collections.py:107 | fail | test_defaultdict | tainted_default_dict.get(..) |
| test_collections.py:108 | fail | test_defaultdict | tainted_default_dict[x] |
| test_collections.py:109 | fail | test_defaultdict | tainted_default_dict.copy() |
| test_collections.py:112 | fail | test_defaultdict | v |
| test_collections.py:114 | fail | test_defaultdict | v |
| test_collections.py:121 | ok | test_copy_1 | copy(..) |
| test_collections.py:122 | ok | test_copy_1 | deepcopy(..) |
| test_collections.py:130 | ok | test_copy_2 | copy.copy(..) |
| test_collections.py:131 | ok | test_copy_2 | copy.deepcopy(..) |
| test_collections.py:139 | ok | list_index_assign | my_list |
| test_collections.py:142 | fail | list_index_assign | my_list |
| test_collections.py:149 | ok | list_index_aug_assign | my_list |
| test_collections.py:152 | fail | list_index_aug_assign | my_list |
| test_collections.py:159 | ok | list_append | my_list |
| test_collections.py:162 | fail | list_append | my_list |
| test_collections.py:169 | ok | list_extend | my_list |
| test_collections.py:172 | fail | list_extend | my_list |
| test_collections.py:179 | ok | dict_update_dict | my_dict |
| test_collections.py:182 | fail | dict_update_dict | my_dict |
| test_collections.py:189 | ok | dict_update_kv_list | my_dict |
| test_collections.py:192 | fail | dict_update_kv_list | my_dict |
| test_collections.py:198 | ok | dict_update_kv_arg | my_dict |
| test_collections.py:201 | fail | dict_update_kv_arg | my_dict |
| test_collections.py:208 | ok | dict_manual_update | my_dict |
| test_collections.py:212 | fail | dict_manual_update | my_dict |
| test_collections.py:220 | fail | dict_merge | merged |
| test_collections.py:227 | ok | set_add | my_set |
| test_collections.py:230 | fail | set_add | my_set |
| test_json.py:26 | ok | test | json.dumps(..) |
| test_json.py:27 | ok | test | json.loads(..) |
| test_json.py:34 | fail | test | tainted_filelike |
| test_json.py:35 | fail | test | json.load(..) |
| test_json.py:48 | fail | non_syntacical | dumps(..) |
| test_json.py:49 | fail | non_syntacical | dumps_alias(..) |
| test_json.py:50 | fail | non_syntacical | loads(..) |
| test_json.py:57 | fail | non_syntacical | tainted_filelike |
| test_json.py:58 | fail | non_syntacical | load(..) |
| test_string.py:25 | ok | str_operations | ts |
| test_string.py:26 | ok | str_operations | BinaryExpr |
| test_string.py:27 | ok | str_operations | BinaryExpr |
| test_string.py:28 | ok | str_operations | BinaryExpr |
| test_string.py:29 | ok | str_operations | ts[Slice] |
| test_string.py:30 | ok | str_operations | ts[Slice] |
| test_string.py:31 | ok | str_operations | ts[Slice] |
| test_string.py:32 | ok | str_operations | ts[0] |
| test_string.py:33 | ok | str_operations | str(..) |
| test_string.py:34 | ok | str_operations | bytes(..) |
| test_string.py:35 | ok | str_operations | unicode(..) |
| test_string.py:39 | ok | str_operations | aug_assignment |
| test_string.py:41 | ok | str_operations | aug_assignment |
| test_string.py:49 | ok | str_methods | ts.capitalize() |
| test_string.py:50 | ok | str_methods | ts.center(..) |
| test_string.py:51 | ok | str_methods | ts.expandtabs() |
| test_string.py:53 | ok | str_methods | ts.format() |
| test_string.py:54 | ok | str_methods | "{}".format(..) |
| test_string.py:55 | ok | str_methods | "{unsafe}".format(..) |
| test_string.py:57 | ok | str_methods | ts.join(..) |
| test_string.py:58 | ok | str_methods | "".join(..) |
| test_string.py:60 | ok | str_methods | ts.ljust(..) |
| test_string.py:61 | ok | str_methods | ts.lstrip() |
| test_string.py:62 | ok | str_methods | ts.lower() |
| test_string.py:64 | ok | str_methods | ts.replace(..) |
| test_string.py:65 | ok | str_methods | "safe".replace(..) |
| test_string.py:67 | ok | str_methods | ts.rjust(..) |
| test_string.py:68 | ok | str_methods | ts.rstrip() |
| test_string.py:69 | ok | str_methods | ts.strip() |
| test_string.py:70 | ok | str_methods | ts.swapcase() |
| test_string.py:71 | ok | str_methods | ts.title() |
| test_string.py:72 | ok | str_methods | ts.upper() |
| test_string.py:73 | ok | str_methods | ts.zfill(..) |
| test_string.py:75 | ok | str_methods | ts.encode(..) |
| test_string.py:76 | ok | str_methods | ts.encode(..).decode(..) |
| test_string.py:78 | ok | str_methods | tb.decode(..) |
| test_string.py:79 | ok | str_methods | tb.decode(..).encode(..) |
| test_string.py:82 | ok | str_methods | ts.partition(..) |
| test_string.py:83 | ok | str_methods | ts.rpartition(..) |
| test_string.py:84 | ok | str_methods | ts.rsplit(..) |
| test_string.py:85 | ok | str_methods | ts.split(..) |
| test_string.py:86 | ok | str_methods | ts.splitlines() |
| test_string.py:91 | ok | str_methods | "safe".replace(..) |
| test_string.py:93 | fail | str_methods | ts.join(..) |
| test_string.py:94 | fail | str_methods | ts.join(..) |
| test_string.py:104 | fail | non_syntactic | meth() |
| test_string.py:105 | fail | non_syntactic | _str(..) |
| test_string.py:114 | ok | percent_fmt | BinaryExpr |
| test_string.py:115 | ok | percent_fmt | BinaryExpr |
| test_string.py:116 | ok | percent_fmt | BinaryExpr |
| test_string.py:126 | fail | binary_decode_encode | base64.b64encode(..) |
| test_string.py:127 | fail | binary_decode_encode | base64.b64decode(..) |
| test_string.py:129 | fail | binary_decode_encode | base64.standard_b64encode(..) |
| test_string.py:130 | fail | binary_decode_encode | base64.standard_b64decode(..) |
| test_string.py:132 | fail | binary_decode_encode | base64.urlsafe_b64encode(..) |
| test_string.py:133 | fail | binary_decode_encode | base64.urlsafe_b64decode(..) |
| test_string.py:135 | fail | binary_decode_encode | base64.b32encode(..) |
| test_string.py:136 | fail | binary_decode_encode | base64.b32decode(..) |
| test_string.py:138 | fail | binary_decode_encode | base64.b16encode(..) |
| test_string.py:139 | fail | binary_decode_encode | base64.b16decode(..) |
| test_string.py:142 | fail | binary_decode_encode | base64.encodestring(..) |
| test_string.py:143 | fail | binary_decode_encode | base64.decodestring(..) |
| test_string.py:148 | fail | binary_decode_encode | quopri.encodestring(..) |
| test_string.py:149 | fail | binary_decode_encode | quopri.decodestring(..) |
| test_unpacking.py:16 | ok | unpacking | a |
| test_unpacking.py:16 | ok | unpacking | b |
| test_unpacking.py:16 | ok | unpacking | c |
| test_unpacking.py:22 | ok | unpacking_to_list | a |
| test_unpacking.py:22 | ok | unpacking_to_list | b |
| test_unpacking.py:22 | ok | unpacking_to_list | c |
| test_unpacking.py:31 | ok | nested | a1 |
| test_unpacking.py:31 | ok | nested | a2 |
| test_unpacking.py:31 | ok | nested | a3 |
| test_unpacking.py:31 | ok | nested | b |
| test_unpacking.py:31 | ok | nested | c |
| test_unpacking.py:35 | ok | nested | a1 |
| test_unpacking.py:35 | ok | nested | a2 |
| test_unpacking.py:35 | ok | nested | a3 |
| test_unpacking.py:35 | ok | nested | b |
| test_unpacking.py:35 | ok | nested | c |
| test_unpacking.py:39 | ok | nested | a1 |
| test_unpacking.py:39 | ok | nested | a2 |
| test_unpacking.py:39 | ok | nested | a3 |
| test_unpacking.py:39 | ok | nested | b |
| test_unpacking.py:39 | ok | nested | c |
| test_unpacking.py:46 | ok | unpack_from_set | a |
| test_unpacking.py:46 | ok | unpack_from_set | b |
| test_unpacking.py:46 | ok | unpack_from_set | c |
| test_unpacking.py:55 | ok | contrived_1 | a |
| test_unpacking.py:55 | ok | contrived_1 | b |
| test_unpacking.py:55 | ok | contrived_1 | c |
| test_unpacking.py:56 | fail | contrived_1 | d |
| test_unpacking.py:56 | fail | contrived_1 | e |
| test_unpacking.py:56 | fail | contrived_1 | f |
| test_unpacking.py:65 | ok | contrived_2 | a |
| test_unpacking.py:65 | ok | contrived_2 | b |
| test_unpacking.py:65 | ok | contrived_2 | c |

View File

@@ -0,0 +1,254 @@
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests
from collections import defaultdict, namedtuple
def test_construction():
    # Building a container from a tainted element should taint the container,
    # and converting between container types should keep the taint.
    tainted_string = TAINTED_STRING
    tainted_list = [tainted_string]
    tainted_tuple = (tainted_string,)
    tainted_set = {tainted_string}
    tainted_dict = {'key': tainted_string}

    ensure_tainted(
        tainted_string,
        tainted_list,
        tainted_tuple,
        tainted_set,
        tainted_dict,
    )

    ensure_tainted(
        list(tainted_list),
        list(tainted_tuple),
        list(tainted_set),
        list(tainted_dict.values()),
        list(tainted_dict.items()),
        tuple(tainted_list),
        set(tainted_list),
        frozenset(tainted_list),
    )
def test_access(x, y, z):
    # Reading elements out of a tainted list -- by constant index, variable
    # index, slice, sorting, reversal, or iteration -- should yield taint.
    tainted_list = TAINTED_LIST
    ensure_tainted(
        tainted_list[0],
        tainted_list[x],
        tainted_list[y:z],
        sorted(tainted_list),
        reversed(tainted_list),
        iter(tainted_list),
        next(iter(tainted_list)),
    )

    # Unpacking a tainted slice taints every target.
    a, b, c = tainted_list[0:3]
    ensure_tainted(a, b, c)

    for h in tainted_list:
        ensure_tainted(h)

    for i in reversed(tainted_list):
        ensure_tainted(i)
def test_dict_access(x):
    # Dict reads by constant key, `.get`, variable key, and `.copy`, plus
    # iteration over values/items, should all propagate taint.
    tainted_dict = TAINTED_DICT
    ensure_tainted(
        tainted_dict["name"],
        tainted_dict.get("name"),
        tainted_dict[x],
        tainted_dict.copy(),
    )

    for v in tainted_dict.values():
        ensure_tainted(v)

    for k, v in tainted_dict.items():
        ensure_tainted(v)
def test_named_tuple(): # TODO: namedtuple currently not handled
    Point = namedtuple('Point', ['x', 'y'])
    # Only the first field is tainted; index access, attribute access, and
    # unpacking should all distinguish the tainted field from the safe one.
    point = Point(TAINTED_STRING, 'safe')
    ensure_tainted(
        point[0],
        point.x,
    )
    ensure_not_tainted(
        point[1],
        point.y,
    )
    a, b = point
    ensure_tainted(a)
    ensure_not_tainted(b)
def test_defaultdict(key, x): # TODO: defaultdict currently not handled
    tainted_default_dict = defaultdict(str)
    # Taint enters via augmented assignment on an arbitrary key.
    tainted_default_dict[key] += TAINTED_STRING
    ensure_tainted(
        tainted_default_dict["name"],
        tainted_default_dict.get("name"),
        tainted_default_dict[x],
        tainted_default_dict.copy(),
    )

    for v in tainted_default_dict.values():
        ensure_tainted(v)

    for k, v in tainted_default_dict.items():
        ensure_tainted(v)
def test_copy_1():
    # `copy`/`deepcopy` imported as bare names should preserve taint.
    from copy import copy, deepcopy
    ensure_tainted(
        copy(TAINTED_LIST),
        deepcopy(TAINTED_LIST),
    )

def test_copy_2():
    # Same functions, accessed as attributes of the `copy` module.
    import copy
    ensure_tainted(
        copy.copy(TAINTED_LIST),
        copy.deepcopy(TAINTED_LIST),
    )
def list_index_assign():
    # Storing a tainted element through an index taints the whole list.
    tainted_string = TAINTED_STRING
    my_list = ["safe"]
    ensure_not_tainted(my_list)
    my_list[0] = tainted_string
    ensure_tainted(my_list)

def list_index_aug_assign():
    # Augmented assignment through an index should also taint the list.
    tainted_string = TAINTED_STRING
    my_list = ["safe"]
    ensure_not_tainted(my_list)
    my_list[0] += tainted_string
    ensure_tainted(my_list)

def list_append():
    # `list.append` with a tainted argument taints the receiver.
    tainted_string = TAINTED_STRING
    my_list = ["safe"]
    ensure_not_tainted(my_list)
    my_list.append(tainted_string)
    ensure_tainted(my_list)

def list_extend():
    # `list.extend` with a tainted iterable taints the receiver.
    my_list = ["safe"]
    tainted_list = [TAINTED_STRING]
    ensure_not_tainted(my_list)
    my_list.extend(tainted_list)
    ensure_tainted(my_list)

def dict_update_dict():
    # `dict.update` with a tainted mapping taints the receiver.
    my_dict = {"key1": "safe"}
    tainted_dict = {"key2": TAINTED_STRING}
    ensure_not_tainted(my_dict)
    my_dict.update(tainted_dict)
    ensure_tainted(my_dict)

def dict_update_kv_list():
    # `dict.update` with a tainted list of (key, value) pairs.
    my_dict = {"key1": "safe"}
    tainted_kv_list = [("key2", TAINTED_STRING)]
    ensure_not_tainted(my_dict)
    my_dict.update(tainted_kv_list)
    ensure_tainted(my_dict)

def dict_update_kv_arg():
    # `dict.update` with a tainted keyword argument.
    my_dict = {"key1": "safe"}
    ensure_not_tainted(my_dict)
    my_dict.update(key2=TAINTED_STRING)
    ensure_tainted(my_dict)

def dict_manual_update():
    # Element-by-element copy from a tainted dict should taint the target.
    my_dict = {"key1": "safe"}
    tainted_dict = {"key2": TAINTED_STRING}
    ensure_not_tainted(my_dict)
    for k in tainted_dict:
        my_dict[k] = tainted_dict[k]
    ensure_tainted(my_dict)

def dict_merge():
    # Double-star merge of a tainted dict into a new dict literal.
    my_dict = {"key1": "safe"}
    tainted_dict = {"key2": TAINTED_STRING}
    merged = {**my_dict, **tainted_dict}
    ensure_tainted(merged)

def set_add():
    # `set.add` with a tainted element taints the receiver.
    tainted_string = TAINTED_STRING
    my_set = {"safe"}
    ensure_not_tainted(my_set)
    my_set.add(tainted_string)
    ensure_tainted(my_set)
# Make tests runnable: importing the module executes every scenario once.
test_construction()
test_access(0, 0, 2)
test_dict_access("name")
test_named_tuple()
test_defaultdict("key", "key")
test_copy_1()
test_copy_2()
list_index_assign()
list_index_aug_assign()
list_append()
list_extend()
dict_update_dict()
dict_update_kv_list()
dict_update_kv_arg()
dict_manual_update()
dict_merge()
set_add()

View File

@@ -0,0 +1,64 @@
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests
from io import StringIO
# Workaround for Python3 not having unicode
import sys
if sys.version_info[0] == 3:
unicode = str
def test():
    # `json.dumps`/`json.loads` round-trips should keep taint, and so should
    # `json.load` from a tainted file-like object.
    print("\n# test")
    ts = TAINTED_STRING

    import json
    ensure_tainted(
        json.dumps(ts),
        json.loads(json.dumps(ts)),
    )

    # For Python2, need to convert to unicode for StringIO to work
    tainted_filelike = StringIO(unicode(json.dumps(ts)))
    ensure_tainted(
        tainted_filelike,
        json.load(tainted_filelike),
    )
def non_syntacical():
    # NOTE(review): name is presumably a typo for "non_syntactical", but it is
    # referenced by the .expected result files, so it must stay as-is.
    # Same checks as `test`, but with the json functions bound to local names
    # (and an alias), so the flow is not recognizable purely syntactically.
    print("\n# non_syntacical")
    ts = TAINTED_STRING

    # a less syntactical approach
    from json import load, loads, dumps
    dumps_alias = dumps

    ensure_tainted(
        dumps(ts),
        dumps_alias(ts),
        loads(dumps(ts)),
    )

    # For Python2, need to convert to unicode for StringIO to work
    tainted_filelike = StringIO(unicode(dumps(ts)))
    ensure_tainted(
        tainted_filelike,
        load(tainted_filelike),
    )
# Make tests runnable: importing the module executes every scenario once.
test()
non_syntacical()

View File

@@ -1,27 +1,20 @@
import sys
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
if sys.version_info[0] == 3:
unicode = str
TAINTED_STRING = "TAINTED_STRING"
TAINTED_BYTES = b"TAINTED_BYTES"
def ensure_tainted(*args):
print("- ensure_tainted")
for i, arg in enumerate(args):
print("arg {}: {!r}".format(i, arg))
def ensure_not_tainted(*args):
print("- ensure_not_tainted")
for i, arg in enumerate(args):
print("arg {}: {!r}".format(i, arg))
# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests
# Workaround for Python3 not having unicode
import sys
if sys.version_info[0] == 3:
unicode = str
def str_operations():
print("\n# str_operations")
@@ -42,6 +35,11 @@ def str_operations():
unicode(ts),
)
aug_assignment = "safe"
ensure_not_tainted(aug_assignment)
aug_assignment += TAINTED_STRING
ensure_tainted(aug_assignment)
def str_methods():
print("\n# str_methods")
@@ -140,18 +138,6 @@ def binary_decode_encode():
base64.b16encode(tb),
base64.b16decode(base64.b16encode(tb)),
# # New in Python 3.4
# base64.a85encode(tb),
# base64.a85decode(base64.a85encode(tb)),
# # New in Python 3.4
# base64.b85encode(tb),
# base64.b85decode(base64.b85encode(tb)),
# # New in Python 3.1
# base64.encodebytes(tb),
# base64.decodebytes(base64.encodebytes(tb)),
# deprecated since Python 3.1, but still works
base64.encodestring(tb),
base64.decodestring(base64.encodestring(tb)),

View File

@@ -0,0 +1,75 @@
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests
def unpacking():
    # Plain tuple-target unpacking of a tainted slice.
    l = TAINTED_LIST[0:3]
    a, b, c = l
    ensure_tainted(a, b, c)

def unpacking_to_list():
    # Same, but with a list as the assignment target.
    l = TAINTED_LIST[0:3]
    [a, b, c] = l
    ensure_tainted(a, b, c)
def nested():
    # Nested unpacking patterns: list, tuple, and mixed targets should all
    # propagate taint into every bound name.
    l = TAINTED_LIST[0:3]
    ll = [l, l, l]

    # list
    [[a1, a2, a3], b, c] = ll
    ensure_tainted(a1, a2, a3, b, c)

    # tuple
    ((a1, a2, a3), b, c) = ll
    ensure_tainted(a1, a2, a3, b, c)

    # mixed
    [(a1, a2, a3), b, c] = ll
    ensure_tainted(a1, a2, a3, b, c)
def unpack_from_set():
    # no guarantee on ordering ... don't know why you would ever do this
    a, b, c = {"foo", "bar", TAINTED_STRING}
    # either all should be tainted, or none of them
    ensure_tainted(a, b, c)

def contrived_1():
    # A contrived example. Don't know why anyone would ever actually do this.
    # Parallel unpacking of a tainted and an untainted sequence.
    tainted_list = TAINTED_LIST[0:3]
    no_taint_list = [1,2,3]
    (a, b, c), (d, e, f) = tainted_list, no_taint_list
    ensure_tainted(a, b, c)
    ensure_not_tainted(d, e, f) # FP: we mark `d`, `e` and `f` as tainted.

def contrived_2():
    # A contrived example. Don't know why anyone would ever actually do this.
    # Old taint tracking was only able to handle taint nested 2 levels in sequences,
    # so would not mark a, b, c as tainted
    [[[ (a, b, c) ]]] = [[[ TAINTED_LIST[0:3] ]]]
    ensure_tainted(a, b, c)
# Make tests runnable: importing the module executes every scenario once.
unpacking()
unpacking_to_list()
nested()
unpack_from_set()
contrived_1()
contrived_2()

View File

@@ -1,10 +0,0 @@
| test.py:26 | ok | str_methods | ts.casefold() |
| test.py:28 | ok | str_methods | ts.format_map(..) |
| test.py:29 | fail | str_methods | "{unsafe}".format_map(..) |
| test.py:40 | fail | binary_decode_encode | base64.a85encode(..) |
| test.py:41 | fail | binary_decode_encode | base64.a85decode(..) |
| test.py:44 | fail | binary_decode_encode | base64.b85encode(..) |
| test.py:45 | fail | binary_decode_encode | base64.b85decode(..) |
| test.py:48 | fail | binary_decode_encode | base64.encodebytes(..) |
| test.py:49 | fail | binary_decode_encode | base64.decodebytes(..) |
| test.py:57 | ok | f_strings | Fstring |

View File

@@ -1,62 +0,0 @@
| test.py:32 | ok | str_operations | ts |
| test.py:33 | ok | str_operations | BinaryExpr |
| test.py:34 | ok | str_operations | BinaryExpr |
| test.py:35 | ok | str_operations | BinaryExpr |
| test.py:36 | ok | str_operations | ts[Slice] |
| test.py:37 | ok | str_operations | ts[Slice] |
| test.py:38 | ok | str_operations | ts[Slice] |
| test.py:39 | ok | str_operations | ts[0] |
| test.py:40 | ok | str_operations | str(..) |
| test.py:41 | ok | str_operations | bytes(..) |
| test.py:42 | ok | str_operations | unicode(..) |
| test.py:51 | ok | str_methods | ts.capitalize() |
| test.py:52 | ok | str_methods | ts.center(..) |
| test.py:53 | ok | str_methods | ts.expandtabs() |
| test.py:55 | ok | str_methods | ts.format() |
| test.py:56 | ok | str_methods | "{}".format(..) |
| test.py:57 | ok | str_methods | "{unsafe}".format(..) |
| test.py:59 | ok | str_methods | ts.join(..) |
| test.py:60 | fail | str_methods | "".join(..) |
| test.py:62 | ok | str_methods | ts.ljust(..) |
| test.py:63 | ok | str_methods | ts.lstrip() |
| test.py:64 | ok | str_methods | ts.lower() |
| test.py:66 | ok | str_methods | ts.replace(..) |
| test.py:67 | ok | str_methods | "safe".replace(..) |
| test.py:69 | ok | str_methods | ts.rjust(..) |
| test.py:70 | ok | str_methods | ts.rstrip() |
| test.py:71 | ok | str_methods | ts.strip() |
| test.py:72 | ok | str_methods | ts.swapcase() |
| test.py:73 | ok | str_methods | ts.title() |
| test.py:74 | ok | str_methods | ts.upper() |
| test.py:75 | ok | str_methods | ts.zfill(..) |
| test.py:77 | ok | str_methods | ts.encode(..) |
| test.py:78 | ok | str_methods | ts.encode(..).decode(..) |
| test.py:80 | ok | str_methods | tb.decode(..) |
| test.py:81 | ok | str_methods | tb.decode(..).encode(..) |
| test.py:84 | ok | str_methods | ts.partition(..) |
| test.py:85 | ok | str_methods | ts.rpartition(..) |
| test.py:86 | ok | str_methods | ts.rsplit(..) |
| test.py:87 | ok | str_methods | ts.split(..) |
| test.py:88 | ok | str_methods | ts.splitlines() |
| test.py:93 | ok | str_methods | "safe".replace(..) |
| test.py:95 | fail | str_methods | ts.join(..) |
| test.py:96 | fail | str_methods | ts.join(..) |
| test.py:106 | fail | non_syntactic | meth() |
| test.py:107 | fail | non_syntactic | _str(..) |
| test.py:116 | ok | percent_fmt | BinaryExpr |
| test.py:117 | ok | percent_fmt | BinaryExpr |
| test.py:118 | fail | percent_fmt | BinaryExpr |
| test.py:128 | fail | binary_decode_encode | base64.b64encode(..) |
| test.py:129 | fail | binary_decode_encode | base64.b64decode(..) |
| test.py:131 | fail | binary_decode_encode | base64.standard_b64encode(..) |
| test.py:132 | fail | binary_decode_encode | base64.standard_b64decode(..) |
| test.py:134 | fail | binary_decode_encode | base64.urlsafe_b64encode(..) |
| test.py:135 | fail | binary_decode_encode | base64.urlsafe_b64decode(..) |
| test.py:137 | fail | binary_decode_encode | base64.b32encode(..) |
| test.py:138 | fail | binary_decode_encode | base64.b32decode(..) |
| test.py:140 | fail | binary_decode_encode | base64.b16encode(..) |
| test.py:141 | fail | binary_decode_encode | base64.b16decode(..) |
| test.py:156 | fail | binary_decode_encode | base64.encodestring(..) |
| test.py:157 | fail | binary_decode_encode | base64.decodestring(..) |
| test.py:162 | fail | binary_decode_encode | quopri.encodestring(..) |
| test.py:163 | fail | binary_decode_encode | quopri.decodestring(..) |

View File

@@ -0,0 +1,15 @@
# Shared taint sources for the runtime taint-tracking tests. The literal
# values are what the test drivers print, so they must not change.
TAINTED_STRING = "TAINTED_STRING"
TAINTED_BYTES = b"TAINTED_BYTES"
TAINTED_LIST = ["tainted-{}".format(i) for i in range(5)]
TAINTED_DICT = {"name": TAINTED_STRING, "some key": "foo"}
def ensure_tainted(*args):
    """Print each value that the static analysis is expected to flag as
    tainted. Performs no checking itself -- the output is inspected by the
    test harness."""
    print("- ensure_tainted")
    index = 0
    for value in args:
        print("arg {}: {!r}".format(index, value))
        index += 1
def ensure_not_tainted(*args):
    """Print each value that the static analysis is expected to consider
    safe. Performs no checking itself -- the output is inspected by the
    test harness."""
    print("- ensure_not_tainted")
    for position, value in enumerate(args):
        line = "arg {}: {!r}".format(position, value)
        print(line)

View File

@@ -20,6 +20,7 @@
* complex | `42j` (not supported yet)
*/
private import python
import experimental.dataflow.DataFlow
class TestConfiguration extends DataFlow::Configuration {

View File

@@ -0,0 +1,31 @@
class SomeClass:
    pass

# NOTE(review): the `$...` trailing comments throughout this file are inline
# expectation annotations consumed by the CodeQL test harness -- they are part
# of the test and must not be edited or moved.
def simple_read_write():
    x = SomeClass() # $tracked=foo
    x.foo = tracked # $tracked $tracked=foo
    y = x.foo # $tracked=foo $tracked
    do_stuff(y) # $tracked
# Attribute written after a call: `bar` should still see the tracked value
# on its parameter's `attr` field.
def foo():
    x = SomeClass() # $tracked=attr
    bar(x) # $tracked=attr
    x.attr = tracked # $tracked=attr $tracked
    baz(x) # $tracked=attr

def bar(x): # $tracked=attr
    z = x.attr # $tracked $tracked=attr
    do_stuff(z) # $tracked
# A field reassigned with two incompatible types: each callee should see the
# type current at its call site, with the other type only on the `f+:` edge.
def expects_int(x): # $int=field $f+:str=field
    do_int_stuff(x.field) # $int $f+:str $int=field $f+:str=field

def expects_string(x): # $f+:int=field $str=field
    do_string_stuff(x.field) # $f+:int $str $f+:int=field $str=field

def test_incompatible_types():
    x = SomeClass() # $int,str=field
    x.field = int(5) # $int=field $f+:str=field $int $f+:str
    expects_int(x) # $int=field $f+:str=field
    x.field = str("Hello") # $f+:int=field $str=field $f+:int $str
    expects_string(x) # $f+:int=field $str=field

View File

@@ -0,0 +1,61 @@
# NOTE(review): the `$...` trailing comments in this file are inline
# expectation annotations consumed by the CodeQL test harness -- they are part
# of the test and must not be edited or moved.

# Tracked value flowing out of a call, into a call, and both combined.
def get_tracked():
    x = tracked # $tracked
    return x # $tracked

def use_tracked_foo(x): # $tracked
    do_stuff(x) # $tracked

def foo():
    use_tracked_foo(
        get_tracked() # $tracked
    )

def use_tracked_bar(x): # $tracked
    do_stuff(x) # $tracked

def bar():
    x = get_tracked() # $tracked
    use_tracked_bar(x) # $tracked

def use_tracked_baz(x): # $tracked
    do_stuff(x) # $tracked

def baz():
    x = tracked # $tracked
    use_tracked_baz(x) # $tracked
# Flow through an identity function: tracking should not continue out of the
# call to `id` into `use_tracked_quux` (hence the `f-:` annotation).
def id(x): # $tracked
    return x # $tracked

def use_tracked_quux(x): # $f-:tracked
    # NOTE(review): `do_stuff(y)` references a name not defined in this scope;
    # presumably intentional for this test, but it looks like it may have
    # meant `x` -- verify against the test's purpose.
    do_stuff(y) # call after return -- not tracked in here.

def quux():
    x = tracked # $tracked
    y = id(x) # $tracked
    use_tracked_quux(y) # not tracked out of call to id.
g = None

# NOTE(review): `write_g` assigns a local `g` (no `global` statement), so at
# runtime the module-level `g` is never updated; the `f-:tracked` annotation
# in `use_g` documents that global flow is deliberately not tracked.
def write_g(x): # $tracked
    g = x # $tracked

def use_g():
    do_stuff(g) # $f-:tracked // no global flow for now.

def global_var_write_test():
    x = tracked # $tracked
    write_g(x) # $tracked
    use_g()
# Reassigning a variable to a different type: each callee should only see the
# type the variable holds at that call site.
def expects_int(x): # $int
    do_int_stuff(x) # $int

def expects_string(x): # $str
    do_string_stuff(x) # $str

def redefine_test():
    x = int(5) # $int
    expects_int(x) # $int
    x = str("Hello") # $str
    expects_string(x) # $str

View File

@@ -0,0 +1,72 @@
import python
import experimental.dataflow.TypeTracker
import TestUtilities.InlineExpectationsTest
/**
 * Gets a data-flow node reachable, under type-tracker state `t`, from any
 * reference to a name called `tracked` -- the seed used by the inline tests.
 */
Node tracked(TypeTracker t) {
  t.start() and
  result.asCfgNode() = any(NameNode n | n.getId() = "tracked")
  or
  exists(TypeTracker t2 | result = tracked(t2).track(t2, t))
}
/**
 * Inline-expectation test for the `tracked` tag: every node reached by
 * `tracked/1` must be matched by a `$tracked` annotation in the test source.
 * The expectation value is the attribute component of the tracker state.
 */
class TrackedTest extends InlineExpectationsTest {
  TrackedTest() { this = "TrackedTest" }

  override string getARelevantTag() { result = "tracked" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(Node e, TypeTracker t |
      e = tracked(t) and
      tag = "tracked" and
      location = e.getLocation() and
      value = t.getAttr() and
      element = e.toString()
    )
  }
}
/**
 * Gets a node reachable, under tracker state `t`, from a call to the
 * built-in `int` -- used as the seed for the `int` tag.
 */
Node int_type(TypeTracker t) {
  t.start() and
  result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "int")
  or
  exists(TypeTracker t2 | result = int_type(t2).track(t2, t))
}

/**
 * Gets a node reachable, under tracker state `t`, from a call to the
 * built-in `str` -- used as the seed for the `str` tag.
 */
Node string_type(TypeTracker t) {
  t.start() and
  result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "str")
  or
  exists(TypeTracker t2 | result = string_type(t2).track(t2, t))
}
/**
 * Inline-expectation test for the `int` tag: nodes reached by `int_type/1`
 * must match `$int` annotations in the test source.
 */
class TrackedIntTest extends InlineExpectationsTest {
  TrackedIntTest() { this = "TrackedIntTest" }

  override string getARelevantTag() { result = "int" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(Node e, TypeTracker t |
      e = int_type(t) and
      tag = "int" and
      location = e.getLocation() and
      value = t.getAttr() and
      element = e.toString()
    )
  }
}

/**
 * Inline-expectation test for the `str` tag: nodes reached by
 * `string_type/1` must match `$str` annotations in the test source.
 */
class TrackedStringTest extends InlineExpectationsTest {
  TrackedStringTest() { this = "TrackedStringTest" }

  override string getARelevantTag() { result = "str" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(Node e, TypeTracker t |
      e = string_type(t) and
      tag = "str" and
      location = e.getLocation() and
      value = t.getAttr() and
      element = e.toString()
    )
  }
}

View File

@@ -0,0 +1 @@
experimental/Security/CWE-074/TemplateInjection.ql

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=3 -p ../../../../query-tests/Security/lib/

View File

@@ -0,0 +1 @@
experimental/Security/CWE-091/Xslt.ql

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=3 -p ../../../../query-tests/Security/lib/

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=3 -p ../../../../query-tests/Security/lib/

View File

@@ -0,0 +1 @@
experimental/Security/CWE-643/xpath.ql

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1