mirror of
https://github.com/github/codeql.git
synced 2026-05-01 03:35:13 +02:00
Merge branch 'main' into python-more-complete-dataflow-tests
This commit is contained in:
12
python/ql/src/Lexical/CommentedOutCodeMetricOverview.qhelp
Normal file
12
python/ql/src/Lexical/CommentedOutCodeMetricOverview.qhelp
Normal file
@@ -0,0 +1,12 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>
|
||||
This metric counts the number of lines of commented-out code in each file. Large amounts of
|
||||
commented-out code often indicate poorly maintained code.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
</qhelp>
|
||||
25
python/ql/src/Lexical/CommentedOutCodeQuery.qhelp
Normal file
25
python/ql/src/Lexical/CommentedOutCodeQuery.qhelp
Normal file
@@ -0,0 +1,25 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
Commented-out code is distracting and confusing for developers who read the surrounding code,
|
||||
and its significance is often unclear. It will not get compiled or tested when the code around
|
||||
it changes, so it's likely to break over time. For these reasons, commented-out code should be
|
||||
avoided.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
Remove or reinstate the commented-out code. If you want to include a snippet of example code
|
||||
in a comment, consider enclosing it in quotes or marking it up as appropriate for the source
|
||||
language.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
</qhelp>
|
||||
12
python/ql/src/Lexical/CommentedOutCodeReferences.qhelp
Normal file
12
python/ql/src/Lexical/CommentedOutCodeReferences.qhelp
Normal file
@@ -0,0 +1,12 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<references>
|
||||
|
||||
<li>Mark Needham: <a href="http://www.markhneedham.com/blog/2009/01/17/the-danger-of-commenting-out-code/">The danger of commenting out code</a>.</li>
|
||||
<li>Los Techies: <a href="http://lostechies.com/rodpaddock/2010/12/29/commented-code-technical-debt">Commented Code == Technical Debt</a>.</li>
|
||||
<li>High Integrity C++ Coding Standard: <a href="http://www.codingstandard.com/rule/2-3-2-do-not-comment-out-code/">2.3.2 Do not comment out code</a>.</li>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -12,6 +12,5 @@ a poorly designed or hastily written code base, which typically suffers from oth
|
||||
problems as well.
|
||||
</p>
|
||||
|
||||
|
||||
</overview>
|
||||
</qhelp>
|
||||
|
||||
35
python/ql/src/Metrics/FLinesOfDuplicatedCodeCommon.qhelp
Normal file
35
python/ql/src/Metrics/FLinesOfDuplicatedCodeCommon.qhelp
Normal file
@@ -0,0 +1,35 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
|
||||
<p>
|
||||
This metric measures the number of lines in a file that are contained within a block that is duplicated elsewhere. These lines may include code, comments and whitespace, and the duplicate block may be in this file or in another file.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
A file that contains many lines that are duplicated within the code base is problematic
|
||||
for a number of reasons.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<include src="DuplicationProblems.qhelp" />
|
||||
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
Refactor files with lots of duplicated code to extract the common code into
|
||||
a shared library or module.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
<references>
|
||||
|
||||
|
||||
<li>Wikipedia: <a href="http://en.wikipedia.org/wiki/Duplicate_code">Duplicate code</a>.</li>
|
||||
<li>M. Fowler, <em>Refactoring</em>. Addison-Wesley, 1999.</li>
|
||||
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -15,7 +15,7 @@
|
||||
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import python
|
||||
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import python
|
||||
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import python
|
||||
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
|
||||
282
python/ql/src/experimental/dataflow/TypeTracker.qll
Normal file
282
python/ql/src/experimental/dataflow/TypeTracker.qll
Normal file
@@ -0,0 +1,282 @@
|
||||
/** Step Summaries and Type Tracking */
|
||||
|
||||
import python
|
||||
import internal.DataFlowPublic
|
||||
import internal.DataFlowPrivate
|
||||
|
||||
/**
 * A string that occurs as the name of some `Attribute`, and which can
 * therefore be used as an attribute name or access path.
 */
class AttributeName extends string {
  AttributeName() { exists(Attribute attribute | attribute.getName() = this) }
}

/**
 * An `AttributeName`, or the empty string. The empty string stands for
 * "no attribute".
 */
class OptionalAttributeName extends string {
  OptionalAttributeName() {
    this = ""
    or
    this instanceof AttributeName
  }
}
|
||||
|
||||
/**
 * The kinds of step that may occur on an inter-procedural data flow path:
 * level, call and return steps, plus stores to and loads from a named attribute.
 */
private newtype TStepSummary =
  LevelStep() or
  CallStep() or
  ReturnStep() or
  StoreStep(AttributeName attr) or
  LoadStep(AttributeName attr)

/**
 * INTERNAL: Use `TypeTracker` instead.
 *
 * A description of a single step on an inter-procedural data flow path.
 */
class StepSummary extends TStepSummary {
  /**
   * Gets a textual representation of this step summary: one of `level`, `call`,
   * `return`, `store <attr>` or `load <attr>`.
   */
  string toString() {
    this = LevelStep() and result = "level"
    or
    this = CallStep() and result = "call"
    or
    this = ReturnStep() and result = "return"
    or
    exists(AttributeName name |
      this = StoreStep(name) and result = "store " + name
      or
      this = LoadStep(name) and result = "load " + name
    )
  }
}
|
||||
|
||||
/** Provides the relations that associate a pair of nodes with the `StepSummary` of the step between them. */
module StepSummary {
  /**
   * Holds if `nodeTo` is reached from `nodeFrom` by zero or more
   * `EssaFlow::essaFlowStep`s followed by one `smallstep` described by `summary`.
   */
  cached
  predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
    exists(Node beforeLast |
      smallstep(beforeLast, nodeTo, summary) and
      EssaFlow::essaFlowStep*(nodeFrom, beforeLast)
    )
  }

  /**
   * Holds if there is a single step from `nodeFrom` to `nodeTo` whose kind is `summary`:
   * an `EssaFlow::essaFlowStep` (level), a `callStep`, a `returnStep`,
   * a `basicStoreStep` or a `basicLoadStep`.
   */
  predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
    summary = LevelStep() and
    EssaFlow::essaFlowStep(nodeFrom, nodeTo)
    or
    summary = CallStep() and
    callStep(nodeFrom, nodeTo)
    or
    summary = ReturnStep() and
    returnStep(nodeFrom, nodeTo)
    or
    exists(string attr |
      summary = StoreStep(attr) and
      basicStoreStep(nodeFrom, nodeTo, attr)
    )
    or
    exists(string attr |
      summary = LoadStep(attr) and
      basicLoadStep(nodeFrom, nodeTo, attr)
    )
  }
}
|
||||
|
||||
/**
 * Holds if `nodeFrom` is the argument at some position of a call, and `nodeTo` is
 * the parameter at that same position of the call's callable.
 */
predicate callStep(ArgumentNode nodeFrom, ParameterNode nodeTo) {
  // TODO: Support special methods?
  exists(DataFlowCall call, int position |
    nodeTo.isParameterOf(call.getCallable(), position) and
    nodeFrom.argumentOf(call, position)
  )
}
|
||||
|
||||
/**
 * Holds if `nodeFrom` is a return node whose enclosing callable is the callable of
 * some call, and `nodeTo` is the node for that call.
 */
predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
  exists(DataFlowCall call |
    nodeTo.asCfgNode() = call.getNode() and
    call.getCallable() = nodeFrom.getEnclosingCallable()
  )
}
|
||||
|
||||
/**
 * Holds if `nodeFrom` is written to the attribute named `attr` of the object in `nodeTo`.
 *
 * The choice of `nodeTo` is deliberately not "chronological". What matters is
 * whether the `attr` attribute of `nodeTo` can hold a value of a given type; the
 * assumption is that once such a value is written, any access of that attribute
 * may yield something of that type.
 *
 * For example, given
 *
 * ```python
 * def foo(y):
 *     x = Foo()
 *     bar(x)
 *     x.attr = y
 *     baz(x)
 *
 * def bar(x):
 *     z = x.attr
 * ```
 * the write `x.attr = y` gives `attr = "attr"`, `nodeFrom` is `y`, and `nodeTo` is the
 * object `Foo()` created on the first line of `foo`. As a result, the type of `y` is
 * tracked into the assignment to `z` inside `bar`, even though the write happens
 * _after_ `bar` has been called.
 */
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
  exists(AttributeAssignment write, Node receiver |
    nodeFrom.asCfgNode() = write.getValue() and
    attr = write.getName() and
    receiver.asVar() = write.getInput() and
    // `nodeTo` is any node that reaches the receiver by zero or more local flow steps
    EssaFlow::essaFlowStep*(nodeTo, receiver)
  )
}
|
||||
|
||||
/**
 * Holds if `nodeTo` is an attribute access that reads the attribute named `attr`
 * from the object `nodeFrom`.
 */
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
  nodeFrom.asCfgNode() = nodeTo.asCfgNode().(AttrNode).getObject(attr)
}
|
||||
|
||||
/**
 * A class containing exactly the two `boolean` values. It is equivalent to
 * `boolean`, but does not require type joining.
 */
private class Boolean extends boolean {
  Boolean() {
    this = false
    or
    this = true
  }
}
|
||||
|
||||
/**
 * The state of a type tracker: whether a call step has been taken, and the
 * attribute (if any) that the tracked value is currently stored in.
 */
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeName attr)

/**
 * Summary of the steps needed to track a value to a given dataflow node.
 *
 * This can be used to track objects that implement a certain API in order to
 * recognize calls to that API. Note that type-tracking does not by itself provide a
 * source/sink relation, that is, it may determine that a node has a given type,
 * but it won't determine where that type came from.
 *
 * It is recommended that all uses of this type are written in the following form,
 * for tracking some type `myType`:
 * ```ql
 * DataFlow::Node myType(DataFlow::TypeTracker t) {
 *   t.start() and
 *   result = < source of myType >
 *   or
 *   exists (DataFlow::TypeTracker t2 |
 *     result = myType(t2).track(t2, t)
 *   )
 * }
 *
 * DataFlow::Node myType() { result = myType(DataFlow::TypeTracker::end()) }
 * ```
 *
 * Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
 * `t = t2.step(myType(t2), result)`. If you additionally want to track individual
 * intra-procedural steps, use `t = t2.smallstep(myType(t2), result)`.
 */
class TypeTracker extends TTypeTracker {
  // Whether a call step has been taken while tracking.
  Boolean hasCall;
  // The attribute the tracked value is stored in, or "" if it is not stored in an attribute.
  OptionalAttributeName attr;

  TypeTracker() { this = MkTypeTracker(hasCall, attr) }

  /** Gets the summary resulting from appending `step` to this type-tracking summary. */
  cached
  TypeTracker append(StepSummary step) {
    // A level step leaves the summary unchanged.
    step = LevelStep() and result = this
    or
    // A call step records that a call has been entered.
    step = CallStep() and result = MkTypeTracker(true, attr)
    or
    // A return step is only allowed if no call step has been taken.
    step = ReturnStep() and hasCall = false and result = this
    or
    // Loading the attribute currently being tracked yields the value itself again.
    step = LoadStep(attr) and result = MkTypeTracker(hasCall, "")
    or
    // A store is only allowed when the value is not already inside an attribute.
    exists(string p | step = StoreStep(p) and attr = "" and result = MkTypeTracker(hasCall, p))
  }

  /** Gets a textual representation of this summary. */
  string toString() {
    exists(string withCall, string withAttr |
      (if hasCall = true then withCall = "with" else withCall = "without") and
      (if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
      result = "type tracker " + withCall + " call steps" + withAttr
    )
  }

  /**
   * Holds if this is the starting point of type tracking.
   */
  predicate start() { hasCall = false and attr = "" }

  /**
   * Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
   * The type tracking only ends after the attribute has been loaded.
   */
  predicate startInAttr(AttributeName attrName) { hasCall = false and attr = attrName }

  /**
   * Holds if this is the starting point of type tracking
   * when tracking a parameter into a call, but not out of it.
   */
  predicate call() { hasCall = true and attr = "" }

  /**
   * Holds if this is the end point of type tracking, that is, the tracked
   * value is not stored in an attribute.
   */
  predicate end() { attr = "" }

  /**
   * INTERNAL. DO NOT USE.
   *
   * Holds if this type has been tracked into a call.
   */
  boolean hasCall() { result = hasCall }

  /**
   * INTERNAL. DO NOT USE.
   *
   * Gets the attribute associated with this type tracker.
   */
  string getAttr() { result = attr }

  /**
   * Gets a type tracker that starts where this one has left off to allow continued
   * tracking.
   *
   * This predicate is only defined if the type has not been tracked into an attribute.
   */
  TypeTracker continue() { attr = "" and result = this }

  /**
   * Gets the summary that corresponds to having taken a forwards
   * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
   */
  pragma[inline]
  TypeTracker step(Node nodeFrom, Node nodeTo) {
    exists(StepSummary summary |
      StepSummary::step(nodeFrom, nodeTo, summary) and
      result = this.append(summary)
    )
  }

  /**
   * Gets the summary that corresponds to having taken a forwards
   * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
   *
   * Unlike `TypeTracker::step`, this predicate exposes all edges
   * in the flow graph, and not just the edges between `Node`s.
   * It may therefore be less performant.
   *
   * Type tracking predicates using small steps typically take the following form:
   * ```ql
   * DataFlow::Node myType(DataFlow::TypeTracker t) {
   *   t.start() and
   *   result = < source of myType >
   *   or
   *   exists (DataFlow::TypeTracker t2 |
   *     t = t2.smallstep(myType(t2), result)
   *   )
   * }
   *
   * DataFlow::Node myType() {
   *   result = myType(DataFlow::TypeTracker::end())
   * }
   * ```
   */
  pragma[inline]
  TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
    exists(StepSummary summary |
      StepSummary::smallstep(nodeFrom, nodeTo, summary) and
      result = this.append(summary)
    )
    or
    // NOTE: `StepSummary::smallstep` already reports every `essaFlowStep` as a
    // `LevelStep`, which `append` maps to `this`; this disjunct states that case explicitly.
    EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
    result = this
  }
}

/** Provides predicates for implementing custom `TypeTracker`s. */
module TypeTracker {
  /**
   * Gets a valid end point of type tracking.
   *
   * This is the predicate referred to as `TypeTracker::end()` in the usage
   * examples on the `TypeTracker` class.
   */
  TypeTracker end() { result.end() }
}
|
||||
@@ -123,8 +123,18 @@ module Consistency {
|
||||
n.getEnclosingCallable() != call.getEnclosingCallable()
|
||||
}
|
||||
|
||||
// Gets the pre-update node of `n`.
// This predicate helps the compiler forget that in some languages
|
||||
// it is impossible for a result of `getPreUpdateNode` to be an
|
||||
// instance of `PostUpdateNode`.
// NOTE(review): the `or none()` disjunct adds no results; presumably it is part
// of what keeps the compiler from seeing through this wrapper. Confirm before simplifying.
|
||||
private Node getPre(PostUpdateNode n) {
|
||||
result = n.getPreUpdateNode()
|
||||
or
|
||||
none()
|
||||
}
|
||||
|
||||
query predicate postIsNotPre(PostUpdateNode n, string msg) {
|
||||
n.getPreUpdateNode() = n and msg = "PostUpdateNode should not equal its pre-update node."
|
||||
getPre(n) = n and
|
||||
msg = "PostUpdateNode should not equal its pre-update node."
|
||||
}
|
||||
|
||||
query predicate postHasUniquePre(PostUpdateNode n, string msg) {
|
||||
@@ -152,12 +162,6 @@ module Consistency {
|
||||
msg = "Origin of readStep is missing a PostUpdateNode."
|
||||
}
|
||||
|
||||
query predicate storeIsPostUpdate(Node n, string msg) {
|
||||
storeStep(_, _, n) and
|
||||
not n instanceof PostUpdateNode and
|
||||
msg = "Store targets should be PostUpdateNodes."
|
||||
}
|
||||
|
||||
query predicate argHasPostUpdate(ArgumentNode n, string msg) {
|
||||
not hasPost(n) and
|
||||
not isImmutableOrUnobservable(n) and
|
||||
|
||||
@@ -15,6 +15,32 @@ class DataFlowCfgNode extends ControlFlowNode {
|
||||
DataFlowCfgNode() { isExpressionNode(this) }
|
||||
}
|
||||
|
||||
/** A data flow node that should have a corresponding post-update node. */
abstract class PreUpdateNode extends Node { }

/** An argument node. A call might change the value of its argument. */
class ArgumentPreUpdateNode extends PreUpdateNode, ArgumentNode { }

/**
 * The object of an attribute expression in a store context. A store might
 * change the value of that object.
 */
class StorePreUpdateNode extends PreUpdateNode, CfgNode {
  StorePreUpdateNode() {
    exists(Attribute write |
      write.getCtx() instanceof Store and
      write.getObject().getAFlowNode() = node
    )
  }
}

/**
 * The object of an attribute expression in a load context, that is, the
 * object before the read.
 */
class ReadPreUpdateNode extends PreUpdateNode, CfgNode {
  ReadPreUpdateNode() {
    exists(Attribute read |
      read.getCtx() instanceof Load and
      read.getObject().getAFlowNode() = node
    )
  }
}
|
||||
|
||||
/**
|
||||
* A node associated with an object after an operation that might have
|
||||
* changed its state.
|
||||
@@ -24,12 +50,21 @@ class DataFlowCfgNode extends ControlFlowNode {
|
||||
* an update to the field.
|
||||
*
|
||||
* Nodes corresponding to AST elements, for example `ExprNode`, usually refer
|
||||
* to the value before the update with the exception of `ObjectCreation`,
|
||||
* which represents the value after the constructor has run.
|
||||
* to the value before the update.
|
||||
*/
|
||||
abstract class PostUpdateNode extends Node {
|
||||
class PostUpdateNode extends Node, TPostUpdateNode {
|
||||
PreUpdateNode pre;
|
||||
|
||||
PostUpdateNode() { this = TPostUpdateNode(pre) }
|
||||
|
||||
/** Gets the node before the state update. */
|
||||
abstract Node getPreUpdateNode();
|
||||
Node getPreUpdateNode() { result = pre }
|
||||
|
||||
override string toString() { result = "[post] " + pre.toString() }
|
||||
|
||||
override Scope getScope() { result = pre.getScope() }
|
||||
|
||||
override Location getLocation() { result = pre.getLocation() }
|
||||
}
|
||||
|
||||
class DataFlowExpr = Expr;
|
||||
@@ -98,7 +133,17 @@ module EssaFlow {
|
||||
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
not nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable and
|
||||
not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
|
||||
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
|
||||
EssaFlow::essaFlowStep(update(nodeFrom), nodeTo)
|
||||
}
|
||||
|
||||
/**
 * Gets a post-update node of `node` if it has one; otherwise gets `node` itself.
 */
private Node update(Node node) {
  if exists(PostUpdateNode post | post.getPreUpdateNode() = node)
  then result.(PostUpdateNode).getPreUpdateNode() = node
  else result = node
}
|
||||
|
||||
// TODO: Make modules for these headings
|
||||
|
||||
@@ -2,8 +2,9 @@
|
||||
* Provides Python-specific definitions for use in the data flow library.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import python
|
||||
private import DataFlowPrivate
|
||||
import experimental.dataflow.TypeTracker
|
||||
|
||||
/**
|
||||
* IPA type for data flow nodes.
|
||||
@@ -20,7 +21,9 @@ newtype TNode =
|
||||
/** A node corresponding to an SSA variable. */
|
||||
TEssaNode(EssaVariable var) or
|
||||
/** A node corresponding to a control flow node. */
|
||||
TCfgNode(DataFlowCfgNode node)
|
||||
TCfgNode(DataFlowCfgNode node) or
|
||||
/** A node representing the value of an object after a state change */
|
||||
TPostUpdateNode(PreUpdateNode pre)
|
||||
|
||||
/**
|
||||
* An element, viewed as a node in a data flow graph. Either an SSA variable
|
||||
@@ -67,6 +70,14 @@ class Node extends TNode {
|
||||
|
||||
/** Convenience method for casting to `ExprNode` and calling `getNode()` twice, that is, `getNode().getNode()`. */
|
||||
Expr asExpr() { none() }
|
||||
|
||||
/**
 * Gets a node that this node may flow to using one heap and/or interprocedural step,
 * where `t2` is the type tracker before the step and `t` the type tracker after it.
 *
 * See `TypeTracker` for more details about how to use this.
 */
pragma[inline]
Node track(TypeTracker t2, TypeTracker t) { t2.step(this, result) = t }
|
||||
}
|
||||
|
||||
class EssaNode extends Node, TEssaNode {
|
||||
|
||||
@@ -30,14 +30,24 @@ predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeT
|
||||
subscriptStep(nodeFrom, nodeTo)
|
||||
or
|
||||
stringManipulation(nodeFrom, nodeTo)
|
||||
or
|
||||
jsonStep(nodeFrom, nodeTo)
|
||||
or
|
||||
containerStep(nodeFrom, nodeTo)
|
||||
or
|
||||
copyStep(nodeFrom, nodeTo)
|
||||
or
|
||||
forStep(nodeFrom, nodeTo)
|
||||
or
|
||||
unpackingAssignmentStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to concatenation.
|
||||
*
|
||||
* Note that since we cannot easily distinguish interesting types (like string, list, tuple),
|
||||
* we consider any `+` operation to propagate taint. After consulting with the JS team, this
|
||||
* doesn't sound like it is a big problem in practice.
|
||||
* we consider any `+` operation to propagate taint. This is what is done in the JS libraries,
|
||||
* and isn't a big problem in practice.
|
||||
*/
|
||||
predicate concatStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
|
||||
exists(BinaryExprNode add | add = nodeTo.getNode() |
|
||||
@@ -118,8 +128,101 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
|
||||
)
|
||||
or
|
||||
// f-strings
|
||||
nodeTo.getNode().getNode().(Fstring).getAValue() = nodeFrom.getNode().getNode()
|
||||
nodeTo.asExpr().(Fstring).getAValue() = nodeFrom.asExpr()
|
||||
// TODO: Handle encode/decode from base64/quopri
|
||||
// TODO: Handle os.path.join
|
||||
// TODO: Handle functions in https://docs.python.org/3/library/binascii.html
|
||||
}
|
||||
|
||||
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` through JSON encoding or decoding:
 * `nodeTo` is a call to `json.load`, `json.loads` or `json.dumps`, and `nodeFrom` is
 * the first argument of that call.
 */
predicate jsonStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
  exists(CallNode call, AttrNode callee |
    nodeTo.getNode() = call and
    callee = call.getFunction() and
    callee.getObject(["load", "loads", "dumps"]).(NameNode).getId() = "json" and
    nodeFrom.getNode() = call.getArg(0)
  )
}
|
||||
|
||||
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to containers
 * (lists/sets/dictionaries): literals, constructor invocation, methods.
 *
 * Note that this is currently very imprecise. For example, since `dict.get` is
 * modeled, any `<tainted object>.get(<arg>)` is treated as tainted, whether or not
 * the object is actually a dictionary.
 */
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
  // construction by literal
  // TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
  storeStep(nodeFrom, _, nodeTo)
  or
  // calls to named functions that receive the container as their first argument
  exists(CallNode call, string functionName |
    nodeTo.asCfgNode() = call and
    call.getFunction().(NameNode).getId() = functionName and
    call.getArg(0) = nodeFrom.getNode()
  |
    // constructor call
    functionName in ["list", "set", "frozenset", "dict", "defaultdict", "tuple"]
    or
    // functions operating on collections
    functionName in ["sorted", "reversed", "iter", "next"]
  )
  or
  // methods called on the container
  exists(CallNode call, string methodName |
    nodeTo.asCfgNode() = call and
    nodeFrom.asCfgNode() = call.getFunction().(AttrNode).getObject(methodName)
  |
    // general
    methodName in ["copy", "pop"]
    or
    // dict
    methodName in ["values", "items", "get", "popitem"]
  )
  or
  // list.append, set.add: the argument taints the post-update node of the receiver
  exists(CallNode call, string methodName |
    methodName in ["append", "add"] and
    nodeFrom.getNode() = call.getArg(0) and
    nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() =
      call.getFunction().(AttrNode).getObject(methodName)
  )
}
|
||||
|
||||
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying:
 * `nodeTo` is a call to `copy` or `deepcopy` (bare, or as an attribute of the name
 * `copy`), and `nodeFrom` is the first argument of that call.
 */
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
  exists(CallNode call |
    nodeTo.getNode() = call and
    nodeFrom.getNode() = call.getArg(0)
  |
    // Unqualified: `copy(...)`, `deepcopy(...)`
    call.getFunction().(NameNode).getId() in ["copy", "deepcopy"]
    or
    // Fully qualified: `copy.copy(...)`, `copy.deepcopy(...)`
    call.getFunction().(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy"
  )
}
|
||||
|
||||
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
 * for example `for x in xs`, or `for x,y in points`: `nodeFrom` is the iterated expression,
 * and `nodeTo` is a variable defined within the loop target.
 */
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
  exists(For loop, EssaNodeDefinition targetDef |
    loop.getIter() = nodeFrom.asExpr() and
    targetDef = nodeTo.getVar() and
    // the definition may be the target itself or nested anywhere inside it
    targetDef.getDefiningNode().getNode() = loop.getTarget().getAChildNode*()
  )
}
|
||||
|
||||
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking:
 * `nodeFrom` is the assigned value, and `nodeTo` is a variable defined by the unpacking.
 * Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
 */
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
  // `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
  exists(Assign assignment, MultiAssignmentDefinition targetDef |
    assignment.getValue() = nodeFrom.asExpr() and
    targetDef = nodeTo.getVar() and
    assignment.getATarget().contains(targetDef.getDefiningNode().getNode())
  )
}
|
||||
|
||||
@@ -28,11 +28,11 @@
|
||||
* }
|
||||
*
|
||||
* override predicate hasActualResult(
|
||||
* Location location, string element, string tag, string valuesasas
|
||||
* Location location, string element, string tag, string value
|
||||
* ) {
|
||||
* exists(Expr e |
|
||||
* tag = "const" and // The tag for this test.
|
||||
* valuesasas = e.getValue() and // The expected value. Will only hold for constant expressions.
|
||||
* value = e.getValue() and // The expected value. Will only hold for constant expressions.
|
||||
* location = e.getLocation() and // The location of the result to be reported.
|
||||
* element = e.toString() // The display text for the result.
|
||||
* )
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
experimental/CWE-074/TemplateInjection.ql
|
||||
@@ -1 +0,0 @@
|
||||
semmle-extractor-options: --max-import-depth=3 -p ../../query-tests/Security/lib/
|
||||
@@ -1 +0,0 @@
|
||||
experimental/CWE-091/Xslt.ql
|
||||
@@ -1 +0,0 @@
|
||||
semmle-extractor-options: -p ../../query-tests/Security/lib/ --max-import-depth=3
|
||||
@@ -1 +0,0 @@
|
||||
semmle-extractor-options: --max-import-depth=3 -p ../../query-tests/Security/lib/
|
||||
@@ -1 +0,0 @@
|
||||
experimental/CWE-643/xpath.ql
|
||||
@@ -38,3 +38,4 @@
|
||||
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
|
||||
| test.py:7:5:7:20 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
|
||||
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
|
||||
| test.py:7:19:7:19 | [post] ControlFlowNode for a | test.py:7:19:7:19 | [post] ControlFlowNode for a |
|
||||
|
||||
@@ -23,3 +23,4 @@
|
||||
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
|
||||
| test.py:7:5:7:20 | GSSA Variable a |
|
||||
| test.py:7:19:7:19 | ControlFlowNode for a |
|
||||
| test.py:7:19:7:19 | [post] ControlFlowNode for a |
|
||||
|
||||
@@ -23,3 +23,4 @@
|
||||
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
|
||||
| test.py:7:5:7:20 | GSSA Variable a |
|
||||
| test.py:7:19:7:19 | ControlFlowNode for a |
|
||||
| test.py:7:19:7:19 | [post] ControlFlowNode for a |
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
private import python
|
||||
import experimental.dataflow.DataFlow
|
||||
|
||||
/**
|
||||
|
||||
@@ -127,27 +127,4 @@ postHasUniquePre
|
||||
uniquePostUpdate
|
||||
postIsInSameCallable
|
||||
reverseRead
|
||||
storeIsPostUpdate
|
||||
| test.py:172:9:172:16 | ControlFlowNode for List | Store targets should be PostUpdateNodes. |
|
||||
| test.py:173:9:173:24 | ControlFlowNode for Dict | Store targets should be PostUpdateNodes. |
|
||||
| test.py:212:11:212:18 | ControlFlowNode for List | Store targets should be PostUpdateNodes. |
|
||||
argHasPostUpdate
|
||||
| test.py:25:10:25:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:29:10:29:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:48:19:48:21 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:51:10:51:12 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:55:14:55:16 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:59:11:59:11 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:67:11:67:14 | ControlFlowNode for cond | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:67:17:67:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:74:11:74:14 | ControlFlowNode for cond | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:74:17:74:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:81:13:81:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:86:13:86:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:120:13:120:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:121:12:121:12 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:125:13:125:29 | ControlFlowNode for TAINT_FROM_ARG() | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:178:15:178:15 | ControlFlowNode for l | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:179:15:179:15 | ControlFlowNode for d | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:200:19:200:19 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
|
||||
| test.py:200:22:200:24 | ControlFlowNode for str | ArgumentNode is missing PostUpdateNode. |
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import python
|
||||
import experimental.dataflow.DataFlow
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import python
|
||||
import experimental.dataflow.DataFlow
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import python
|
||||
import experimental.dataflow.DataFlow
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import python
|
||||
import experimental.dataflow.DataFlow
|
||||
|
||||
/**
|
||||
|
||||
@@ -93,7 +93,7 @@ class With_str:
|
||||
|
||||
def __str__(self):
|
||||
SINK1(self) # Flow not found
|
||||
OK() # Call not found # Call not found
|
||||
OK() # Call not found
|
||||
return "Awesome"
|
||||
|
||||
|
||||
@@ -108,7 +108,7 @@ class With_bytes:
|
||||
|
||||
def __bytes__(self):
|
||||
SINK1(self) # Flow not found
|
||||
OK() # Call not found # Call not found
|
||||
OK() # Call not found
|
||||
return b"Awesome"
|
||||
|
||||
|
||||
@@ -124,7 +124,7 @@ class With_format:
|
||||
def __format__(self, format_spec):
|
||||
SINK2(format_spec) # Flow not found
|
||||
SINK1(self) # Flow not found
|
||||
OK() # Call not found # Call not found
|
||||
OK() # Call not found
|
||||
return "Awesome"
|
||||
|
||||
|
||||
@@ -151,7 +151,7 @@ class With_lt:
|
||||
def __lt__(self, other):
|
||||
SINK2(other) # Flow not found
|
||||
SINK1(self) # Flow not found
|
||||
OK() # Call not found # Call not found
|
||||
OK() # Call not found
|
||||
return ""
|
||||
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
* @kind path-problem
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.dataflow.testConfig
|
||||
import DataFlow::PathGraph
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* hope to remove the false positive.
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.dataflow.testConfig
|
||||
|
||||
from DataFlow::Node source, DataFlow::Node sink
|
||||
|
||||
@@ -6,7 +6,8 @@ class TestTaintTrackingConfiguration extends TaintTracking::Configuration {
|
||||
TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
source.(DataFlow::CfgNode).getNode().(NameNode).getId() in ["TAINTED_STRING", "TAINTED_BYTES"]
|
||||
source.(DataFlow::CfgNode).getNode().(NameNode).getId() in ["TAINTED_STRING", "TAINTED_BYTES",
|
||||
"TAINTED_LIST", "TAINTED_DICT"]
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
@@ -44,7 +45,8 @@ private string repr(Expr e) {
|
||||
|
||||
query predicate test_taint(string arg_location, string test_res, string function_name, string repr) {
|
||||
exists(Call call, Expr arg, boolean expected_taint, boolean has_taint |
|
||||
call.getLocation().getFile().getShortName() = "test.py" and
|
||||
// only consider files that are extracted as part of the test
|
||||
exists(call.getLocation().getFile().getRelativePath()) and
|
||||
(
|
||||
call.getFunc().(Name).getId() = "ensure_tainted" and
|
||||
expected_taint = true
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
| test_collections.py:16 | ok | test_access | tainted_list.copy() |
|
||||
| test_collections.py:24 | ok | list_clear | tainted_list |
|
||||
| test_collections.py:27 | fail | list_clear | tainted_list |
|
||||
| test_string.py:17 | ok | str_methods | ts.casefold() |
|
||||
| test_string.py:19 | ok | str_methods | ts.format_map(..) |
|
||||
| test_string.py:20 | ok | str_methods | "{unsafe}".format_map(..) |
|
||||
| test_string.py:31 | fail | binary_decode_encode | base64.a85encode(..) |
|
||||
| test_string.py:32 | fail | binary_decode_encode | base64.a85decode(..) |
|
||||
| test_string.py:35 | fail | binary_decode_encode | base64.b85encode(..) |
|
||||
| test_string.py:36 | fail | binary_decode_encode | base64.b85decode(..) |
|
||||
| test_string.py:39 | fail | binary_decode_encode | base64.encodebytes(..) |
|
||||
| test_string.py:40 | fail | binary_decode_encode | base64.decodebytes(..) |
|
||||
| test_string.py:48 | ok | f_strings | Fstring |
|
||||
| test_unpacking.py:18 | ok | extended_unpacking | first |
|
||||
| test_unpacking.py:18 | ok | extended_unpacking | last |
|
||||
| test_unpacking.py:18 | ok | extended_unpacking | rest |
|
||||
| test_unpacking.py:23 | ok | also_allowed | a |
|
||||
| test_unpacking.py:31 | ok | also_allowed | b |
|
||||
| test_unpacking.py:31 | ok | also_allowed | c |
|
||||
| test_unpacking.py:39 | ok | nested | x |
|
||||
| test_unpacking.py:39 | ok | nested | xs |
|
||||
| test_unpacking.py:39 | ok | nested | ys |
|
||||
@@ -0,0 +1,32 @@
|
||||
# Add taintlib to PATH so it can be imported during runtime without any hassle
|
||||
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
|
||||
from taintlib import *
|
||||
|
||||
# This has no runtime impact, but allows autocomplete to work
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from ..taintlib import *
|
||||
|
||||
# Actual tests
|
||||
|
||||
def test_access():
|
||||
tainted_list = TAINTED_LIST
|
||||
|
||||
ensure_tainted(
|
||||
tainted_list.copy(),
|
||||
)
|
||||
|
||||
|
||||
def list_clear():
|
||||
tainted_string = TAINTED_STRING
|
||||
tainted_list = [tainted_string]
|
||||
|
||||
ensure_tainted(tainted_list)
|
||||
|
||||
tainted_list.clear()
|
||||
ensure_not_tainted(tainted_list)
|
||||
|
||||
# Make tests runable
|
||||
|
||||
test_access()
|
||||
list_clear()
|
||||
@@ -1,20 +1,11 @@
|
||||
# Python 3 specific taint tracking for string
|
||||
|
||||
TAINTED_STRING = "TAINTED_STRING"
|
||||
TAINTED_BYTES = b"TAINTED_BYTES"
|
||||
|
||||
|
||||
def ensure_tainted(*args):
|
||||
print("- ensure_tainted")
|
||||
for i, arg in enumerate(args):
|
||||
print("arg {}: {!r}".format(i, arg))
|
||||
|
||||
|
||||
def ensure_not_tainted(*args):
|
||||
print("- ensure_not_tainted")
|
||||
for i, arg in enumerate(args):
|
||||
print("arg {}: {!r}".format(i, arg))
|
||||
# Add taintlib to PATH so it can be imported during runtime without any hassle
|
||||
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
|
||||
from taintlib import *
|
||||
|
||||
# This has no runtime impact, but allows autocomplete to work
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from ..taintlib import *
|
||||
|
||||
# Actual tests
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
# Add taintlib to PATH so it can be imported during runtime without any hassle
|
||||
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
|
||||
from taintlib import *
|
||||
|
||||
# This has no runtime impact, but allows autocomplete to work
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from ..taintlib import *
|
||||
|
||||
# Actual tests
|
||||
|
||||
# Extended Iterable Unpacking -- PEP 3132
|
||||
# https://www.python.org/dev/peps/pep-3132/
|
||||
|
||||
|
||||
def extended_unpacking():
|
||||
first, *rest, last = TAINTED_LIST
|
||||
ensure_tainted(first, rest, last)
|
||||
|
||||
|
||||
def also_allowed():
|
||||
*a, = TAINTED_LIST
|
||||
ensure_tainted(a)
|
||||
|
||||
# for b, *c in [(1, 2, 3), (4, 5, 6, 7)]:
|
||||
# print(c)
|
||||
# i=0; c=[2,3]
|
||||
# i=1; c=[5,6,7]
|
||||
|
||||
for b, *c in [TAINTED_LIST, TAINTED_LIST]:
|
||||
ensure_tainted(b, c)
|
||||
|
||||
|
||||
def nested():
|
||||
l = TAINTED_LIST
|
||||
ll = [l,l]
|
||||
|
||||
[[x, *xs], ys] = ll
|
||||
ensure_tainted(x, xs, ys)
|
||||
|
||||
|
||||
# Make tests runable
|
||||
|
||||
extended_unpacking()
|
||||
also_allowed()
|
||||
nested()
|
||||
@@ -0,0 +1,172 @@
|
||||
| test_collections.py:23 | ok | test_construction | tainted_string |
|
||||
| test_collections.py:24 | ok | test_construction | tainted_list |
|
||||
| test_collections.py:25 | ok | test_construction | tainted_tuple |
|
||||
| test_collections.py:26 | ok | test_construction | tainted_set |
|
||||
| test_collections.py:27 | ok | test_construction | tainted_dict |
|
||||
| test_collections.py:31 | ok | test_construction | list(..) |
|
||||
| test_collections.py:32 | ok | test_construction | list(..) |
|
||||
| test_collections.py:33 | ok | test_construction | list(..) |
|
||||
| test_collections.py:34 | ok | test_construction | list(..) |
|
||||
| test_collections.py:35 | ok | test_construction | list(..) |
|
||||
| test_collections.py:37 | ok | test_construction | tuple(..) |
|
||||
| test_collections.py:38 | ok | test_construction | set(..) |
|
||||
| test_collections.py:39 | ok | test_construction | frozenset(..) |
|
||||
| test_collections.py:47 | ok | test_access | tainted_list[0] |
|
||||
| test_collections.py:48 | ok | test_access | tainted_list[x] |
|
||||
| test_collections.py:49 | ok | test_access | tainted_list[Slice] |
|
||||
| test_collections.py:51 | ok | test_access | sorted(..) |
|
||||
| test_collections.py:52 | ok | test_access | reversed(..) |
|
||||
| test_collections.py:53 | ok | test_access | iter(..) |
|
||||
| test_collections.py:54 | ok | test_access | next(..) |
|
||||
| test_collections.py:58 | ok | test_access | a |
|
||||
| test_collections.py:58 | ok | test_access | b |
|
||||
| test_collections.py:58 | ok | test_access | c |
|
||||
| test_collections.py:61 | ok | test_access | h |
|
||||
| test_collections.py:63 | ok | test_access | i |
|
||||
| test_collections.py:70 | ok | test_dict_access | tainted_dict["name"] |
|
||||
| test_collections.py:71 | ok | test_dict_access | tainted_dict.get(..) |
|
||||
| test_collections.py:72 | ok | test_dict_access | tainted_dict[x] |
|
||||
| test_collections.py:73 | ok | test_dict_access | tainted_dict.copy() |
|
||||
| test_collections.py:77 | ok | test_dict_access | v |
|
||||
| test_collections.py:79 | ok | test_dict_access | v |
|
||||
| test_collections.py:87 | fail | test_named_tuple | point[0] |
|
||||
| test_collections.py:88 | fail | test_named_tuple | point.x |
|
||||
| test_collections.py:92 | ok | test_named_tuple | point[1] |
|
||||
| test_collections.py:93 | ok | test_named_tuple | point.y |
|
||||
| test_collections.py:97 | fail | test_named_tuple | a |
|
||||
| test_collections.py:98 | ok | test_named_tuple | b |
|
||||
| test_collections.py:106 | fail | test_defaultdict | tainted_default_dict["name"] |
|
||||
| test_collections.py:107 | fail | test_defaultdict | tainted_default_dict.get(..) |
|
||||
| test_collections.py:108 | fail | test_defaultdict | tainted_default_dict[x] |
|
||||
| test_collections.py:109 | fail | test_defaultdict | tainted_default_dict.copy() |
|
||||
| test_collections.py:112 | fail | test_defaultdict | v |
|
||||
| test_collections.py:114 | fail | test_defaultdict | v |
|
||||
| test_collections.py:121 | ok | test_copy_1 | copy(..) |
|
||||
| test_collections.py:122 | ok | test_copy_1 | deepcopy(..) |
|
||||
| test_collections.py:130 | ok | test_copy_2 | copy.copy(..) |
|
||||
| test_collections.py:131 | ok | test_copy_2 | copy.deepcopy(..) |
|
||||
| test_collections.py:139 | ok | list_index_assign | my_list |
|
||||
| test_collections.py:142 | fail | list_index_assign | my_list |
|
||||
| test_collections.py:149 | ok | list_index_aug_assign | my_list |
|
||||
| test_collections.py:152 | fail | list_index_aug_assign | my_list |
|
||||
| test_collections.py:159 | ok | list_append | my_list |
|
||||
| test_collections.py:162 | fail | list_append | my_list |
|
||||
| test_collections.py:169 | ok | list_extend | my_list |
|
||||
| test_collections.py:172 | fail | list_extend | my_list |
|
||||
| test_collections.py:179 | ok | dict_update_dict | my_dict |
|
||||
| test_collections.py:182 | fail | dict_update_dict | my_dict |
|
||||
| test_collections.py:189 | ok | dict_update_kv_list | my_dict |
|
||||
| test_collections.py:192 | fail | dict_update_kv_list | my_dict |
|
||||
| test_collections.py:198 | ok | dict_update_kv_arg | my_dict |
|
||||
| test_collections.py:201 | fail | dict_update_kv_arg | my_dict |
|
||||
| test_collections.py:208 | ok | dict_manual_update | my_dict |
|
||||
| test_collections.py:212 | fail | dict_manual_update | my_dict |
|
||||
| test_collections.py:220 | fail | dict_merge | merged |
|
||||
| test_collections.py:227 | ok | set_add | my_set |
|
||||
| test_collections.py:230 | fail | set_add | my_set |
|
||||
| test_json.py:26 | ok | test | json.dumps(..) |
|
||||
| test_json.py:27 | ok | test | json.loads(..) |
|
||||
| test_json.py:34 | fail | test | tainted_filelike |
|
||||
| test_json.py:35 | fail | test | json.load(..) |
|
||||
| test_json.py:48 | fail | non_syntacical | dumps(..) |
|
||||
| test_json.py:49 | fail | non_syntacical | dumps_alias(..) |
|
||||
| test_json.py:50 | fail | non_syntacical | loads(..) |
|
||||
| test_json.py:57 | fail | non_syntacical | tainted_filelike |
|
||||
| test_json.py:58 | fail | non_syntacical | load(..) |
|
||||
| test_string.py:25 | ok | str_operations | ts |
|
||||
| test_string.py:26 | ok | str_operations | BinaryExpr |
|
||||
| test_string.py:27 | ok | str_operations | BinaryExpr |
|
||||
| test_string.py:28 | ok | str_operations | BinaryExpr |
|
||||
| test_string.py:29 | ok | str_operations | ts[Slice] |
|
||||
| test_string.py:30 | ok | str_operations | ts[Slice] |
|
||||
| test_string.py:31 | ok | str_operations | ts[Slice] |
|
||||
| test_string.py:32 | ok | str_operations | ts[0] |
|
||||
| test_string.py:33 | ok | str_operations | str(..) |
|
||||
| test_string.py:34 | ok | str_operations | bytes(..) |
|
||||
| test_string.py:35 | ok | str_operations | unicode(..) |
|
||||
| test_string.py:39 | ok | str_operations | aug_assignment |
|
||||
| test_string.py:41 | ok | str_operations | aug_assignment |
|
||||
| test_string.py:49 | ok | str_methods | ts.capitalize() |
|
||||
| test_string.py:50 | ok | str_methods | ts.center(..) |
|
||||
| test_string.py:51 | ok | str_methods | ts.expandtabs() |
|
||||
| test_string.py:53 | ok | str_methods | ts.format() |
|
||||
| test_string.py:54 | ok | str_methods | "{}".format(..) |
|
||||
| test_string.py:55 | ok | str_methods | "{unsafe}".format(..) |
|
||||
| test_string.py:57 | ok | str_methods | ts.join(..) |
|
||||
| test_string.py:58 | ok | str_methods | "".join(..) |
|
||||
| test_string.py:60 | ok | str_methods | ts.ljust(..) |
|
||||
| test_string.py:61 | ok | str_methods | ts.lstrip() |
|
||||
| test_string.py:62 | ok | str_methods | ts.lower() |
|
||||
| test_string.py:64 | ok | str_methods | ts.replace(..) |
|
||||
| test_string.py:65 | ok | str_methods | "safe".replace(..) |
|
||||
| test_string.py:67 | ok | str_methods | ts.rjust(..) |
|
||||
| test_string.py:68 | ok | str_methods | ts.rstrip() |
|
||||
| test_string.py:69 | ok | str_methods | ts.strip() |
|
||||
| test_string.py:70 | ok | str_methods | ts.swapcase() |
|
||||
| test_string.py:71 | ok | str_methods | ts.title() |
|
||||
| test_string.py:72 | ok | str_methods | ts.upper() |
|
||||
| test_string.py:73 | ok | str_methods | ts.zfill(..) |
|
||||
| test_string.py:75 | ok | str_methods | ts.encode(..) |
|
||||
| test_string.py:76 | ok | str_methods | ts.encode(..).decode(..) |
|
||||
| test_string.py:78 | ok | str_methods | tb.decode(..) |
|
||||
| test_string.py:79 | ok | str_methods | tb.decode(..).encode(..) |
|
||||
| test_string.py:82 | ok | str_methods | ts.partition(..) |
|
||||
| test_string.py:83 | ok | str_methods | ts.rpartition(..) |
|
||||
| test_string.py:84 | ok | str_methods | ts.rsplit(..) |
|
||||
| test_string.py:85 | ok | str_methods | ts.split(..) |
|
||||
| test_string.py:86 | ok | str_methods | ts.splitlines() |
|
||||
| test_string.py:91 | ok | str_methods | "safe".replace(..) |
|
||||
| test_string.py:93 | fail | str_methods | ts.join(..) |
|
||||
| test_string.py:94 | fail | str_methods | ts.join(..) |
|
||||
| test_string.py:104 | fail | non_syntactic | meth() |
|
||||
| test_string.py:105 | fail | non_syntactic | _str(..) |
|
||||
| test_string.py:114 | ok | percent_fmt | BinaryExpr |
|
||||
| test_string.py:115 | ok | percent_fmt | BinaryExpr |
|
||||
| test_string.py:116 | ok | percent_fmt | BinaryExpr |
|
||||
| test_string.py:126 | fail | binary_decode_encode | base64.b64encode(..) |
|
||||
| test_string.py:127 | fail | binary_decode_encode | base64.b64decode(..) |
|
||||
| test_string.py:129 | fail | binary_decode_encode | base64.standard_b64encode(..) |
|
||||
| test_string.py:130 | fail | binary_decode_encode | base64.standard_b64decode(..) |
|
||||
| test_string.py:132 | fail | binary_decode_encode | base64.urlsafe_b64encode(..) |
|
||||
| test_string.py:133 | fail | binary_decode_encode | base64.urlsafe_b64decode(..) |
|
||||
| test_string.py:135 | fail | binary_decode_encode | base64.b32encode(..) |
|
||||
| test_string.py:136 | fail | binary_decode_encode | base64.b32decode(..) |
|
||||
| test_string.py:138 | fail | binary_decode_encode | base64.b16encode(..) |
|
||||
| test_string.py:139 | fail | binary_decode_encode | base64.b16decode(..) |
|
||||
| test_string.py:142 | fail | binary_decode_encode | base64.encodestring(..) |
|
||||
| test_string.py:143 | fail | binary_decode_encode | base64.decodestring(..) |
|
||||
| test_string.py:148 | fail | binary_decode_encode | quopri.encodestring(..) |
|
||||
| test_string.py:149 | fail | binary_decode_encode | quopri.decodestring(..) |
|
||||
| test_unpacking.py:16 | ok | unpacking | a |
|
||||
| test_unpacking.py:16 | ok | unpacking | b |
|
||||
| test_unpacking.py:16 | ok | unpacking | c |
|
||||
| test_unpacking.py:22 | ok | unpacking_to_list | a |
|
||||
| test_unpacking.py:22 | ok | unpacking_to_list | b |
|
||||
| test_unpacking.py:22 | ok | unpacking_to_list | c |
|
||||
| test_unpacking.py:31 | ok | nested | a1 |
|
||||
| test_unpacking.py:31 | ok | nested | a2 |
|
||||
| test_unpacking.py:31 | ok | nested | a3 |
|
||||
| test_unpacking.py:31 | ok | nested | b |
|
||||
| test_unpacking.py:31 | ok | nested | c |
|
||||
| test_unpacking.py:35 | ok | nested | a1 |
|
||||
| test_unpacking.py:35 | ok | nested | a2 |
|
||||
| test_unpacking.py:35 | ok | nested | a3 |
|
||||
| test_unpacking.py:35 | ok | nested | b |
|
||||
| test_unpacking.py:35 | ok | nested | c |
|
||||
| test_unpacking.py:39 | ok | nested | a1 |
|
||||
| test_unpacking.py:39 | ok | nested | a2 |
|
||||
| test_unpacking.py:39 | ok | nested | a3 |
|
||||
| test_unpacking.py:39 | ok | nested | b |
|
||||
| test_unpacking.py:39 | ok | nested | c |
|
||||
| test_unpacking.py:46 | ok | unpack_from_set | a |
|
||||
| test_unpacking.py:46 | ok | unpack_from_set | b |
|
||||
| test_unpacking.py:46 | ok | unpack_from_set | c |
|
||||
| test_unpacking.py:55 | ok | contrived_1 | a |
|
||||
| test_unpacking.py:55 | ok | contrived_1 | b |
|
||||
| test_unpacking.py:55 | ok | contrived_1 | c |
|
||||
| test_unpacking.py:56 | fail | contrived_1 | d |
|
||||
| test_unpacking.py:56 | fail | contrived_1 | e |
|
||||
| test_unpacking.py:56 | fail | contrived_1 | f |
|
||||
| test_unpacking.py:65 | ok | contrived_2 | a |
|
||||
| test_unpacking.py:65 | ok | contrived_2 | b |
|
||||
| test_unpacking.py:65 | ok | contrived_2 | c |
|
||||
@@ -0,0 +1,254 @@
|
||||
# Add taintlib to PATH so it can be imported during runtime without any hassle
|
||||
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
|
||||
from taintlib import *
|
||||
|
||||
# This has no runtime impact, but allows autocomplete to work
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from ..taintlib import *
|
||||
|
||||
|
||||
# Actual tests
|
||||
|
||||
from collections import defaultdict, namedtuple
|
||||
|
||||
def test_construction():
|
||||
tainted_string = TAINTED_STRING
|
||||
tainted_list = [tainted_string]
|
||||
tainted_tuple = (tainted_string,)
|
||||
tainted_set = {tainted_string}
|
||||
tainted_dict = {'key': tainted_string}
|
||||
|
||||
ensure_tainted(
|
||||
tainted_string,
|
||||
tainted_list,
|
||||
tainted_tuple,
|
||||
tainted_set,
|
||||
tainted_dict,
|
||||
)
|
||||
|
||||
ensure_tainted(
|
||||
list(tainted_list),
|
||||
list(tainted_tuple),
|
||||
list(tainted_set),
|
||||
list(tainted_dict.values()),
|
||||
list(tainted_dict.items()),
|
||||
|
||||
tuple(tainted_list),
|
||||
set(tainted_list),
|
||||
frozenset(tainted_list),
|
||||
)
|
||||
|
||||
|
||||
def test_access(x, y, z):
|
||||
tainted_list = TAINTED_LIST
|
||||
|
||||
ensure_tainted(
|
||||
tainted_list[0],
|
||||
tainted_list[x],
|
||||
tainted_list[y:z],
|
||||
|
||||
sorted(tainted_list),
|
||||
reversed(tainted_list),
|
||||
iter(tainted_list),
|
||||
next(iter(tainted_list)),
|
||||
)
|
||||
|
||||
a, b, c = tainted_list[0:3]
|
||||
ensure_tainted(a, b, c)
|
||||
|
||||
for h in tainted_list:
|
||||
ensure_tainted(h)
|
||||
for i in reversed(tainted_list):
|
||||
ensure_tainted(i)
|
||||
|
||||
|
||||
def test_dict_access(x):
|
||||
tainted_dict = TAINTED_DICT
|
||||
|
||||
ensure_tainted(
|
||||
tainted_dict["name"],
|
||||
tainted_dict.get("name"),
|
||||
tainted_dict[x],
|
||||
tainted_dict.copy(),
|
||||
)
|
||||
|
||||
for v in tainted_dict.values():
|
||||
ensure_tainted(v)
|
||||
for k, v in tainted_dict.items():
|
||||
ensure_tainted(v)
|
||||
|
||||
|
||||
def test_named_tuple(): # TODO: namedtuple currently not handled
|
||||
Point = namedtuple('Point', ['x', 'y'])
|
||||
point = Point(TAINTED_STRING, 'safe')
|
||||
|
||||
ensure_tainted(
|
||||
point[0],
|
||||
point.x,
|
||||
)
|
||||
|
||||
ensure_not_tainted(
|
||||
point[1],
|
||||
point.y,
|
||||
)
|
||||
|
||||
a, b = point
|
||||
ensure_tainted(a)
|
||||
ensure_not_tainted(b)
|
||||
|
||||
|
||||
def test_defaultdict(key, x): # TODO: defaultdict currently not handled
|
||||
tainted_default_dict = defaultdict(str)
|
||||
tainted_default_dict[key] += TAINTED_STRING
|
||||
|
||||
ensure_tainted(
|
||||
tainted_default_dict["name"],
|
||||
tainted_default_dict.get("name"),
|
||||
tainted_default_dict[x],
|
||||
tainted_default_dict.copy(),
|
||||
)
|
||||
for v in tainted_default_dict.values():
|
||||
ensure_tainted(v)
|
||||
for k, v in tainted_default_dict.items():
|
||||
ensure_tainted(v)
|
||||
|
||||
|
||||
def test_copy_1():
|
||||
from copy import copy, deepcopy
|
||||
|
||||
ensure_tainted(
|
||||
copy(TAINTED_LIST),
|
||||
deepcopy(TAINTED_LIST),
|
||||
)
|
||||
|
||||
|
||||
def test_copy_2():
|
||||
import copy
|
||||
|
||||
ensure_tainted(
|
||||
copy.copy(TAINTED_LIST),
|
||||
copy.deepcopy(TAINTED_LIST),
|
||||
)
|
||||
|
||||
|
||||
def list_index_assign():
|
||||
tainted_string = TAINTED_STRING
|
||||
my_list = ["safe"]
|
||||
|
||||
ensure_not_tainted(my_list)
|
||||
|
||||
my_list[0] = tainted_string
|
||||
ensure_tainted(my_list)
|
||||
|
||||
|
||||
def list_index_aug_assign():
|
||||
tainted_string = TAINTED_STRING
|
||||
my_list = ["safe"]
|
||||
|
||||
ensure_not_tainted(my_list)
|
||||
|
||||
my_list[0] += tainted_string
|
||||
ensure_tainted(my_list)
|
||||
|
||||
|
||||
def list_append():
|
||||
tainted_string = TAINTED_STRING
|
||||
my_list = ["safe"]
|
||||
|
||||
ensure_not_tainted(my_list)
|
||||
|
||||
my_list.append(tainted_string)
|
||||
ensure_tainted(my_list)
|
||||
|
||||
|
||||
def list_extend():
|
||||
my_list = ["safe"]
|
||||
tainted_list = [TAINTED_STRING]
|
||||
|
||||
ensure_not_tainted(my_list)
|
||||
|
||||
my_list.extend(tainted_list)
|
||||
ensure_tainted(my_list)
|
||||
|
||||
|
||||
def dict_update_dict():
|
||||
my_dict = {"key1": "safe"}
|
||||
tainted_dict = {"key2": TAINTED_STRING}
|
||||
|
||||
ensure_not_tainted(my_dict)
|
||||
|
||||
my_dict.update(tainted_dict)
|
||||
ensure_tainted(my_dict)
|
||||
|
||||
|
||||
def dict_update_kv_list():
|
||||
my_dict = {"key1": "safe"}
|
||||
tainted_kv_list = [("key2", TAINTED_STRING)]
|
||||
|
||||
ensure_not_tainted(my_dict)
|
||||
|
||||
my_dict.update(tainted_kv_list)
|
||||
ensure_tainted(my_dict)
|
||||
|
||||
|
||||
def dict_update_kv_arg():
|
||||
my_dict = {"key1": "safe"}
|
||||
|
||||
ensure_not_tainted(my_dict)
|
||||
|
||||
my_dict.update(key2=TAINTED_STRING)
|
||||
ensure_tainted(my_dict)
|
||||
|
||||
|
||||
def dict_manual_update():
|
||||
my_dict = {"key1": "safe"}
|
||||
tainted_dict = {"key2": TAINTED_STRING}
|
||||
|
||||
ensure_not_tainted(my_dict)
|
||||
|
||||
for k in tainted_dict:
|
||||
my_dict[k] = tainted_dict[k]
|
||||
ensure_tainted(my_dict)
|
||||
|
||||
|
||||
def dict_merge():
|
||||
my_dict = {"key1": "safe"}
|
||||
tainted_dict = {"key2": TAINTED_STRING}
|
||||
|
||||
merged = {**my_dict, **tainted_dict}
|
||||
ensure_tainted(merged)
|
||||
|
||||
|
||||
def set_add():
|
||||
tainted_string = TAINTED_STRING
|
||||
my_set = {"safe"}
|
||||
|
||||
ensure_not_tainted(my_set)
|
||||
|
||||
my_set.add(tainted_string)
|
||||
ensure_tainted(my_set)
|
||||
|
||||
|
||||
# Make tests runable
|
||||
|
||||
test_construction()
|
||||
test_access(0, 0, 2)
|
||||
test_dict_access("name")
|
||||
test_named_tuple()
|
||||
test_defaultdict("key", "key")
|
||||
test_copy_1()
|
||||
test_copy_2()
|
||||
|
||||
list_index_assign()
|
||||
list_index_aug_assign()
|
||||
list_append()
|
||||
list_extend()
|
||||
|
||||
dict_update_dict()
|
||||
dict_update_kv_list()
|
||||
dict_update_kv_arg()
|
||||
dict_manual_update()
|
||||
dict_merge()
|
||||
|
||||
set_add()
|
||||
@@ -0,0 +1,64 @@
|
||||
# Add taintlib to PATH so it can be imported during runtime without any hassle
|
||||
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
|
||||
from taintlib import *
|
||||
|
||||
# This has no runtime impact, but allows autocomplete to work
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from ..taintlib import *
|
||||
|
||||
|
||||
# Actual tests
|
||||
|
||||
from io import StringIO
|
||||
|
||||
# Workaround for Python3 not having unicode
|
||||
import sys
|
||||
if sys.version_info[0] == 3:
|
||||
unicode = str
|
||||
|
||||
def test():
|
||||
print("\n# test")
|
||||
ts = TAINTED_STRING
|
||||
import json
|
||||
|
||||
ensure_tainted(
|
||||
json.dumps(ts),
|
||||
json.loads(json.dumps(ts)),
|
||||
)
|
||||
|
||||
# For Python2, need to convert to unicode for StringIO to work
|
||||
tainted_filelike = StringIO(unicode(json.dumps(ts)))
|
||||
|
||||
ensure_tainted(
|
||||
tainted_filelike,
|
||||
json.load(tainted_filelike),
|
||||
)
|
||||
|
||||
def non_syntacical():
|
||||
print("\n# non_syntacical")
|
||||
ts = TAINTED_STRING
|
||||
|
||||
# a less syntactical approach
|
||||
from json import load, loads, dumps
|
||||
|
||||
dumps_alias = dumps
|
||||
|
||||
ensure_tainted(
|
||||
dumps(ts),
|
||||
dumps_alias(ts),
|
||||
loads(dumps(ts)),
|
||||
)
|
||||
|
||||
# For Python2, need to convert to unicode for StringIO to work
|
||||
tainted_filelike = StringIO(unicode(dumps(ts)))
|
||||
|
||||
ensure_tainted(
|
||||
tainted_filelike,
|
||||
load(tainted_filelike),
|
||||
)
|
||||
|
||||
# Make tests runable
|
||||
|
||||
test()
|
||||
non_syntacical()
|
||||
@@ -1,27 +1,20 @@
|
||||
import sys
|
||||
# Add taintlib to PATH so it can be imported during runtime without any hassle
|
||||
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
|
||||
from taintlib import *
|
||||
|
||||
if sys.version_info[0] == 3:
|
||||
unicode = str
|
||||
|
||||
|
||||
TAINTED_STRING = "TAINTED_STRING"
|
||||
TAINTED_BYTES = b"TAINTED_BYTES"
|
||||
|
||||
|
||||
def ensure_tainted(*args):
|
||||
print("- ensure_tainted")
|
||||
for i, arg in enumerate(args):
|
||||
print("arg {}: {!r}".format(i, arg))
|
||||
|
||||
|
||||
def ensure_not_tainted(*args):
|
||||
print("- ensure_not_tainted")
|
||||
for i, arg in enumerate(args):
|
||||
print("arg {}: {!r}".format(i, arg))
|
||||
# This has no runtime impact, but allows autocomplete to work
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from ..taintlib import *
|
||||
|
||||
|
||||
# Actual tests
|
||||
|
||||
# Workaround for Python3 not having unicode
|
||||
import sys
|
||||
if sys.version_info[0] == 3:
|
||||
unicode = str
|
||||
|
||||
|
||||
def str_operations():
|
||||
print("\n# str_operations")
|
||||
@@ -42,6 +35,11 @@ def str_operations():
|
||||
unicode(ts),
|
||||
)
|
||||
|
||||
aug_assignment = "safe"
|
||||
ensure_not_tainted(aug_assignment)
|
||||
aug_assignment += TAINTED_STRING
|
||||
ensure_tainted(aug_assignment)
|
||||
|
||||
|
||||
def str_methods():
|
||||
print("\n# str_methods")
|
||||
@@ -140,18 +138,6 @@ def binary_decode_encode():
|
||||
base64.b16encode(tb),
|
||||
base64.b16decode(base64.b16encode(tb)),
|
||||
|
||||
# # New in Python 3.4
|
||||
# base64.a85encode(tb),
|
||||
# base64.a85decode(base64.a85encode(tb)),
|
||||
|
||||
# # New in Python 3.4
|
||||
# base64.b85encode(tb),
|
||||
# base64.b85decode(base64.b85encode(tb)),
|
||||
|
||||
# # New in Python 3.1
|
||||
# base64.encodebytes(tb),
|
||||
# base64.decodebytes(base64.encodebytes(tb)),
|
||||
|
||||
# deprecated since Python 3.1, but still works
|
||||
base64.encodestring(tb),
|
||||
base64.decodestring(base64.encodestring(tb)),
|
||||
@@ -0,0 +1,75 @@
|
||||
# Add taintlib to PATH so it can be imported during runtime without any hassle
|
||||
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
|
||||
from taintlib import *
|
||||
|
||||
# This has no runtime impact, but allows autocomplete to work
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from ..taintlib import *
|
||||
|
||||
|
||||
# Actual tests
|
||||
|
||||
def unpacking():
|
||||
l = TAINTED_LIST[0:3]
|
||||
a, b, c = l
|
||||
ensure_tainted(a, b, c)
|
||||
|
||||
|
||||
def unpacking_to_list():
|
||||
l = TAINTED_LIST[0:3]
|
||||
[a, b, c] = l
|
||||
ensure_tainted(a, b, c)
|
||||
|
||||
|
||||
def nested():
|
||||
l = TAINTED_LIST[0:3]
|
||||
ll = [l, l, l]
|
||||
|
||||
# list
|
||||
[[a1, a2, a3], b, c] = ll
|
||||
ensure_tainted(a1, a2, a3, b, c)
|
||||
|
||||
# tuple
|
||||
((a1, a2, a3), b, c) = ll
|
||||
ensure_tainted(a1, a2, a3, b, c)
|
||||
|
||||
# mixed
|
||||
[(a1, a2, a3), b, c] = ll
|
||||
ensure_tainted(a1, a2, a3, b, c)
|
||||
|
||||
|
||||
def unpack_from_set():
|
||||
# no guarantee on ordering ... don't know why you would ever do this
|
||||
a, b, c = {"foo", "bar", TAINTED_STRING}
|
||||
# either all should be tainted, or none of them
|
||||
ensure_tainted(a, b, c)
|
||||
|
||||
|
||||
def contrived_1():
|
||||
# A contrived example. Don't know why anyone would ever actually do this.
|
||||
tainted_list = TAINTED_LIST[0:3]
|
||||
no_taint_list = [1,2,3]
|
||||
|
||||
(a, b, c), (d, e, f) = tainted_list, no_taint_list
|
||||
ensure_tainted(a, b, c)
|
||||
ensure_not_tainted(d, e, f) # FP: we mark `d`, `e` and `f` as tainted.
|
||||
|
||||
|
||||
def contrived_2():
|
||||
# A contrived example. Don't know why anyone would ever actually do this.
|
||||
|
||||
# Old taint tracking was only able to handle taint nested 2 levels in sequences,
|
||||
# so would not mark a, b, c as tainted
|
||||
[[[ (a, b, c) ]]] = [[[ TAINTED_LIST[0:3] ]]]
|
||||
ensure_tainted(a, b, c)
|
||||
|
||||
|
||||
# Make tests runable
|
||||
|
||||
unpacking()
|
||||
unpacking_to_list()
|
||||
nested()
|
||||
unpack_from_set()
|
||||
contrived_1()
|
||||
contrived_2()
|
||||
@@ -1,10 +0,0 @@
|
||||
| test.py:26 | ok | str_methods | ts.casefold() |
|
||||
| test.py:28 | ok | str_methods | ts.format_map(..) |
|
||||
| test.py:29 | fail | str_methods | "{unsafe}".format_map(..) |
|
||||
| test.py:40 | fail | binary_decode_encode | base64.a85encode(..) |
|
||||
| test.py:41 | fail | binary_decode_encode | base64.a85decode(..) |
|
||||
| test.py:44 | fail | binary_decode_encode | base64.b85encode(..) |
|
||||
| test.py:45 | fail | binary_decode_encode | base64.b85decode(..) |
|
||||
| test.py:48 | fail | binary_decode_encode | base64.encodebytes(..) |
|
||||
| test.py:49 | fail | binary_decode_encode | base64.decodebytes(..) |
|
||||
| test.py:57 | ok | f_strings | Fstring |
|
||||
@@ -1,62 +0,0 @@
|
||||
| test.py:32 | ok | str_operations | ts |
|
||||
| test.py:33 | ok | str_operations | BinaryExpr |
|
||||
| test.py:34 | ok | str_operations | BinaryExpr |
|
||||
| test.py:35 | ok | str_operations | BinaryExpr |
|
||||
| test.py:36 | ok | str_operations | ts[Slice] |
|
||||
| test.py:37 | ok | str_operations | ts[Slice] |
|
||||
| test.py:38 | ok | str_operations | ts[Slice] |
|
||||
| test.py:39 | ok | str_operations | ts[0] |
|
||||
| test.py:40 | ok | str_operations | str(..) |
|
||||
| test.py:41 | ok | str_operations | bytes(..) |
|
||||
| test.py:42 | ok | str_operations | unicode(..) |
|
||||
| test.py:51 | ok | str_methods | ts.capitalize() |
|
||||
| test.py:52 | ok | str_methods | ts.center(..) |
|
||||
| test.py:53 | ok | str_methods | ts.expandtabs() |
|
||||
| test.py:55 | ok | str_methods | ts.format() |
|
||||
| test.py:56 | ok | str_methods | "{}".format(..) |
|
||||
| test.py:57 | ok | str_methods | "{unsafe}".format(..) |
|
||||
| test.py:59 | ok | str_methods | ts.join(..) |
|
||||
| test.py:60 | fail | str_methods | "".join(..) |
|
||||
| test.py:62 | ok | str_methods | ts.ljust(..) |
|
||||
| test.py:63 | ok | str_methods | ts.lstrip() |
|
||||
| test.py:64 | ok | str_methods | ts.lower() |
|
||||
| test.py:66 | ok | str_methods | ts.replace(..) |
|
||||
| test.py:67 | ok | str_methods | "safe".replace(..) |
|
||||
| test.py:69 | ok | str_methods | ts.rjust(..) |
|
||||
| test.py:70 | ok | str_methods | ts.rstrip() |
|
||||
| test.py:71 | ok | str_methods | ts.strip() |
|
||||
| test.py:72 | ok | str_methods | ts.swapcase() |
|
||||
| test.py:73 | ok | str_methods | ts.title() |
|
||||
| test.py:74 | ok | str_methods | ts.upper() |
|
||||
| test.py:75 | ok | str_methods | ts.zfill(..) |
|
||||
| test.py:77 | ok | str_methods | ts.encode(..) |
|
||||
| test.py:78 | ok | str_methods | ts.encode(..).decode(..) |
|
||||
| test.py:80 | ok | str_methods | tb.decode(..) |
|
||||
| test.py:81 | ok | str_methods | tb.decode(..).encode(..) |
|
||||
| test.py:84 | ok | str_methods | ts.partition(..) |
|
||||
| test.py:85 | ok | str_methods | ts.rpartition(..) |
|
||||
| test.py:86 | ok | str_methods | ts.rsplit(..) |
|
||||
| test.py:87 | ok | str_methods | ts.split(..) |
|
||||
| test.py:88 | ok | str_methods | ts.splitlines() |
|
||||
| test.py:93 | ok | str_methods | "safe".replace(..) |
|
||||
| test.py:95 | fail | str_methods | ts.join(..) |
|
||||
| test.py:96 | fail | str_methods | ts.join(..) |
|
||||
| test.py:106 | fail | non_syntactic | meth() |
|
||||
| test.py:107 | fail | non_syntactic | _str(..) |
|
||||
| test.py:116 | ok | percent_fmt | BinaryExpr |
|
||||
| test.py:117 | ok | percent_fmt | BinaryExpr |
|
||||
| test.py:118 | fail | percent_fmt | BinaryExpr |
|
||||
| test.py:128 | fail | binary_decode_encode | base64.b64encode(..) |
|
||||
| test.py:129 | fail | binary_decode_encode | base64.b64decode(..) |
|
||||
| test.py:131 | fail | binary_decode_encode | base64.standard_b64encode(..) |
|
||||
| test.py:132 | fail | binary_decode_encode | base64.standard_b64decode(..) |
|
||||
| test.py:134 | fail | binary_decode_encode | base64.urlsafe_b64encode(..) |
|
||||
| test.py:135 | fail | binary_decode_encode | base64.urlsafe_b64decode(..) |
|
||||
| test.py:137 | fail | binary_decode_encode | base64.b32encode(..) |
|
||||
| test.py:138 | fail | binary_decode_encode | base64.b32decode(..) |
|
||||
| test.py:140 | fail | binary_decode_encode | base64.b16encode(..) |
|
||||
| test.py:141 | fail | binary_decode_encode | base64.b16decode(..) |
|
||||
| test.py:156 | fail | binary_decode_encode | base64.encodestring(..) |
|
||||
| test.py:157 | fail | binary_decode_encode | base64.decodestring(..) |
|
||||
| test.py:162 | fail | binary_decode_encode | quopri.encodestring(..) |
|
||||
| test.py:163 | fail | binary_decode_encode | quopri.decodestring(..) |
|
||||
@@ -0,0 +1,15 @@
|
||||
TAINTED_STRING = "TAINTED_STRING"
|
||||
TAINTED_BYTES = b"TAINTED_BYTES"
|
||||
TAINTED_LIST = ["tainted-{}".format(i) for i in range(5)]
|
||||
TAINTED_DICT = {"name": TAINTED_STRING, "some key": "foo"}
|
||||
|
||||
def ensure_tainted(*args):
    """Print a header line followed by the position and repr of each argument.

    NOTE(review): presumably the taint-tracking test queries look for calls to
    this function by name; at runtime it only prints its arguments.
    """
    print("- ensure_tainted")
    position = 0
    for value in args:
        print("arg {}: {!r}".format(position, value))
        position += 1
|
||||
|
||||
|
||||
def ensure_not_tainted(*args):
    """Print a header line followed by the position and repr of each argument.

    NOTE(review): presumably the taint-tracking test queries look for calls to
    this function by name; at runtime it only prints its arguments.
    """
    print("- ensure_not_tainted")
    line_template = "arg {}: {!r}"
    for index, value in enumerate(args):
        print(line_template.format(index, value))
|
||||
@@ -20,6 +20,7 @@
|
||||
* complex | `42j` (not supported yet)
|
||||
*/
|
||||
|
||||
private import python
|
||||
import experimental.dataflow.DataFlow
|
||||
|
||||
class TestConfiguration extends DataFlow::Configuration {
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
class SomeClass:
|
||||
pass
|
||||
|
||||
def simple_read_write():
|
||||
x = SomeClass() # $tracked=foo
|
||||
x.foo = tracked # $tracked $tracked=foo
|
||||
y = x.foo # $tracked=foo $tracked
|
||||
do_stuff(y) # $tracked
|
||||
|
||||
def foo():
|
||||
x = SomeClass() # $tracked=attr
|
||||
bar(x) # $tracked=attr
|
||||
x.attr = tracked # $tracked=attr $tracked
|
||||
baz(x) # $tracked=attr
|
||||
|
||||
def bar(x): # $tracked=attr
|
||||
z = x.attr # $tracked $tracked=attr
|
||||
do_stuff(z) # $tracked
|
||||
|
||||
def expects_int(x): # $int=field $f+:str=field
|
||||
do_int_stuff(x.field) # $int $f+:str $int=field $f+:str=field
|
||||
|
||||
def expects_string(x): # $f+:int=field $str=field
|
||||
do_string_stuff(x.field) # $f+:int $str $f+:int=field $str=field
|
||||
|
||||
def test_incompatible_types():
|
||||
x = SomeClass() # $int,str=field
|
||||
x.field = int(5) # $int=field $f+:str=field $int $f+:str
|
||||
expects_int(x) # $int=field $f+:str=field
|
||||
x.field = str("Hello") # $f+:int=field $str=field $f+:int $str
|
||||
expects_string(x) # $f+:int=field $str=field
|
||||
61
python/ql/test/experimental/dataflow/typetracking/test.py
Normal file
61
python/ql/test/experimental/dataflow/typetracking/test.py
Normal file
@@ -0,0 +1,61 @@
|
||||
def get_tracked():
|
||||
x = tracked # $tracked
|
||||
return x # $tracked
|
||||
|
||||
def use_tracked_foo(x): # $tracked
|
||||
do_stuff(x) # $tracked
|
||||
|
||||
def foo():
|
||||
use_tracked_foo(
|
||||
get_tracked() # $tracked
|
||||
)
|
||||
|
||||
def use_tracked_bar(x): # $tracked
|
||||
do_stuff(x) # $tracked
|
||||
|
||||
def bar():
|
||||
x = get_tracked() # $tracked
|
||||
use_tracked_bar(x) # $tracked
|
||||
|
||||
def use_tracked_baz(x): # $tracked
|
||||
do_stuff(x) # $tracked
|
||||
|
||||
def baz():
|
||||
x = tracked # $tracked
|
||||
use_tracked_baz(x) # $tracked
|
||||
|
||||
def id(x): # $tracked
|
||||
return x # $tracked
|
||||
|
||||
# NOTE(review): the body below passes `y`, which this function never binds; the
# parameter is named `x`. Confirm whether `x` was the intended argument.
def use_tracked_quux(x): # $f-:tracked
|
||||
do_stuff(y) # call after return -- not tracked in here.
|
||||
|
||||
def quux():
|
||||
x = tracked # $tracked
|
||||
y = id(x) # $tracked
|
||||
use_tracked_quux(y) # not tracked out of call to id.
|
||||
|
||||
g = None
|
||||
|
||||
def write_g(x): # $tracked
    # Without this declaration the assignment below would only bind a
    # function-local `g`, leaving the module-level `g` (read by `use_g`)
    # untouched, so no write to the global variable would actually happen.
    global g
    g = x # $tracked
|
||||
|
||||
def use_g():
|
||||
do_stuff(g) # $f-:tracked // no global flow for now.
|
||||
|
||||
def global_var_write_test():
|
||||
x = tracked # $tracked
|
||||
write_g(x) # $tracked
|
||||
use_g()
|
||||
|
||||
def expects_int(x): # $int
|
||||
do_int_stuff(x) # $int
|
||||
|
||||
def expects_string(x): # $str
|
||||
do_string_stuff(x) # $str
|
||||
|
||||
def redefine_test():
|
||||
x = int(5) # $int
|
||||
expects_int(x) # $int
|
||||
x = str("Hello") # $str
|
||||
expects_string(x) # $str
|
||||
72
python/ql/test/experimental/dataflow/typetracking/tracked.ql
Normal file
72
python/ql/test/experimental/dataflow/typetracking/tracked.ql
Normal file
@@ -0,0 +1,72 @@
|
||||
import python
|
||||
import experimental.dataflow.TypeTracker
|
||||
import TestUtilities.InlineExpectationsTest
|
||||
|
||||
/**
 * Gets a node associated with type tracker `t` that is either a name node whose
 * identifier is `tracked` (when `t` is the start tracker), or the result of
 * applying `track` to a node already found by this predicate.
 */
Node tracked(TypeTracker t) {
|
||||
t.start() and
|
||||
result.asCfgNode() = any(NameNode n | n.getId() = "tracked")
|
||||
or
|
||||
exists(TypeTracker t2 | result = tracked(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
 * An inline-expectations test for the tag `tracked`.
 *
 * Each node `e` returned by `tracked(t)` is reported at `e`'s location, with
 * `t.getAttr()` as the value and `e.toString()` as the element.
 */
class TrackedTest extends InlineExpectationsTest {
|
||||
TrackedTest() { this = "TrackedTest" }
|
||||
|
||||
override string getARelevantTag() { result = "tracked" }
|
||||
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(Node e, TypeTracker t |
|
||||
e = tracked(t) and
|
||||
tag = "tracked" and
|
||||
location = e.getLocation() and
|
||||
value = t.getAttr() and
|
||||
element = e.toString()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Gets a node associated with type tracker `t` that is either a call whose
 * function is a name node with identifier `int` (when `t` is the start tracker),
 * or the result of applying `track` to a node already found by this predicate.
 */
Node int_type(TypeTracker t) {
|
||||
t.start() and
|
||||
result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "int")
|
||||
or
|
||||
exists(TypeTracker t2 | result = int_type(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
 * Gets a node associated with type tracker `t` that is either a call whose
 * function is a name node with identifier `str` (when `t` is the start tracker),
 * or the result of applying `track` to a node already found by this predicate.
 */
Node string_type(TypeTracker t) {
|
||||
t.start() and
|
||||
result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "str")
|
||||
or
|
||||
exists(TypeTracker t2 | result = string_type(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
 * An inline-expectations test for the tag `int`.
 *
 * Each node `e` returned by `int_type(t)` is reported at `e`'s location, with
 * `t.getAttr()` as the value and `e.toString()` as the element.
 */
class TrackedIntTest extends InlineExpectationsTest {
|
||||
TrackedIntTest() { this = "TrackedIntTest" }
|
||||
|
||||
override string getARelevantTag() { result = "int" }
|
||||
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(Node e, TypeTracker t |
|
||||
e = int_type(t) and
|
||||
tag = "int" and
|
||||
location = e.getLocation() and
|
||||
value = t.getAttr() and
|
||||
element = e.toString()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * An inline-expectations test for the tag `str`.
 *
 * Each node `e` returned by `string_type(t)` is reported at `e`'s location, with
 * `t.getAttr()` as the value and `e.toString()` as the element.
 */
class TrackedStringTest extends InlineExpectationsTest {
|
||||
TrackedStringTest() { this = "TrackedStringTest" }
|
||||
|
||||
override string getARelevantTag() { result = "str" }
|
||||
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(Node e, TypeTracker t |
|
||||
e = string_type(t) and
|
||||
tag = "str" and
|
||||
location = e.getLocation() and
|
||||
value = t.getAttr() and
|
||||
element = e.toString()
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
experimental/Security/CWE-074/TemplateInjection.ql
|
||||
@@ -0,0 +1 @@
|
||||
semmle-extractor-options: --max-import-depth=3 -p ../../../../query-tests/Security/lib/
|
||||
@@ -0,0 +1 @@
|
||||
experimental/Security/CWE-091/Xslt.ql
|
||||
@@ -0,0 +1 @@
|
||||
semmle-extractor-options: --max-import-depth=3 -p ../../../../query-tests/Security/lib/
|
||||
@@ -0,0 +1 @@
|
||||
semmle-extractor-options: --max-import-depth=3 -p ../../../../query-tests/Security/lib/
|
||||
@@ -0,0 +1 @@
|
||||
experimental/Security/CWE-643/xpath.ql
|
||||
1
python/ql/test/experimental/query-tests/options
Normal file
1
python/ql/test/experimental/query-tests/options
Normal file
@@ -0,0 +1 @@
|
||||
semmle-extractor-options: --max-import-depth=1
|
||||
Reference in New Issue
Block a user