Python: Move dataflow tests out of experimental

This commit is contained in:
Rasmus Wriedt Larsen
2024-04-23 09:40:44 +02:00
parent 19974f04c9
commit ce711f7d2f
260 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,117 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
signature module QueryTestSig {
predicate isSink(DataFlow::Node sink);
predicate flowTo(DataFlow::Node sink);
}
module MakeQueryTest<QueryTestSig Impl> {
module DataFlowQueryTest implements TestSig {
string getARelevantTag() { result = "result" }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(DataFlow::Node sink | Impl::flowTo(sink) |
location = sink.getLocation() and
tag = "result" and
value = "BAD" and
element = sink.toString()
)
}
// We allow annotating any sink with `result=OK` to signal
// safe sinks.
// Sometimes a line contains both an alert and a safe sink.
// In this situation, the annotation form `OK(safe sink)`
// can be useful.
predicate hasOptionalResult(Location location, string element, string tag, string value) {
exists(DataFlow::Node sink | Impl::isSink(sink) |
location = sink.getLocation() and
tag = "result" and
value in ["OK", "OK(" + prettyNode(sink) + ")"] and
element = sink.toString()
)
}
}
import MakeTest<DataFlowQueryTest>
query predicate missingAnnotationOnSink(Location location, string error, string element) {
error = "ERROR, you should add `# $ MISSING: result=BAD` or `result=OK` annotation" and
exists(DataFlow::Node sink |
exists(sink.getLocation().getFile().getRelativePath()) and
Impl::isSink(sink) and
location = sink.getLocation() and
element = prettyExpr(sink.asExpr()) and
not Impl::flowTo(sink) and
not exists(FalseNegativeTestExpectation missingResult |
missingResult.getTag() = "result" and
missingResult.getValue() = "BAD" and
missingResult.getLocation().getFile() = location.getFile() and
missingResult.getLocation().getStartLine() = location.getStartLine()
) and
not exists(GoodTestExpectation okResult |
okResult.getTag() = "result" and
okResult.getValue() in ["OK", "OK(" + prettyNode(sink) + ")"] and
okResult.getLocation().getFile() = location.getFile() and
okResult.getLocation().getStartLine() = location.getStartLine()
)
)
}
}
module FromDataFlowConfig<DataFlow::ConfigSig C> {
module Impl implements QueryTestSig {
predicate isSink(DataFlow::Node sink) { C::isSink(sink) }
predicate flowTo(DataFlow::Node sink) { DataFlow::Global<C>::flowTo(sink) }
}
import MakeQueryTest<Impl>
}
module FromDataFlowStateConfig<DataFlow::StateConfigSig C> {
module Impl implements QueryTestSig {
predicate isSink(DataFlow::Node sink) { C::isSink(sink) or C::isSink(sink, _) }
predicate flowTo(DataFlow::Node sink) { DataFlow::GlobalWithState<C>::flowTo(sink) }
}
import MakeQueryTest<Impl>
}
module FromTaintTrackingConfig<DataFlow::ConfigSig C> {
module Impl implements QueryTestSig {
predicate isSink(DataFlow::Node sink) { C::isSink(sink) }
predicate flowTo(DataFlow::Node sink) { TaintTracking::Global<C>::flowTo(sink) }
}
import MakeQueryTest<Impl>
}
module FromTaintTrackingStateConfig<DataFlow::StateConfigSig C> {
module Impl implements QueryTestSig {
predicate isSink(DataFlow::Node sink) { C::isSink(sink) or C::isSink(sink, _) }
predicate flowTo(DataFlow::Node sink) { TaintTracking::GlobalWithState<C>::flowTo(sink) }
}
import MakeQueryTest<Impl>
}
deprecated signature class LegacyConfiguration extends DataFlow::Configuration;
deprecated module FromLegacyConfiguration<LegacyConfiguration C> {
module Impl implements QueryTestSig {
predicate isSink(DataFlow::Node sink) { any(C c).isSink(sink) or any(C c).isSink(sink, _) }
predicate flowTo(DataFlow::Node sink) { any(C c).hasFlowTo(sink) }
}
import MakeQueryTest<Impl>
}

View File

@@ -0,0 +1,39 @@
import python
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
signature module FlowTestSig {
string flowTag();
predicate relevantFlow(DataFlow::Node fromNode, DataFlow::Node toNode);
}
module MakeTestSig<FlowTestSig Impl> implements TestSig {
string getARelevantTag() { result = Impl::flowTag() }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(DataFlow::Node fromNode, DataFlow::Node toNode | Impl::relevantFlow(fromNode, toNode) |
location = toNode.getLocation() and
tag = Impl::flowTag() and
value =
"\"" + prettyNode(fromNode).replaceAll("\"", "'") + lineStr(fromNode, toNode) + " -> " +
prettyNode(toNode).replaceAll("\"", "'") + "\"" and
element = toNode.toString()
)
}
pragma[inline]
private string lineStr(DataFlow::Node fromNode, DataFlow::Node toNode) {
exists(int delta |
delta = fromNode.getLocation().getStartLine() - toNode.getLocation().getStartLine()
|
if delta = 0
then result = ""
else
if delta > 0
then result = ", l:+" + delta.toString()
else result = ", l:" + delta.toString()
)
}
}

View File

@@ -0,0 +1,13 @@
import python
import semmle.python.dataflow.new.DataFlow
import FlowTest
module LocalFlowStepTest implements FlowTestSig {
string flowTag() { result = "step" }
predicate relevantFlow(DataFlow::Node fromNode, DataFlow::Node toNode) {
DataFlow::localFlowStep(fromNode, toNode)
}
}
import MakeTest<MakeTestSig<LocalFlowStepTest>>

View File

@@ -0,0 +1,42 @@
import python
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate
import FlowTest
module MaximalFlowTest implements FlowTestSig {
string flowTag() { result = "flow" }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
source != sink and
MaximalFlows::flow(source, sink)
}
}
import MakeTest<MakeTestSig<MaximalFlowTest>>
/**
* A configuration to find all "maximal" flows.
* To be used on small programs.
*/
module MaximalFlowsConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
exists(node.getLocation().getFile().getRelativePath()) and
not node.asCfgNode() instanceof CallNode and
not node.asCfgNode().getNode() instanceof Return and
not node instanceof DataFlow::ParameterNode and
not node instanceof DataFlow::PostUpdateNode and
// not node.asExpr() instanceof FunctionExpr and
// not node.asExpr() instanceof ClassExpr and
not DataFlow::localFlowStep(_, node)
}
predicate isSink(DataFlow::Node node) {
exists(node.getLocation().getFile().getRelativePath()) and
not any(CallNode c).getArg(_) = node.asCfgNode() and
not node instanceof DataFlow::ArgumentNode and
not node.asCfgNode().(NameNode).getId().matches("SINK%") and
not DataFlow::localFlowStep(node, _)
}
}
module MaximalFlows = DataFlow::Global<MaximalFlowsConfig>;

View File

@@ -0,0 +1,34 @@
import python
import experimental.dataflow.TestUtil.FlowTest
import experimental.dataflow.testConfig
private import semmle.python.dataflow.new.internal.PrintNode
module DataFlowTest implements FlowTestSig {
string flowTag() { result = "flow" }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
TestFlow::flow(source, sink)
}
}
import MakeTest<MakeTestSig<DataFlowTest>>
query predicate missingAnnotationOnSink(Location location, string error, string element) {
error = "ERROR, you should add `# $ MISSING: flow` annotation" and
exists(DataFlow::Node sink |
TestConfig::isSink(sink) and
// note: we only care about `SINK` and not `SINK_F`, so we have to reconstruct manually.
exists(DataFlow::CallCfgNode call |
call.getFunction().asCfgNode().(NameNode).getId() = "SINK" and
(sink = call.getArg(_) or sink = call.getArgByName(_))
) and
location = sink.getLocation() and
element = prettyExpr(sink.asExpr()) and
not TestFlow::flowTo(sink) and
not exists(FalseNegativeTestExpectation missingResult |
missingResult.getTag() = "flow" and
missingResult.getLocation().getFile() = location.getFile() and
missingResult.getLocation().getStartLine() = location.getStartLine()
)
)
}

View File

@@ -0,0 +1,33 @@
import python
import experimental.dataflow.TestUtil.FlowTest
import experimental.dataflow.testTaintConfig
private import semmle.python.dataflow.new.internal.PrintNode
module DataFlowTest implements FlowTestSig {
string flowTag() { result = "flow" }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
TestFlow::flow(source, sink)
}
}
import MakeTest<MakeTestSig<DataFlowTest>>
query predicate missingAnnotationOnSink(Location location, string error, string element) {
error = "ERROR, you should add `# $ MISSING: flow` annotation" and
exists(DataFlow::Node sink |
exists(DataFlow::CallCfgNode call |
// note: we only care about `SINK` and not `SINK_F`, so we have to reconstruct manually.
call.getFunction().asCfgNode().(NameNode).getId() = "SINK" and
(sink = call.getArg(_) or sink = call.getArgByName(_))
) and
location = sink.getLocation() and
element = prettyExpr(sink.asExpr()) and
not TestFlow::flowTo(sink) and
not exists(FalseNegativeTestExpectation missingResult |
missingResult.getTag() = "flow" and
missingResult.getLocation().getFile() = location.getFile() and
missingResult.getLocation().getStartLine() = location.getStartLine()
)
)
}

View File

@@ -0,0 +1,58 @@
import python
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
/**
* A routing test is designed to test that values are routed to the
* correct arguments of the correct functions. It is assumed that
* the functions tested sink their arguments sequentially, that is
* `SINK1(arg1)`, etc.
*/
signature module RoutingTestSig {
class Argument;
string flowTag(Argument arg);
predicate relevantFlow(DataFlow::Node fromNode, DataFlow::Node toNode, Argument arg);
}
module MakeTestSig<RoutingTestSig Impl> implements TestSig {
string getARelevantTag() { result in ["func", Impl::flowTag(_)] }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(DataFlow::Node fromNode, DataFlow::Node toNode, Impl::Argument arg |
Impl::relevantFlow(fromNode, toNode, arg)
|
location = fromNode.getLocation() and
element = fromNode.toString() and
(
tag = Impl::flowTag(arg) and
if "\"" + tag + "\"" = fromValue(fromNode) then value = "" else value = fromValue(fromNode)
or
// only have result for `func` tag if the function where `arg<n>` is used, is
// different from the function name of the call where `arg<n>` was specified as
// an argument
tag = "func" and
value = toFunc(toNode) and
not value = fromFunc(fromNode)
)
)
}
}
pragma[inline]
private string fromValue(DataFlow::Node fromNode) {
result = "\"" + prettyNode(fromNode).replaceAll("\"", "'") + "\""
}
pragma[inline]
private string fromFunc(DataFlow::ArgumentNode fromNode) {
result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
}
pragma[inline]
private string toFunc(DataFlow::Node toNode) {
result = toNode.getEnclosingCallable().getQualifiedName()
}

View File

@@ -0,0 +1,37 @@
import python
private import semmle.python.dataflow.new.internal.PrintNode
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.ApiGraphs
import TestUtilities.InlineExpectationsTest
signature module UnresolvedCallExpectationsSig {
predicate unresolvedCall(CallNode call);
}
module DefaultUnresolvedCallExpectations implements UnresolvedCallExpectationsSig {
predicate unresolvedCall(CallNode call) {
not exists(DataFlowPrivate::DataFlowCall dfc |
exists(dfc.getCallable()) and dfc.getNode() = call
) and
not DataFlowPrivate::resolveClassCall(call, _) and
not call = API::builtin(_).getACall().asCfgNode()
}
}
module MakeUnresolvedCallExpectations<UnresolvedCallExpectationsSig Impl> {
private module UnresolvedCallExpectations implements TestSig {
string getARelevantTag() { result = "unresolved_call" }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(CallNode call | Impl::unresolvedCall(call) |
location = call.getLocation() and
tag = "unresolved_call" and
value = prettyExpr(call.getNode()) and
element = call.toString()
)
}
}
import MakeTest<UnresolvedCallExpectations>
}

View File

@@ -0,0 +1,13 @@
import semmle.python.dataflow.new.DataFlow
/**
* A configuration to find all flows.
* To be used on tiny programs.
*/
module AllFlowsConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { any() }
predicate isSink(DataFlow::Node node) { any() }
}
module AllFlowsFlow = DataFlow::Global<AllFlowsConfig>;

View File

@@ -0,0 +1,3 @@
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -0,0 +1,8 @@
import experimental.dataflow.callGraphConfig
from DataFlow::Node source, DataFlow::Node sink
where
CallGraphFlow::flow(source, sink) and
exists(source.getLocation().getFile().getRelativePath()) and
exists(sink.getLocation().getFile().getRelativePath())
select source, sink

View File

@@ -0,0 +1,3 @@
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -0,0 +1,7 @@
import experimental.dataflow.callGraphConfig
from DataFlow::Node sink
where
CallGraphConfig::isSink(sink) and
exists(sink.getLocation().getFile().getRelativePath())
select sink

View File

@@ -0,0 +1,2 @@
| test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:7:19:7:19 | ControlFlowNode for a |

View File

@@ -0,0 +1,7 @@
import experimental.dataflow.callGraphConfig
from DataFlow::Node source
where
CallGraphConfig::isSource(source) and
exists(source.getLocation().getFile().getRelativePath())
select source

View File

@@ -0,0 +1,58 @@
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:3:3:3:3 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:3:3:3 | ControlFlowNode for z | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:3:3:3:3 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | ControlFlowNode for a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -0,0 +1,9 @@
import allFlowsConfig
from DataFlow::Node source, DataFlow::Node sink
where
source != sink and
AllFlowsFlow::flow(source, sink) and
exists(source.getLocation().getFile().getRelativePath()) and
exists(sink.getLocation().getFile().getRelativePath())
select source, sink

View File

@@ -0,0 +1,42 @@
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:3:3:3:3 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:3:3:3 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:3:3:3 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:3:3:3 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | ControlFlowNode for a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | ControlFlowNode for a |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -0,0 +1,8 @@
import allFlowsConfig
from AllFlowsFlow::PathNode fromNode, AllFlowsFlow::PathNode toNode
where
toNode = fromNode.getASuccessor() and
exists(fromNode.getNode().getLocation().getFile().getRelativePath()) and
exists(toNode.getNode().getLocation().getFile().getRelativePath())
select fromNode, toNode

View File

@@ -0,0 +1,40 @@
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:1:1:21 | SynthDictSplatParameterNode | test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:3:3:3 | ControlFlowNode for z | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:3:3:3:3 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:6:1:6:1 | ControlFlowNode for a |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | ControlFlowNode for a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:1:7:1 | ControlFlowNode for b | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:19:7:19 | [post] ControlFlowNode for a | test.py:7:19:7:19 | [post] ControlFlowNode for a |

View File

@@ -0,0 +1,8 @@
import semmle.python.dataflow.new.DataFlow
from DataFlow::Node fromNode, DataFlow::Node toNode
where
DataFlow::localFlow(fromNode, toNode) and
exists(fromNode.getLocation().getFile().getRelativePath()) and
exists(toNode.getLocation().getFile().getRelativePath())
select fromNode, toNode

View File

@@ -0,0 +1,2 @@
testFailures
failures

View File

@@ -0,0 +1 @@
import experimental.dataflow.TestUtil.LocalFlowStepTest

View File

@@ -0,0 +1,10 @@
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:3:3:3:3 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | ControlFlowNode for a |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b |

View File

@@ -0,0 +1,8 @@
import semmle.python.dataflow.new.DataFlow
from DataFlow::Node fromNode, DataFlow::Node toNode
where
DataFlow::localFlowStep(fromNode, toNode) and
exists(fromNode.getLocation().getFile().getRelativePath()) and
exists(toNode.getLocation().getFile().getRelativePath())
select fromNode, toNode

View File

@@ -0,0 +1,2 @@
testFailures
failures

View File

@@ -0,0 +1 @@
import experimental.dataflow.TestUtil.MaximalFlowTest

View File

@@ -0,0 +1,7 @@
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b |

View File

@@ -0,0 +1,9 @@
import maximalFlowsConfig
from DataFlow::Node source, DataFlow::Node sink
where
source != sink and
MaximalFlowsFlow::flow(source, sink) and
exists(source.getLocation().getFile().getRelativePath()) and
exists(sink.getLocation().getFile().getRelativePath())
select source, sink

View File

@@ -0,0 +1,23 @@
import python
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
/**
* A configuration to find all "maximal" flows.
* To be used on small programs.
*/
module MaximalFlowsConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
node instanceof DataFlow::ParameterNode
or
node instanceof DataFlow::LocalSourceNode
}
predicate isSink(DataFlow::Node node) {
node instanceof DataFlowPrivate::ReturnNode
or
not DataFlowPrivate::LocalFlow::localFlowStep(node, _)
}
}
module MaximalFlowsFlow = DataFlow::Global<MaximalFlowsConfig>;

View File

@@ -0,0 +1,18 @@
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:6:1:6:1 | ControlFlowNode for a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral |
| test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:19:7:19 | [post] ControlFlowNode for a |

View File

@@ -0,0 +1,7 @@
import allFlowsConfig
from DataFlow::Node sink
where
AllFlowsConfig::isSink(sink) and
exists(sink.getLocation().getFile().getRelativePath())
select sink

View File

@@ -0,0 +1,18 @@
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:6:1:6:1 | ControlFlowNode for a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral |
| test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:19:7:19 | [post] ControlFlowNode for a |

View File

@@ -0,0 +1,7 @@
import allFlowsConfig
from DataFlow::Node source
where
AllFlowsConfig::isSource(source) and
exists(source.getLocation().getFile().getRelativePath())
select source

View File

@@ -0,0 +1,7 @@
def obfuscated_id(x): #$ step="FunctionExpr -> obfuscated_id"
y = x #$ step="x -> y" step="x, l:-1 -> x"
z = y #$ step="y -> z" step="y, l:-1 -> y"
return z #$ flow="42, l:+2 -> z" step="z, l:-1 -> z"
a = 42 #$ step="42 -> a"
b = obfuscated_id(a) #$ flow="42, l:-1 -> b" flow="FunctionExpr, l:-6 -> obfuscated_id" step="obfuscated_id(..) -> b" step="obfuscated_id, l:-6 -> obfuscated_id" step="a, l:-1 -> a"

View File

@@ -0,0 +1,22 @@
private import python
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
/**
* A configuration to find the call graph edges.
*/
module CallGraphConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
node instanceof DataFlowPrivate::ReturnNode
or
node instanceof DataFlow::ArgumentNode
}
predicate isSink(DataFlow::Node node) {
node instanceof DataFlowPrivate::OutNode
or
node instanceof DataFlow::ParameterNode
}
}
module CallGraphFlow = DataFlow::Global<CallGraphConfig>;

View File

@@ -0,0 +1,13 @@
| test.py:32:8:32:23 | CrosstalkTestX() | test.py:9:5:9:23 | Function __init__ | test.py:32:8:32:23 | [pre] ControlFlowNode for CrosstalkTestX() | self |
| test.py:33:8:33:23 | CrosstalkTestY() | test.py:21:5:21:23 | Function __init__ | test.py:33:8:33:23 | [pre] ControlFlowNode for CrosstalkTestY() | self |
| test.py:43:1:43:8 | func() | test.py:13:5:13:26 | Function setx | test.py:36:12:36:15 | ControlFlowNode for objx | self |
| test.py:43:1:43:8 | func() | test.py:13:5:13:26 | Function setx | test.py:43:6:43:7 | ControlFlowNode for IntegerLiteral | position 0 |
| test.py:43:1:43:8 | func() | test.py:25:5:25:26 | Function sety | test.py:38:12:38:15 | ControlFlowNode for objy | self |
| test.py:43:1:43:8 | func() | test.py:25:5:25:26 | Function sety | test.py:43:6:43:7 | ControlFlowNode for IntegerLiteral | position 0 |
| test.py:51:1:51:8 | func() | test.py:16:5:16:30 | Function setvalue | test.py:47:12:47:15 | ControlFlowNode for objx | self |
| test.py:51:1:51:8 | func() | test.py:16:5:16:30 | Function setvalue | test.py:51:6:51:7 | ControlFlowNode for IntegerLiteral | position 0 |
| test.py:51:1:51:8 | func() | test.py:28:5:28:30 | Function setvalue | test.py:49:12:49:15 | ControlFlowNode for objy | self |
| test.py:51:1:51:8 | func() | test.py:28:5:28:30 | Function setvalue | test.py:51:6:51:7 | ControlFlowNode for IntegerLiteral | position 0 |
| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:63:12:63:12 | ControlFlowNode for a | self |
| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:70:6:70:7 | ControlFlowNode for IntegerLiteral | position 0 |
| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:70:6:70:7 | ControlFlowNode for IntegerLiteral | self |

View File

@@ -0,0 +1,9 @@
private import python
private import semmle.python.dataflow.new.internal.DataFlowPrivate
private import semmle.python.dataflow.new.internal.DataFlowPublic
from DataFlowCall call, DataFlowCallable callable, ArgumentNode arg, ArgumentPosition apos
where
callable = call.getCallable() and
arg = call.getArgument(apos)
select call, callable, arg, apos

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=0

View File

@@ -0,0 +1,70 @@
import random
cond = random.randint(0,1) == 1
# ------------------------------------------------------------------------------
# Calling different bound-methods based on conditional
# ------------------------------------------------------------------------------
class CrosstalkTestX:
def __init__(self):
self.x = None
self.y = None
def setx(self, value):
self.x = value
def setvalue(self, value):
self.x = value
class CrosstalkTestY:
def __init__(self):
self.x = None
self.y = None
def sety(self ,value):
self.y = value
def setvalue(self, value):
self.y = value
objx = CrosstalkTestX()
objy = CrosstalkTestY()
if cond:
func = objx.setx
else:
func = objy.sety
# What we're testing for is whether both objects are passed as self to both methods,
# which is wrong.
func(42)
if cond:
func = objx.setvalue
else:
func = objy.setvalue
func(43)
# ------------------------------------------------------------------------------
# Calling methods in different ways
# ------------------------------------------------------------------------------
class A(object):
def foo(self, arg="Default"):
print("A.foo", self, arg)
a = A()
if cond:
func = a.foo # `44` is passed as arg
else:
func = A.foo # `44` is passed as self
# What we're testing for is whether a single call ends up having both `a` and `44` is
# passed as self to `A.foo`, which is wrong.
func(44)

View File

@@ -0,0 +1,2 @@
testFailures
failures

View File

@@ -0,0 +1,37 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
module DataFlowCallTest implements TestSig {
string getARelevantTag() {
result in ["call", "callType"]
or
result = "arg[" + any(DataFlowDispatch::ArgumentPosition pos).toString() + "]"
}
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(DataFlowDispatch::DataFlowCall call |
location = call.getLocation() and
element = call.toString() and
exists(call.getCallable())
|
value = prettyExpr(call.getNode().getNode()) and
tag = "call"
or
value = call.(DataFlowDispatch::NormalCall).getCallType().toString() and
tag = "callType"
or
exists(DataFlowDispatch::ArgumentPosition pos, DataFlow::Node arg |
arg = call.getArgument(pos)
|
value = prettyNodeForInlineTest(arg) and
tag = "arg[" + pos + "]"
)
)
}
}
import MakeTest<DataFlowCallTest>

View File

@@ -0,0 +1,16 @@
# We want to ensure that the __new__ method is considered a classmethod even though it
# doesn't have a decorator. This means that the `cls` parameter should be considered a
# reference to the class (or subclass), and not an instance of the class. We can detect
# this from looking at the arguments passed in the `cls.foo` call. if we see a `self`
# argument, this means it has correct behavior (because we're targeting a classmethod),
# if there is no `self` argument, this means we've only considered `cls` to be a class
# instance, since we don't want to pass that to the `cls` parameter of the classmethod `WithNewImpl.foo`.
class WithNewImpl(object):
def __new__(cls):
print("WithNewImpl.foo")
cls.foo() # $ call=cls.foo() callType=CallTypeClassMethod arg[self]=cls
@classmethod
def foo(cls):
print("WithNewImpl.foo")

View File

@@ -0,0 +1,82 @@
# A very basic test of DataFlowCall
#
# see `coverage/argumentRoutingTest.ql` for a more in depth test of argument routing
# handling.
def func(arg):
pass
class MyClass(object):
def __init__(self, arg):
pass
def my_method(self, arg):
pass
def other_method(self):
self.my_method(42) # $ arg[self]=self call=self.my_method(..) callType=CallTypeNormalMethod arg[position 0]=42
self.sm(42) # $ call=self.sm(..) callType=CallTypeStaticMethod arg[position 0]=42
@staticmethod
def sm(arg):
pass
@classmethod
def cm(cls, arg):
pass
@classmethod
def other_classmethod(cls):
cls.cm(42) # $ call=cls.cm(..) callType=CallTypeClassMethod arg[position 0]=42 arg[self]=cls
cls.sm(42) # $ call=cls.sm(..) callType=CallTypeStaticMethod arg[position 0]=42
def __getitem__(self, key):
pass
func(0) # $ call=func(..) arg[position 0]=0 callType=CallTypePlainFunction
x = MyClass(1) # $ call=MyClass(..) arg[self]=[pre]MyClass(..) arg[position 0]=1 callType=CallTypeClass
x.my_method(2) # $ call=x.my_method(..) arg[self]=x arg[position 0]=2 callType=CallTypeNormalMethod
mm = x.my_method
mm(2) # $ call=mm(..) arg[self]=x arg[position 0]=2 callType=CallTypeNormalMethod
MyClass.my_method(x, 2) # $ call=MyClass.my_method(..) arg[position 0]=2 arg[self]=x callType=CallTypeMethodAsPlainFunction
x.sm(3) # $ call=x.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
MyClass.sm(3) # $ call=MyClass.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
x.cm(4) # $ call=x.cm(..) arg[position 0]=4 callType=CallTypeClassMethod
MyClass.cm(4) # $ call=MyClass.cm(..) arg[position 0]=4 arg[self]=MyClass callType=CallTypeClassMethod
x[5] # $ MISSING: call=x[5] arg[self]=x arg[position 0]=5
class Subclass(MyClass):
pass
y = Subclass(1) # $ call=Subclass(..) arg[self]=[pre]Subclass(..) arg[position 0]=1 callType=CallTypeClass
y.my_method(2) # $ call=y.my_method(..) arg[self]=y arg[position 0]=2 callType=CallTypeNormalMethod
mm = y.my_method
mm(2) # $ call=mm(..) arg[self]=y arg[position 0]=2 callType=CallTypeNormalMethod
Subclass.my_method(y, 2) # $ call=Subclass.my_method(..) arg[self]=y arg[position 0]=2 callType=CallTypeMethodAsPlainFunction
y.sm(3) # $ call=y.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
Subclass.sm(3) # $ call=Subclass.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
y.cm(4) # $ call=y.cm(..) arg[position 0]=4 callType=CallTypeClassMethod
Subclass.cm(4) # $ call=Subclass.cm(..) arg[self]=Subclass arg[position 0]=4 callType=CallTypeClassMethod
y[5] # $ MISSING: call=y[5] arg[self]=y arg[position 0]=5
try:
# These are included to show whether we have a DataFlowCall for things we can't
# resolve. Both are interesting since with points-to we used to have a DataFlowCall
# for _one_ but not the other
import mypkg
mypkg.foo(42)
mypkg.subpkg.bar(43)
except:
pass

View File

@@ -0,0 +1,25 @@
# Originally we had module and functions as `DataFlowCallable``, and any call inside a
# class scope would not have a result for getEnclosingCallable. Since this was only a
# consistency error for calls, originally we added a new `DataFlowClassScope` only for
# those classes that had a call in their scope. That's why all the class definitions in
# this test do a call to the dummy function `func`.
#
# Note: this was shortsighted, since most DataFlow::Node use `getCallableScope` helper
# to define their .getEnclosingCallable(), which picks the first DataFlowCallable to
# contain the node. (so for some classes that would be DataFlowClassScope, and for some
# it would be the module/function containing the class definition)
def func(*args, **kwargs):
print("func()")
class Cls:
func()
class Inner:
func()
def other_func():
class Cls2:
func()
return Cls2
x = other_func()

View File

@@ -0,0 +1,7 @@
import python
import semmle.python.dataflow.new.DataFlow
query predicate parameterWithoutNode(Parameter p, string msg) {
not exists(DataFlow::ParameterNode node | p = node.getParameter()) and
msg = "There is no `ParameterNode` associated with this parameter."
}

View File

@@ -0,0 +1,10 @@
dangerous = SOURCE
safe = "safe"
def dangerous_func():
return SOURCE
safe2 = SOURCE
safe2 = "safe"

View File

@@ -0,0 +1,258 @@
# This is currently a copy of the integration tests.
# It should contain many syntactic constructs, so should
# perhaps be taken from coverage once that is done.
# (We might even put the consistency check in there.)
def test1():
SINK(SOURCE)
def test2():
s = SOURCE
SINK(s)
def source():
return SOURCE
def sink(arg):
SINK(arg)
def test3():
t = source()
SINK(t)
def test4():
t = SOURCE
sink(t)
def test5():
t = source()
sink(t)
def test6(cond):
if cond:
t = "Safe"
else:
t = SOURCE
if cond:
SINK(t)
def test7(cond):
if cond:
t = SOURCE
else:
t = "Safe"
if cond:
SINK(t)
def source2(arg):
return source(arg)
def sink2(arg):
sink(arg)
def sink3(cond, arg):
if cond:
sink(arg)
def test8(cond):
t = source2()
sink2(t)
#False positive
def test9(cond):
if cond:
t = "Safe"
else:
t = SOURCE
sink3(cond, t)
def test10(cond):
if cond:
t = SOURCE
else:
t = "Safe"
sink3(cond, t)
def hub(arg):
return arg
def test11():
t = SOURCE
t = hub(t)
SINK(t)
def test12():
t = "safe"
t = hub(t)
SINK(t)
import module
def test13():
t = module.dangerous
SINK(t)
def test14():
t = module.safe
SINK(t)
def test15():
t = module.safe2
SINK(t)
def test16():
t = module.dangerous_func()
SINK(t)
class C(object): pass
def x_sink(arg):
SINK(arg.x)
def test17():
t = C()
t.x = module.dangerous
SINK(t.x)
def test18():
t = C()
t.x = module.dangerous
t = hub(t)
x_sink(t)
def test19():
t = CUSTOM_SOURCE
t = hub(TAINT_FROM_ARG(t))
CUSTOM_SINK(t)
def test20(cond):
if cond:
t = CUSTOM_SOURCE
else:
t = SOURCE
if cond:
CUSTOM_SINK(t)
else:
SINK(t)
def test21(cond):
if cond:
t = CUSTOM_SOURCE
else:
t = SOURCE
if not cond:
CUSTOM_SINK(t)
else:
SINK(t)
def test22(cond):
if cond:
t = CUSTOM_SOURCE
else:
t = SOURCE
t = TAINT_FROM_ARG(t)
if cond:
CUSTOM_SINK(t)
else:
SINK(t)
from module import dangerous as unsafe
SINK(unsafe)
def test23():
with SOURCE as t:
SINK(t)
def test24():
s = SOURCE
SANITIZE(s)
SINK(s)
def test_update_extend(x, y):
l = [SOURCE]
d = {"key" : SOURCE}
x.extend(l)
y.update(d)
SINK(x[0])
SINK(y["key"])
l2 = list(l)
d2 = dict(d)
def test_truth():
t = SOURCE
if t:
SINK(t)
else:
SINK(t)
if not t:
SINK(t)
else:
SINK(t)
def test_early_exit():
t = FALSEY
if not t:
return
t
def flow_through_type_test_if_no_class():
t = SOURCE
if isinstance(t, str):
SINK(t)
else:
SINK(t)
def flow_in_iteration():
t = ITERABLE_SOURCE
for i in t:
i
return i
def flow_in_generator():
seq = [SOURCE]
for i in seq:
yield i
def flow_from_generator():
for x in flow_in_generator():
SINK(x)
def const_eq_clears_taint():
tainted = SOURCE
if tainted == "safe":
SINK(tainted) # safe
SINK(tainted) # unsafe
def const_eq_clears_taint2():
tainted = SOURCE
if tainted != "safe":
return
SINK(tainted) # safe
def non_const_eq_preserves_taint(x):
tainted = SOURCE
if tainted == tainted:
SINK(tainted) # unsafe
if tainted == x:
SINK(tainted) # unsafe
def overflowCallee(*args, p="", **kwargs):
print("args", args)
print("p", p)
print("kwargs", kwargs)
def synth_arg_posOverflow():
overflowCallee(42)
def synth_arg_kwOverflow():
overflowCallee(foo=42)
def synth_arg_kwUnpacked():
overflowCallee(**{"p": "42"})
def split_lambda(cond):
if cond:
pass
foo = lambda x: False
if cond:
pass

View File

@@ -0,0 +1,2 @@
testFailures
failures

View File

@@ -0,0 +1 @@
../coverage/argumentRoutingTest.ql

View File

@@ -0,0 +1,54 @@
# Python 2 specific tests, like the one in coverage/classes.py
#
# User-defined methods, both instance methods and class methods, can be called in many non-standard ways
# i.e. differently from simply `c.f()` or `C.f()`. For example, a user-defined `__await__` method on a
# class `C` will be called by the syntactic construct `await c` when `c` is an instance of `C`.
#
# These tests should cover all the class calls that we hope to support.
# It is based on https://docs.python.org/3/reference/datamodel.html, and headings refer there.
#
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
# This can be checked by running validTest.py.
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
def SINK1(x):
pass
def SINK2(x):
pass
def SINK3(x):
pass
def SINK4(x):
pass
def OK():
print("OK")
# 3.3.8. Emulating numeric types
# object.__index__(self)
class With_index:
def __index__(self):
SINK1(self)
OK() # Call not found
return 0
def test_index():
import operator
with_index = With_index() #$ MISSING: arg1="SSA variable with_index" func=With_index.__index__
operator.index(with_index)

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1 --lang=2

View File

@@ -0,0 +1,2 @@
testFailures
failures

View File

@@ -0,0 +1 @@
../coverage/argumentRoutingTest.ql

View File

@@ -0,0 +1,72 @@
# Python 3 specific tests, like the one in coverage/classes.py
#
# User-defined methods, both instance methods and class methods, can be called in many non-standard ways
# i.e. differently from simply `c.f()` or `C.f()`. For example, a user-defined `__await__` method on a
# class `C` will be called by the syntactic construct `await c` when `c` is an instance of `C`.
#
# These tests should cover all the class calls that we hope to support.
# It is based on https://docs.python.org/3/reference/datamodel.html, and headings refer there.
#
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
# This can be checked by running validTest.py.
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
def SINK1(x):
pass
def SINK2(x):
pass
def SINK3(x):
pass
def SINK4(x):
pass
def OK():
print("OK")
# 3.3.7. Emulating container types
# object.__length_hint__(self)
class With_length_hint:
def __length_hint__(self):
SINK1(self)
OK()
return 0
def test_length_hint():
import operator
with_length_hint = With_length_hint() #$ arg1="with_length_hint" func=With_length_hint.__length_hint__
operator.length_hint(with_length_hint)
# 3.3.8. Emulating numeric types
# object.__index__(self)
class With_index:
def __index__(self):
SINK1(self)
OK() # Call not found
return 0
def test_index():
import operator
with_index = With_index() #$ arg1="with_index" func=With_index.__index__
operator.index(with_index)

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1 --lang=3

View File

@@ -0,0 +1,3 @@
missingAnnotationOnSink
testFailures
failures

View File

@@ -0,0 +1,2 @@
import python
import experimental.dataflow.TestUtil.NormalDataflowTest

View File

@@ -0,0 +1,337 @@
import sys
import os
import functools
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
SOURCE = "source"
arg = "source"
arg1 = "source1"
arg2 = "source2"
arg3 = "source3"
arg4 = "source4"
arg5 = "source5"
arg6 = "source6"
arg7 = "source7"
def SINK_TEST(x, test):
if test(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK(x, expected=arg):
SINK_TEST(x, test=lambda x: x == expected)
def SINK_F(x, unexpected=arg):
SINK_TEST(x, test=lambda x: x != unexpected)
SINK1 = functools.partial(SINK, expected=arg1)
SINK2 = functools.partial(SINK, expected=arg2)
SINK3 = functools.partial(SINK, expected=arg3)
SINK4 = functools.partial(SINK, expected=arg4)
SINK5 = functools.partial(SINK, expected=arg5)
SINK6 = functools.partial(SINK, expected=arg6)
SINK7 = functools.partial(SINK, expected=arg7)
SINK1_F = functools.partial(SINK_F, unexpected=arg1)
SINK2_F = functools.partial(SINK_F, unexpected=arg2)
SINK3_F = functools.partial(SINK_F, unexpected=arg3)
SINK4_F = functools.partial(SINK_F, unexpected=arg4)
SINK5_F = functools.partial(SINK_F, unexpected=arg5)
SINK6_F = functools.partial(SINK_F, unexpected=arg6)
SINK7_F = functools.partial(SINK_F, unexpected=arg7)
def argument_passing(
a,
b,
/,
c,
d=arg4, #$ arg4 func=argument_passing
*,
e=arg5, #$ arg5 func=argument_passing
f,
**g,
):
SINK1(a)
SINK2(b)
SINK3(c)
SINK4(d)
SINK5(e)
SINK6(f)
try:
SINK7(g["g"])
except:
print("OK")
@expects(7)
def test_argument_passing1():
argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 arg6 arg7 func=argument_passing MISSING: arg2 arg3 arg4
@expects(7)
def test_argument_passing2():
argument_passing(arg1, arg2, arg3, f=arg6) #$ arg1 arg2 arg3 arg6
def with_pos_only(a, /, b):
SINK1(a)
SINK2(b)
@expects(6)
def test_pos_only():
with_pos_only(arg1, arg2) #$ arg1 arg2
with_pos_only(arg1, b=arg2) #$ arg1 arg2
with_pos_only(arg1, *(arg2,)) #$ arg1 MISSING: arg2
def with_multiple_kw_args(a, b, c):
SINK1(a)
SINK2(b)
SINK3(c)
@expects(12)
def test_multiple_kw_args():
with_multiple_kw_args(b=arg2, c=arg3, a=arg1) #$ arg1 arg2 arg3
with_multiple_kw_args(arg1, *(arg2,), arg3) #$ arg1 MISSING: arg2 arg3
with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 arg3 func=with_multiple_kw_args
with_multiple_kw_args(**{"b": arg2}, **{"c": arg3}, **{"a": arg1}) #$ arg1 arg2 arg3 func=with_multiple_kw_args
def with_default_arguments(a=arg1, b=arg2, c=arg3): #$ arg1 arg2 arg3 func=with_default_arguments
SINK1(a)
SINK2(b)
SINK3(c)
@expects(12)
def test_default_arguments():
with_default_arguments()
with_default_arguments(arg1) #$ arg1
with_default_arguments(b=arg2) #$ arg2
with_default_arguments(**{"c": arg3}) #$ arg3 func=with_default_arguments
# All combinations
def test_pos_pos():
def with_pos(a):
SINK1(a)
with_pos(arg1) #$ arg1 func=test_pos_pos.with_pos
def test_pos_pos_only():
def with_pos_only(a, /):
SINK1(a)
with_pos_only(arg1) #$ arg1 func=test_pos_pos_only.with_pos_only
def test_pos_star():
def with_star(*a):
if len(a) > 0:
SINK1(a[0])
with_star(arg1) #$ arg1 func=test_pos_star.with_star
def test_pos_kw():
def with_kw(a=""):
SINK1(a)
with_kw(arg1) #$ arg1 func=test_pos_kw.with_kw
def test_kw_pos():
def with_pos(a):
SINK1(a)
with_pos(a=arg1) #$ arg1 func=test_kw_pos.with_pos
def test_kw_kw():
def with_kw(a=""):
SINK1(a)
with_kw(a=arg1) #$ arg1 func=test_kw_kw.with_kw
def test_kw_doublestar():
def with_doublestar(**kwargs):
SINK1(kwargs["a"])
with_doublestar(a=arg1) #$ arg1 func=test_kw_doublestar.with_doublestar
def only_kwargs(**kwargs):
SINK1(kwargs["a"])
SINK2(kwargs["b"])
# testing precise content tracking, that content from `a` or `b` does not end up here.
SINK3_F(kwargs["c"])
@expects(3)
def test_kwargs():
args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=only_kwargs
only_kwargs(**args)
def mixed(a, **kwargs):
SINK1(a)
try:
SINK1_F(kwargs["a"]) # since 'a' is a keyword argument, it cannot be part of **kwargs
except KeyError:
print("OK")
SINK2(kwargs["b"])
# testing precise content tracking, that content from `a` or `b` does not end up here.
SINK3_F(kwargs["c"])
@expects(4*3)
def test_mixed():
mixed(a=arg1, b=arg2, c="safe") # $ arg1 arg2
args = {"b": arg2, "c": "safe"} # $ arg2 func=mixed
mixed(a=arg1, **args) # $ arg1
args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=mixed
mixed(**args)
def kwargs_same_name_as_positional_only(a, /, **kwargs):
SINK1(a)
SINK2(kwargs["a"])
@expects(2*2)
def test_kwargs_same_name_as_positional_only():
kwargs_same_name_as_positional_only(arg1, a=arg2) # $ arg1 SPURIOUS: bad1="arg2" MISSING: arg2
kwargs = {"a": arg2} # $ func=kwargs_same_name_as_positional_only SPURIOUS: bad1="arg2" MISSING: arg2
kwargs_same_name_as_positional_only(arg1, **kwargs) # $ arg1
def starargs_only(*args):
SINK1(args[0])
SINK2(args[1])
SINK3_F(args[2])
@expects(5*3)
def test_only_starargs():
starargs_only(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad2,bad3="arg1" bad1,bad3="arg2"
args = (arg2, "safe") # $ MISSING: arg2
starargs_only(arg1, *args) # $ arg1 SPURIOUS: bad2,bad3="arg1"
args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
starargs_only(*args)
empty_args = ()
args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
starargs_only(*args, *empty_args)
args = (arg1, arg2, "safe") # $ MISSING: arg1 arg2 func=starargs_only
starargs_only(*empty_args, *args)
def starargs_mixed(a, *args):
SINK1(a)
SINK2(args[0])
SINK3_F(args[1])
@expects(3*8)
def test_stararg_mixed():
starargs_mixed(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad3="arg2"
args = (arg2, "safe") # $ arg2 func=starargs_mixed
starargs_mixed(arg1, *args) # $ arg1
args = (arg1, arg2, "safe")
starargs_mixed(*args) # $ MISSING: arg1 arg2
args = (arg1, arg2, "safe")
more_args = ("foo", "bar")
starargs_mixed(*args, *more_args) # $ MISSING: arg1 arg2
empty_args = ()
# adding first/last
starargs_mixed(arg1, arg2, "safe", *empty_args) # $ arg1 arg2 SPURIOUS: bad3="arg2"
starargs_mixed(*empty_args, arg1, arg2, "safe") # $ MISSING: arg1 arg2
# adding before/after *args
args = (arg2, "safe") # $ arg2 func=starargs_mixed
starargs_mixed(arg1, *args, *empty_args) # $ arg1
args = (arg2, "safe")
starargs_mixed(arg1, *empty_args, *args) # $ arg1 MISSING: arg2
# ------------------------------------------------------------------------------
# Test updating field of argument
# ------------------------------------------------------------------------------
class MyClass: pass
def kwargsSideEffect(**kwargs):
kwargs["a"].foo = kwargs["b"]
@expects(2)
def test_kwargsSideEffect():
a = MyClass()
kwargs = {"a": a, "b": SOURCE}
kwargsSideEffect(**kwargs)
SINK(a.foo) # $ MISSING: flow
a = MyClass()
kwargsSideEffect(a=a, b=SOURCE)
SINK(a.foo) # $ MISSING: flow
def keywordArgSideEffect(a, b):
a.foo = b
@expects(2)
def test_keywordArgSideEffect():
a = MyClass()
kwargs = {"a": a, "b": SOURCE}
keywordArgSideEffect(**kwargs)
SINK(a.foo) # $ MISSING: flow
a = MyClass()
keywordArgSideEffect(a=a, b=SOURCE)
SINK(a.foo) # $ flow="SOURCE, l:-1 -> a.foo"
def starargsSideEffect(*args):
args[0].foo = args[1]
@expects(2)
def test_starargsSideEffect():
a = MyClass()
args = (a, SOURCE)
starargsSideEffect(*args)
SINK(a.foo) # $ MISSING: flow
a = MyClass()
starargsSideEffect(a, SOURCE)
SINK(a.foo) # $ MISSING: flow
def positionalArgSideEffect(a, b):
a.foo = b
@expects(2)
def test_positionalArgSideEffect():
a = MyClass()
args = (a, SOURCE)
positionalArgSideEffect(*args)
SINK(a.foo) # $ MISSING: flow
a = MyClass()
positionalArgSideEffect(a, SOURCE)
SINK(a.foo) # $ flow="SOURCE, l:-1 -> a.foo"

View File

@@ -0,0 +1,63 @@
import sys
import os
import functools
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
arg = "source"
arg1 = "source1"
arg2 = "source2"
arg3 = "source3"
arg4 = "source4"
arg5 = "source5"
arg6 = "source6"
arg7 = "source7"
def SINK_TEST(x, test):
if test(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK(x, expected=arg):
SINK_TEST(x, test=lambda x: x == expected)
def SINK_F(x, unexpected=arg):
SINK_TEST(x, test=lambda x: x != unexpected)
SINK1 = functools.partial(SINK, expected=arg1)
SINK2 = functools.partial(SINK, expected=arg2)
SINK3 = functools.partial(SINK, expected=arg3)
SINK4 = functools.partial(SINK, expected=arg4)
SINK5 = functools.partial(SINK, expected=arg5)
SINK6 = functools.partial(SINK, expected=arg6)
SINK7 = functools.partial(SINK, expected=arg7)
SINK1_F = functools.partial(SINK_F, unexpected=arg1)
SINK2_F = functools.partial(SINK_F, unexpected=arg2)
SINK3_F = functools.partial(SINK_F, unexpected=arg3)
SINK4_F = functools.partial(SINK_F, unexpected=arg4)
SINK5_F = functools.partial(SINK_F, unexpected=arg5)
SINK6_F = functools.partial(SINK_F, unexpected=arg6)
SINK7_F = functools.partial(SINK_F, unexpected=arg7)
def bad_argument_flow_func(arg):
SINK1_F(arg)
def bad_argument_flow_func2(arg):
SINK2(arg)
def test_bad_argument_flow():
# this is just a test to show that the testing setup works
# in the first one, we pretend we expected no flow for arg1
bad_argument_flow_func(arg1) # $ bad1="arg1"
# in the second one, we pretend we wanted flow for arg2 instead
bad_argument_flow_func2(arg1) # $ bad2="arg1"

View File

@@ -0,0 +1,2 @@
testFailures
failures

View File

@@ -0,0 +1,133 @@
import python
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
import experimental.dataflow.TestUtil.RoutingTest
module Argument1RoutingTest implements RoutingTestSig {
class Argument = Unit;
string flowTag(Argument arg) { result = "arg1" and exists(arg) }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink, Argument arg) {
(
Argument1ExtraRoutingFlow::flow(source, sink)
or
ArgumentRoutingFlow::flow(source, sink) and
ArgumentRoutingConfig::isArgSource(source, 1) and
ArgumentRoutingConfig::isGoodSink(sink, 1)
) and
exists(arg)
}
}
class ArgNumber extends int {
ArgNumber() { this in [1 .. 7] }
}
module ArgumentRoutingConfig implements DataFlow::ConfigSig {
additional predicate isArgSource(DataFlow::Node node, ArgNumber argNumber) {
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg" + argNumber
}
predicate isSource(DataFlow::Node node) { isArgSource(node, _) }
additional predicate isGoodSink(DataFlow::Node node, ArgNumber argNumber) {
exists(CallNode call |
call.getFunction().(NameNode).getId() = "SINK" + argNumber and
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}
additional predicate isBadSink(DataFlow::Node node, ArgNumber argNumber) {
exists(CallNode call |
call.getFunction().(NameNode).getId() = "SINK" + argNumber + "_F" and
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}
predicate isSink(DataFlow::Node node) { isGoodSink(node, _) or isBadSink(node, _) }
/**
* We want to be able to use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
* Use-use flow lets the argument to the first call reach the sink inside the second call,
* making it seem like we handle all cases even if we only handle the last one.
* We make the test honest by preventing flow into source nodes.
*/
predicate isBarrierIn(DataFlow::Node node) { isSource(node) }
}
module ArgumentRoutingFlow = DataFlow::Global<ArgumentRoutingConfig>;
module Argument1ExtraRoutingConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
exists(AssignmentDefinition def, DataFlow::CallCfgNode call |
def.getDefiningNode() = node.(DataFlow::CfgNode).getNode() and
def.getValue() = call.getNode() and
call.getFunction().asCfgNode().(NameNode).getId().matches("With\\_%")
) and
node.(DataFlow::CfgNode).getNode().(NameNode).getId().matches("with\\_%")
}
predicate isSink(DataFlow::Node node) {
exists(CallNode call |
call.getFunction().(NameNode).getId() = "SINK1" and
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}
/**
* We want to be able to use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
* Use-use flow lets the argument to the first call reach the sink inside the second call,
* making it seem like we handle all cases even if we only handle the last one.
* We make the test honest by preventing flow into source nodes.
*/
predicate isBarrierIn(DataFlow::Node node) { isSource(node) }
}
module Argument1ExtraRoutingFlow = DataFlow::Global<Argument1ExtraRoutingConfig>;
module RestArgumentRoutingTest implements RoutingTestSig {
class Argument = ArgNumber;
string flowTag(Argument arg) { result = "arg" + arg }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink, Argument arg) {
ArgumentRoutingFlow::flow(source, sink) and
ArgumentRoutingConfig::isArgSource(source, arg) and
ArgumentRoutingConfig::isGoodSink(sink, arg) and
arg > 1
}
}
/** Bad flow from `arg<n>` to `SINK<N>_F` */
module BadArgumentRoutingTestSinkF implements RoutingTestSig {
class Argument = ArgNumber;
string flowTag(Argument arg) { result = "bad" + arg }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink, Argument arg) {
ArgumentRoutingFlow::flow(source, sink) and
ArgumentRoutingConfig::isArgSource(source, arg) and
ArgumentRoutingConfig::isBadSink(sink, arg)
}
}
/** Bad flow from `arg<n>` to `SINK<M>` or `SINK<M>_F`, where `n != m`. */
module BadArgumentRoutingTestWrongSink implements RoutingTestSig {
class Argument = ArgNumber;
string flowTag(Argument arg) { result = "bad" + arg }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink, Argument arg) {
ArgumentRoutingFlow::flow(source, sink) and
ArgumentRoutingConfig::isArgSource(source, any(ArgNumber i | not i = arg)) and
(
ArgumentRoutingConfig::isGoodSink(sink, arg)
or
ArgumentRoutingConfig::isBadSink(sink, arg)
)
}
}
import MakeTest<MergeTests4<MakeTestSig<Argument1RoutingTest>, MakeTestSig<RestArgumentRoutingTest>,
MakeTestSig<BadArgumentRoutingTestSinkF>, MakeTestSig<BadArgumentRoutingTestWrongSink>>>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,253 @@
# User-defined methods, both instance methods and class methods, can be called in many non-standard ways
# i.e. differently from simply `c.f()` or `C.f()`. For example, a user-defined `__await__` method on a
# class `C` will be called by the syntactic construct `await c` when `c` is an instance of `C`.
#
# These tests are based on the first part of https://docs.python.org/3/reference/datamodel.html.
# A thorough covering of methods in that document is found in classes.py.
#
# Intended sources should be the variable `SOURCE` and intended sinks should be
# arguments to the function `SINK` (see python/ql/test/experimental/dataflow/testConfig.qll).
import sys
import os
import functools
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
arg1 = "source1"
arg2 = "source2"
arg3 = "source3"
arg4 = "source4"
arg5 = "source5"
arg6 = "source6"
arg7 = "source7"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x, expected=SOURCE):
if is_source(x) or x == expected:
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
SINK1 = functools.partial(SINK, expected=arg1)
SINK2 = functools.partial(SINK, expected=arg2)
SINK3 = functools.partial(SINK, expected=arg3)
SINK4 = functools.partial(SINK, expected=arg4)
SINK5 = functools.partial(SINK, expected=arg5)
SINK6 = functools.partial(SINK, expected=arg6)
SINK7 = functools.partial(SINK, expected=arg7)
# Callable types
# These are the types to which the function call operation (see section Calls) can be applied:
# User-defined functions
# A user-defined function object is created by a function definition (see section Function definitions). It should be called with an argument list containing the same number of items as the function's formal parameter list.
def f(a, b):
return a
SINK(f(SOURCE, 3)) #$ flow="SOURCE -> f(..)"
# Instance methods
# An instance method object combines a class, a class instance and any callable object (normally a user-defined function).
class C(object):
def method(self, x, y):
SINK1(x)
SINK2(y)
@classmethod
def classmethod(cls, x, y):
SINK1(x)
SINK2(y)
@staticmethod
def staticmethod(x, y):
SINK1(x)
SINK2(y)
def gen(self, x, count):
n = count
while n > 0:
yield x
n -= 1
async def coro(self, x):
return x
c = C()
@expects(6)
def test_method_call():
# When an instance method object is created by retrieving a user-defined function object from a class via one of its instances, its __self__ attribute is the instance, and the method object is said to be bound. The new methods __func__ attribute is the original function object.
func_obj = c.method.__func__
# When an instance method object is called, the underlying function (__func__) is called, inserting the class instance (__self__) in front of the argument list. For instance, when C is a class which contains a definition for a function f(), and x is an instance of C, calling x.f(1) is equivalent to calling C.f(x, 1).
c.method(arg1, arg2) # $ func=C.method arg1 arg2
C.method(c, arg1, arg2) # $ func=C.method arg1 arg2
func_obj(c, arg1, arg2) # $ MISSING: func=C.method arg1 arg2
@expects(6)
def test_classmethod_call():
# When an instance method object is created by retrieving a class method object from a class or instance, its __self__ attribute is the class itself, and its __func__ attribute is the function object underlying the class method.
c_func_obj = C.classmethod.__func__
# When an instance method object is derived from a class method object, the “class instance” stored in __self__ will actually be the class itself, so that calling either x.f(1) or C.f(1) is equivalent to calling f(C,1) where f is the underlying function.
c.classmethod(arg1, arg2) # $ func=C.classmethod arg1 arg2
C.classmethod(arg1, arg2) # $ func=C.classmethod arg1 arg2
c_func_obj(C, arg1, arg2) # $ MISSING: func=C.classmethod arg1 arg2
@expects(5)
def test_staticmethod_call():
# staticmethods does not have a __func__ attribute
try:
C.staticmethod.__func__
except AttributeError:
print("OK")
# When an instance method object is derived from a class method object, the “class instance” stored in __self__ will actually be the class itself, so that calling either x.f(1) or C.f(1) is equivalent to calling f(C,1) where f is the underlying function.
c.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
C.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
# subclass
class SC(C):
pass
sc = SC()
@expects(6)
def test_subclass_method_call():
func_obj = sc.method.__func__
sc.method(arg1, arg2) # $ func=C.method arg1 arg2
SC.method(sc, arg1, arg2) # $ func=C.method arg1 arg2
func_obj(sc, arg1, arg2) # $ MISSING: func=C.method arg1 arg2
@expects(6)
def test_subclass_classmethod_call():
c_func_obj = SC.classmethod.__func__
sc.classmethod(arg1, arg2) # $ func=C.classmethod arg1 arg2
SC.classmethod(arg1, arg2) # $ func=C.classmethod arg1 arg2
c_func_obj(SC, arg1, arg2) # $ MISSING: func=C.classmethod arg1 arg2
@expects(5)
def test_subclass_staticmethod_call():
try:
SC.staticmethod.__func__
except AttributeError:
print("OK")
sc.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
SC.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
# Generator functions
# A function or method which uses the yield statement (see section The yield statement) is called a generator function. Such a function, when called, always returns an iterator object which can be used to execute the body of the function: calling the iterators iterator.__next__() method will cause the function to execute until it provides a value using the yield statement. When the function executes a return statement or falls off the end, a StopIteration exception is raised and the iterator will have reached the end of the set of values to be returned.
def gen(x, count):
n = count
while n > 0:
yield x
n -= 1
iter = gen(SOURCE, 1)
SINK(iter.__next__()) # $ MISSING: flow
# SINK_F(iter.__next__()) # throws StopIteration, FP
oiter = c.gen(SOURCE, 1)
SINK(oiter.__next__()) # $ MISSING: flow
# SINK_F(oiter.__next__()) # throws StopIteration, FP
# Coroutine functions
# A function or method which is defined using async def is called a coroutine function. Such a function, when called, returns a coroutine object. It may contain await expressions, as well as async with and async for statements. See also the Coroutine Objects section.
async def coro(x):
return x
import asyncio
SINK(asyncio.run(coro(SOURCE))) # $ MISSING: flow
SINK(asyncio.run(c.coro(SOURCE))) # $ MISSING: flow
class A:
def __await__(self):
fut = asyncio.Future()
fut.set_result(SOURCE)
yield from fut
async def atest_custom_await_impl():
a = A()
x = await a
# TODO: Figure out how to actually return something from our custom __await__
# implementation. The problem is we have to play nicely with the asyncio framework,
# which have their own expectations on what a return value from __await__ should look
# like.
assert x is None
SINK_F(x)
# Asynchronous generator functions
# A function or method which is defined using async def and which uses the yield statement is called a asynchronous generator function. Such a function, when called, returns an asynchronous iterator object which can be used in an async for statement to execute the body of the function.
# Calling the asynchronous iterators aiterator.__anext__() method will return an awaitable which when awaited will execute until it provides a value using the yield expression. When the function executes an empty return statement or falls off the end, a StopAsyncIteration exception is raised and the asynchronous iterator will have reached the end of the set of values to be yielded.
# Built-in functions
# A built-in function object is a wrapper around a C function. Examples of built-in functions are len() and math.sin() (math is a standard built-in module). The number and type of the arguments are determined by the C function. Special read-only attributes: __doc__ is the functions documentation string, or None if unavailable; __name__ is the functions name; __self__ is set to None (but see the next item); __module__ is the name of the module the function was defined in or None if unavailable.
# Built-in methods
# This is really a different disguise of a built-in function, this time containing an object passed to the C function as an implicit extra argument. An example of a built-in method is alist.append(), assuming alist is a list object. In this case, the special read-only attribute __self__ is set to the object denoted by alist.
# Classes
# Classes are callable. These objects normally act as factories for new instances of themselves, but variations are possible for class types that override __new__(). The arguments of the call are passed to __new__() and, in the typical case, to __init__() to initialize the new instance.
# Class Instances
# Instances of arbitrary classes can be made callable by defining a __call__() method in their class.
# If a class sets __iter__() to None, calling iter() on its instances will raise a TypeError (without falling back to __getitem__()).
# 3.3.1. Basic customization
class Customized:
a = NONSOURCE
b = NONSOURCE
def __new__(cls):
cls.a = SOURCE
return super().__new__(cls)
def __init__(self):
self.b = SOURCE
# testing __new__ and __init__
customized = Customized()
SINK(Customized.a) #$ MISSING:flow="SOURCE, l:-8 -> customized.a"
SINK_F(Customized.b)
SINK(customized.a) #$ MISSING: flow="SOURCE, l:-10 -> customized.a"
SINK(customized.b) #$ flow="SOURCE, l:-7 -> customized.b"
class Test2:
def __init__(self, arg):
self.x = SOURCE
self.y = arg
t = Test2(SOURCE)
SINK(t.x) # $ flow="SOURCE, l:-4 -> t.x"
SINK(t.y) # $ flow="SOURCE, l:-2 -> t.y"

View File

@@ -0,0 +1,16 @@
| test.py:41:1:41:33 | Entry definition for SsaSourceVariable NONSOURCE | test.py:42:10:42:18 | ControlFlowNode for NONSOURCE |
| test.py:41:1:41:33 | Entry definition for SsaSourceVariable SINK | test.py:44:5:44:8 | ControlFlowNode for SINK |
| test.py:41:1:41:33 | Entry definition for SsaSourceVariable SOURCE | test.py:42:21:42:26 | ControlFlowNode for SOURCE |
| test.py:42:5:42:5 | ControlFlowNode for x | test.py:43:9:43:9 | ControlFlowNode for x |
| test.py:42:10:42:26 | ControlFlowNode for Tuple | test.py:42:5:42:5 | ControlFlowNode for x |
| test.py:43:5:43:5 | ControlFlowNode for y | test.py:44:10:44:10 | ControlFlowNode for y |
| test.py:43:9:43:12 | ControlFlowNode for Subscript | test.py:43:5:43:5 | ControlFlowNode for y |
| test.py:208:1:208:53 | Entry definition for SsaSourceVariable SINK | test.py:210:5:210:8 | ControlFlowNode for SINK |
| test.py:208:1:208:53 | Entry definition for SsaSourceVariable SOURCE | test.py:209:25:209:30 | ControlFlowNode for SOURCE |
| test.py:209:5:209:5 | ControlFlowNode for x | test.py:210:10:210:10 | ControlFlowNode for x |
| test.py:209:9:209:68 | ControlFlowNode for .0 | test.py:209:9:209:68 | ControlFlowNode for .0 |
| test.py:209:9:209:68 | ControlFlowNode for ListComp | test.py:209:5:209:5 | ControlFlowNode for x |
| test.py:209:16:209:16 | ControlFlowNode for v | test.py:209:45:209:45 | ControlFlowNode for v |
| test.py:209:40:209:40 | ControlFlowNode for u | test.py:209:56:209:56 | ControlFlowNode for u |
| test.py:209:51:209:51 | ControlFlowNode for z | test.py:209:67:209:67 | ControlFlowNode for z |
| test.py:209:62:209:62 | ControlFlowNode for y | test.py:209:10:209:10 | ControlFlowNode for y |

View File

@@ -0,0 +1,8 @@
import python
import semmle.python.dataflow.new.DataFlow
from DataFlow::Node nodeFrom, DataFlow::Node nodeTo
where
DataFlow::localFlowStep(nodeFrom, nodeTo) and
nodeFrom.getEnclosingCallable().getQualifiedName().matches("%\\_with\\_local\\_flow")
select nodeFrom, nodeTo

View File

@@ -0,0 +1,73 @@
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
# This can be checked by running validTest.py.
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
if is_source(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
# ------------------------------------------------------------------------------
# Actual tests
# ------------------------------------------------------------------------------
def test_while():
x = NONSOURCE
n = 2
while n > 0:
if n == 1:
SINK(x) #$ flow="SOURCE, l:+1 -> x"
x = SOURCE
n -= 1
class MyObj(object):
def __init__(self, foo):
self.foo = foo
def setFoo(obj, x):
obj.foo = x
def test_while_obj():
myobj = MyObj(NONSOURCE)
n = 2
while n > 0:
if n == 1:
SINK(myobj.foo) #$ flow="SOURCE, l:+1 -> myobj.foo"
setFoo(myobj, SOURCE)
n -= 1
def setAndTestFoo(obj, x, test):
if test:
# This flow is not found, if self-loops are broken at the SSA level.
SINK(obj.foo) #$ flow="SOURCE, l:+7 -> obj.foo"
obj.foo = x
def test_while_obj_sink():
myobj = MyObj(NONSOURCE)
n = 2
while n > 0:
setAndTestFoo(myobj, SOURCE, n == 1)
n -= 1

View File

@@ -0,0 +1,7 @@
# this test has code on module level, which works in slightly different ways than when
# inside a function
t = (SOURCE, NONSOURCE)
a, b = t
SINK(a) #$ flow="SOURCE, l:-2 -> a"
SINK_F(b)

View File

@@ -0,0 +1,885 @@
# This should cover all the syntactical constructs that we hope to support.
# Headings refer to https://docs.python.org/3/reference/expressions.html,
# and are selected whenever they incur dataflow.
# Intended sources should be the variable `SOURCE` and intended sinks should be
# arguments to the function `SINK` (see python/ql/test/experimental/dataflow/testConfig.qll).
#
# Functions whose name ends with "_with_local_flow" will also be tested for local flow.
#
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
# This can be checked by running validTest.py.
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
if is_source(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
def test_tuple_with_local_flow():
x = (NONSOURCE, SOURCE)
y = x[1]
SINK(y) #$ flow="SOURCE, l:-2 -> y"
def test_tuple_negative():
x = (NONSOURCE, SOURCE)
y = x[0]
SINK_F(y)
# 6.2.1. Identifiers (Names)
def test_names():
x = SOURCE
SINK(x) #$ flow="SOURCE, l:-1 -> x"
# 6.2.2. Literals
def test_string_literal():
x = "source"
SINK(x) #$ flow="'source', l:-1 -> x"
def test_bytes_literal():
x = b"source"
SINK(x) #$ flow="b'source', l:-1 -> x"
def test_integer_literal():
x = 42
SINK(x) #$ flow="42, l:-1 -> x"
def test_floatnumber_literal():
x = 42.0
SINK(x) #$ flow="42.0, l:-1 -> x"
def test_imagnumber_literal():
x = 42j
SINK(x) #$ MISSING:flow="42j, l:-1 -> x"
# 6.2.3. Parenthesized forms
def test_parenthesized_form():
x = (SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
# 6.2.5. List displays
def test_list_display():
x = [SOURCE]
SINK(x[0]) #$ flow="SOURCE, l:-1 -> x[0]"
def test_list_display_negative():
x = [SOURCE]
SINK_F(x)
def test_list_comprehension():
x = [SOURCE for y in [NONSOURCE]]
SINK(x[0]) #$ flow="SOURCE, l:-1 -> x[0]"
def test_list_comprehension_flow():
x = [y for y in [SOURCE]]
SINK(x[0]) #$ flow="SOURCE, l:-1 -> x[0]"
def test_list_comprehension_inflow():
l = [SOURCE]
x = [y for y in l]
SINK(x[0]) #$ flow="SOURCE, l:-2 -> x[0]"
def test_nested_list_display():
x = [*[SOURCE]]
SINK(x[0]) #$ MISSING:flow="SOURCE, l:-1 -> x[0]"
@expects(6)
def test_list_comprehension_with_tuple_result():
# confirms that this simple case works as expected
t = (SOURCE, NONSOURCE)
SINK(t[0]) # $ flow="SOURCE, l:-1 -> t[0]"
SINK_F(t[1])
# confirms that this simple case works as expected
l1 = [(SOURCE, NONSOURCE) for _ in [1]]
SINK(l1[0][0]) # $ flow="SOURCE, l:-1 -> l1[0][0]"
SINK_F(l1[0][1])
# stops working when using reference to variable, due to variables being captured by
# the function we internally generate for the comprehension body.
s = SOURCE
ns = NONSOURCE
l3 = [(s, ns) for _ in [1]]
SINK(l3[0][0]) # $ MISSING: flow="SOURCE, l:-3 -> l3[0][0]"
SINK_F(l3[0][1])
# 6.2.6. Set displays
def test_set_display():
x = {SOURCE}
SINK(x.pop()) #$ flow="SOURCE, l:-1 -> x.pop()"
def test_set_comprehension():
x = {SOURCE for y in [NONSOURCE]}
SINK(x.pop()) #$ flow="SOURCE, l:-1 -> x.pop()"
def test_set_comprehension_flow():
x = {y for y in [SOURCE]}
SINK(x.pop()) #$ flow="SOURCE, l:-1 -> x.pop()"
def test_set_comprehension_inflow():
l = {SOURCE}
x = {y for y in l}
SINK(x.pop()) #$ flow="SOURCE, l:-2 -> x.pop()"
def test_nested_set_display():
x = {*{SOURCE}}
SINK(x.pop()) #$ MISSING:flow="SOURCE, l:-1 -> x.pop()"
# 6.2.7. Dictionary displays
def test_dict_display():
x = {"s": SOURCE}
SINK(x["s"]) #$ flow="SOURCE, l:-1 -> x['s']"
def test_dict_display_pop():
x = {"s": SOURCE}
SINK(x.pop("s")) #$ flow="SOURCE, l:-1 -> x.pop(..)"
def test_dict_comprehension():
x = {y: SOURCE for y in ["s"]}
SINK(x["s"]) #$ MISSING:flow="SOURCE, l:-1 -> x['s']"
def test_dict_comprehension_pop():
x = {y: SOURCE for y in ["s"]}
SINK(x.pop("s")) #$ MISSING:flow="SOURCE, l:-1 -> x.pop()"
def test_nested_dict_display():
x = {**{"s": SOURCE}}
SINK(x["s"]) #$ MISSING:flow="SOURCE, l:-1 -> x['s']"
def test_nested_dict_display_pop():
x = {**{"s": SOURCE}}
SINK(x.pop("s")) #$ MISSING:flow="SOURCE, l:-1 -> x.pop()"
# Nested comprehensions
def test_nested_comprehension():
x = [y for z in [[SOURCE]] for y in z]
SINK(x[0]) #$ flow="SOURCE, l:-1 -> x[0]"
def test_nested_comprehension_deep_with_local_flow():
x = [y for v in [[[[SOURCE]]]] for u in v for z in u for y in z]
SINK(x[0]) #$ flow="SOURCE, l:-1 -> x[0]"
def test_nested_comprehension_dict():
d = {"s": [SOURCE]}
x = [y for k, v in d.items() for y in v]
SINK(x[0]) #$ flow="SOURCE, l:-2 -> x[0]"
def test_nested_comprehension_paren():
x = [y for y in (z for z in [SOURCE])]
SINK(x[0]) #$ flow="SOURCE, l:-1 -> x[0]"
# Iterable unpacking in comprehensions
def test_unpacking_comprehension():
x = [a for (a, b) in [(SOURCE, NONSOURCE)]]
SINK(x[0]) #$ flow="SOURCE, l:-1 -> x[0]"
def test_star_unpacking_comprehension():
x = [a[0] for (*a, b) in [(SOURCE, NONSOURCE)]]
SINK(x[0]) #$ flow="SOURCE, l:-1 -> x[0]"
# 6.2.8. Generator expressions
def test_generator():
x = (SOURCE for y in [NONSOURCE])
SINK([*x][0]) #$ MISSING:flow="SOURCE, l:-1 -> List[0]"
# 6.2.9. Yield expressions
def gen(x):
yield x
def test_yield():
g = gen(SOURCE)
SINK(next(g)) #$ MISSING:flow="SOURCE, l:-1 -> next()"
def gen_from(x):
yield from gen(x)
def test_yield_from():
g = gen_from(SOURCE)
SINK(next(g)) #$ MISSING:flow="SOURCE, l:-1 -> next()"
# a statement rather than an expression, but related to generators
def test_for():
for x in gen(SOURCE):
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
# 6.2.9.1. Generator-iterator methods
def test___next__():
g = gen(SOURCE)
SINK(g.__next__()) #$ MISSING:flow="SOURCE, l:-1 -> g.__next__()"
def gen2(x):
# argument of `send` has to flow to value of `yield x` (and so to `m`)
m = yield x
yield m
def test_send():
g = gen2(NONSOURCE)
n = next(g)
SINK(g.send(SOURCE)) #$ MISSING:flow="SOURCE -> g.send()"
def gen_ex(x):
try:
yield NONSOURCE
except:
yield x # `x` has to flow to call to `throw`
def test_throw():
g = gen_ex(SOURCE)
n = next(g)
SINK(g.throw(TypeError)) #$ MISSING:flow="SOURCE, l:-2 -> g.throw()"
# no `test_close` as `close` involves no data flow
# 6.2.9.3. Asynchronous generator functions
async def agen(x):
yield x
# 6.2.9.4. Asynchronous generator-iterator methods
# helper to run async test functions
def runa(a):
import asyncio
asyncio.run(a)
async def atest___anext__():
g = agen(SOURCE)
SINK(await g.__anext__()) #$ MISSING:flow="SOURCE, l:-1 -> g.__anext__()"
def test___anext__():
runa(atest___anext__())
async def agen2(x):
# argument of `send` has to flow to value of `yield x` (and so to `m`)
m = yield x
yield m
async def atest_asend():
g = agen2(NONSOURCE)
n = await g.__anext__()
SINK(await g.asend(SOURCE)) #$ MISSING:flow="SOURCE -> g.asend()"
def test_asend():
runa(atest_asend())
async def agen_ex(x):
try:
yield NONSOURCE
except:
yield x # `x` has to flow to call to `athrow`
async def atest_athrow():
g = agen_ex(SOURCE)
n = await g.__anext__()
SINK(await g.athrow(TypeError)) #$ MISSING:flow="SOURCE, l:-2 -> g.athrow()"
def test_athrow():
runa(atest_athrow())
# 6.3.1. Attribute references
class C:
a = SOURCE
@expects(2)
def test_attribute_reference():
SINK(C.a) # $ flow="SOURCE, l:-5 -> C.a"
c = C()
SINK(c.a) # $ MISSING: flow="SOURCE, l:-7 -> c.a"
# overriding __getattr__ should be tested by the class coverage tests
# 6.3.2. Subscriptions
def test_subscription_tuple():
SINK((SOURCE,)[0]) #$ flow="SOURCE -> Tuple[0]"
def test_subscription_list():
SINK([SOURCE][0]) #$ flow="SOURCE -> List[0]"
def test_subscription_mapping():
SINK({"s": SOURCE}["s"]) #$ flow="SOURCE -> Dict['s']"
# overriding __getitem__ should be tested by the class coverage tests
# 6.3.3. Slicings
l = [SOURCE]
def test_slicing():
s = l[0:1:1]
SINK(s[0]) #$ MISSING:flow="SOURCE -> s[0]"
# The grammar seems to allow `l[0:1:1, 0:1]`, but the interpreter does not like it
# 6.3.4. Calls
def second(a, b):
return b
def test_call_positional():
SINK(second(NONSOURCE, SOURCE)) #$ flow="SOURCE -> second(..)"
def test_call_positional_negative():
SINK_F(second(SOURCE, NONSOURCE))
def test_call_keyword():
SINK(second(NONSOURCE, b=SOURCE)) #$ flow="SOURCE -> second(..)"
def test_call_unpack_iterable():
SINK(second(NONSOURCE, *[SOURCE])) #$ MISSING:flow="SOURCE -> second(..)"
def test_call_unpack_mapping():
SINK(second(NONSOURCE, **{"b": SOURCE})) #$ flow="SOURCE -> second(..)"
def f_extra_pos(a, *b):
return b[0]
def test_call_extra_pos():
SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ flow="SOURCE -> f_extra_pos(..)"
def f_extra_keyword(a, **b):
return b["b"]
def test_call_extra_keyword():
SINK(f_extra_keyword(NONSOURCE, b=SOURCE)) #$ flow="SOURCE -> f_extra_keyword(..)"
# return the name of the first extra keyword argument
def f_extra_keyword_flow(**a):
return [*a][0]
# call the function with our source as the name of the keyword arguemnt
def test_call_extra_keyword_flow():
SINK(f_extra_keyword_flow(**{SOURCE: None})) #$ MISSING:flow="SOURCE -> f_extra_keyword(..)"
# 6.11. Boolean operations
def test_or(x = False):
# if we don't know the value of the lhs, we should always add flow
SINK(x or SOURCE) #$ flow="SOURCE -> BoolExpr"
def test_and(x = True):
# if we don't know the value of the lhs, we should always add flow
SINK(x and SOURCE) #$ flow="SOURCE -> BoolExpr"
# 6.12. Assignment expressions
def test_assignment_expression_flow_lhs():
x = NONSOURCE
if x := SOURCE:
SINK(x) #$ flow="SOURCE, l:-1 -> x"
def test_assignment_expression_flow_out():
x = NONSOURCE
SINK(x := SOURCE) #$ flow="SOURCE -> AssignExpr"
# 6.13. Conditional expressions
def test_conditional_true():
SINK(SOURCE if True else NONSOURCE) #$ flow="SOURCE -> IfExp"
def test_conditional_true_guards():
SINK_F(NONSOURCE if True else SOURCE)
def test_conditional_false():
SINK(NONSOURCE if False else SOURCE) #$ flow="SOURCE -> IfExp"
def test_conditional_false_guards():
SINK_F(SOURCE if False else NONSOURCE)
# Condition is evaluated first, so x is SOURCE once chosen
def test_conditional_evaluation_true():
x = NONSOURCE
SINK(x if (SOURCE == (x := SOURCE)) else NONSOURCE) #$ flow="SOURCE -> IfExp"
# Condition is evaluated first, so x is SOURCE once chosen
def test_conditional_evaluation_false():
x = NONSOURCE
SINK(NONSOURCE if (NONSOURCE == (x := SOURCE)) else x) #$ flow="SOURCE -> IfExp"
# 6.14. Lambdas
def test_lambda():
def f(x):
return x
SINK(f(SOURCE)) #$ flow="SOURCE -> f(..)"
def test_lambda_positional():
def second(a, b):
return b
SINK(second(NONSOURCE, SOURCE)) #$ flow="SOURCE -> second(..)"
def test_lambda_positional_negative():
def second(a, b):
return b
SINK_F(second(SOURCE, NONSOURCE))
def test_lambda_keyword():
def second(a, b):
return b
SINK(second(NONSOURCE, b=SOURCE)) #$ flow="SOURCE -> second(..)"
def test_lambda_unpack_iterable():
def second(a, b):
return b
SINK(second(NONSOURCE, *[SOURCE])) #$ MISSING:flow="SOURCE -> second(..)" # Flow missing
def test_lambda_unpack_mapping():
def second(a, b):
return b
SINK(second(NONSOURCE, **{"b": SOURCE})) #$ flow="SOURCE -> second(..)"
def test_lambda_extra_pos():
f_extra_pos = lambda a, *b: b[0]
SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ flow="SOURCE -> f_extra_pos(..)"
def test_lambda_extra_keyword():
f_extra_keyword = lambda a, **b: b["b"]
SINK(f_extra_keyword(NONSOURCE, b=SOURCE)) #$ flow="SOURCE -> f_extra_keyword(..)"
# call the function with our source as the name of the keyword argument
def test_lambda_extra_keyword_flow():
# return the name of the first extra keyword argument
f_extra_keyword_flow = lambda **a: [*a][0]
SINK(f_extra_keyword_flow(**{SOURCE: None})) #$ MISSING:flow="SOURCE -> f_extra_keyword(..)"
@expects(4)
def test_swap():
a = SOURCE
b = NONSOURCE
SINK(a) #$ flow="SOURCE, l:-2 -> a"
SINK_F(b)
a, b = b, a
SINK_F(a)
SINK(b) #$ flow="SOURCE, l:-7 -> b"
@expects(2)
def test_unpacking_assignment():
t = (SOURCE, NONSOURCE)
a, b = t
SINK(a) #$ flow="SOURCE, l:-2 -> a"
SINK_F(b)
@expects(3)
def test_nested_unpacking_assignment():
t = (SOURCE, (NONSOURCE, SOURCE))
a, (b, c) = t
SINK(a) #$ flow="SOURCE, l:-2 -> a"
SINK_F(b)
SINK(c) #$ flow="SOURCE, l:-4 -> c"
@expects(2)
def test_deeply_nested_unpacking_assignment():
t = [[[[SOURCE]]], NONSOURCE]
[[[a]]], b = t
SINK(a) #$ flow="SOURCE, l:-2 -> a"
SINK_F(b)
@expects(4)
def test_iterated_unpacking_assignment():
t = (SOURCE, SOURCE, NONSOURCE)
a, *b, c = t
SINK(a) #$ flow="SOURCE, l:-2 -> a"
SINK_F(b)
SINK(b[0]) #$ flow="SOURCE, l:-4 -> b[0]"
SINK_F(c) #$ SPURIOUS: flow="SOURCE, l:-5 -> c" # We do not track tuple sizes
@expects(3)
def test_iterated_unpacking_assignment_shrink():
t = (SOURCE, SOURCE)
a, *b, c = t
SINK(a) #$ flow="SOURCE, l:-2 -> a"
SINK_F(b)
SINK(c) #$ flow="SOURCE, l:-4 -> c"
@expects(15)
def test_unpacking_assignment_conversion():
ll = [[SOURCE, NONSOURCE, SOURCE], [SOURCE], [NONSOURCE]]
# tuple
((a1, a2, a3), b, c) = ll
SINK(a1) #$ flow="SOURCE, l:-4 -> a1"
SINK_F(a2) #$ SPURIOUS: flow="SOURCE, l:-5 -> a2" # We expect an FP as all elements are tainted
SINK(a3) #$ flow="SOURCE, l:-6 -> a3"
SINK_F(b) # The list itself is not tainted
SINK_F(c)
# mixed
[(a1, a2, a3), b, c] = ll
SINK(a1) #$ flow="SOURCE, l:-12 -> a1"
SINK_F(a2) #$ SPURIOUS: flow="SOURCE, l:-13 -> a2" # We expect an FP as all elements are tainted
SINK(a3) #$ flow="SOURCE, l:-14 -> a3"
SINK_F(b) # The list itself is not tainted
SINK_F(c)
# mixed differently
([a1, a2, a3], b, c) = ll
SINK(a1) #$ flow="SOURCE, l:-20 -> a1"
SINK_F(a2) #$ SPURIOUS: flow="SOURCE, l:-21 -> a2" # We expect an FP as all elements are tainted
SINK(a3) #$ flow="SOURCE, l:-22 -> a3"
SINK_F(b) # The list itself is not tainted
SINK_F(c)
@expects(24)
def test_iterated_unpacking_assignment_conversion():
tt = ((SOURCE, NONSOURCE, SOURCE),NONSOURCE)
# list
[[a1, *a2], *b] = tt
SINK(a1) #$ flow="SOURCE, l:-4 -> a1"
SINK_F(a2) # The list itself is not tainted
SINK_F(a2[0]) #$ SPURIOUS: flow="SOURCE, l:-6 -> a2[0]" # FP here due to list abstraction
SINK(a2[1]) #$ flow="SOURCE, l:-7 -> a2[1]"
SINK_F(b) # The list itself is not tainted
SINK_F(b[0])
# tuple
((a1, *a2), *b) = tt
SINK(a1) #$ flow="SOURCE, l:-13 -> a1"
SINK_F(a2) # The list itself is not tainted
SINK_F(a2[0]) #$ SPURIOUS: flow="SOURCE, l:-15 -> a2[0]" # FP here due to list abstraction
SINK(a2[1]) #$ flow="SOURCE, l:-16 -> a2[1]"
SINK_F(b) # The list itself is not tainted
SINK_F(b[0])
# mixed
[(a1, *a2), *b] = tt
SINK(a1) #$ flow="SOURCE, l:-22 -> a1"
SINK_F(a2) # The list itself is not tainted
SINK_F(a2[0]) #$ SPURIOUS: flow="SOURCE, l:-24 -> a2[0]" # FP here due to list abstraction
SINK(a2[1]) #$ flow="SOURCE, l:-25 -> a2[1]"
SINK_F(b) # The list itself is not tainted
SINK_F(b[0])
# mixed differently
([a1, *a2], *b) = tt
SINK(a1) #$ flow="SOURCE, l:-31 -> a1"
SINK_F(a2) # The list itself is not tainted
SINK_F(a2[0]) #$ SPURIOUS: flow="SOURCE, l:-33 -> a2[0]" # FP here due to list abstraction
SINK(a2[1]) #$ flow="SOURCE, l:-34 -> a2[1]"
SINK_F(b) # The list itself is not tainted
SINK_F(b[0])
@expects(3)
def test_iterable_repacking():
a, *(b, c) = (SOURCE, NONSOURCE, SOURCE)
SINK(a) #$ flow="SOURCE, l:-1 -> a"
SINK_F(b)
SINK(c) #$ MISSING: flow="SOURCE, l:-3 -> c"
@expects(4)
def test_iterable_unpacking_in_for():
tl = [(SOURCE, NONSOURCE), (SOURCE, NONSOURCE)]
for x,y in tl:
SINK(x) #$ flow="SOURCE, l:-2 -> x"
SINK_F(y)
@expects(6)
def test_iterable_star_unpacking_in_for():
tl = [(SOURCE, NONSOURCE), (SOURCE, NONSOURCE)]
for *x,y in tl:
SINK_F(x)
SINK(x[0]) #$ flow="SOURCE, l:-3 -> x[0]"
SINK_F(y) #$ SPURIOUS: flow="SOURCE, l:-4 -> y" # FP here since we do not track the tuple lenght and so `*x` could be empty
@expects(6)
def test_iterable_star_unpacking_in_for_2():
tl = [(SOURCE, NONSOURCE), (SOURCE, NONSOURCE)]
for x,*y,z in tl:
SINK(x) #$ flow="SOURCE, l:-2 -> x"
SINK_F(y) # The list itself is not tainted (and is here empty)
SINK_F(z)
def iterate_star_args(first, second, *args):
for arg in args:
SINK(arg) #$ flow="SOURCE, l:+5 -> arg" flow="SOURCE, l:+6 -> arg"
# FP reported here: https://github.com/github/codeql-python-team/issues/49
@expects(2)
def test_overflow_iteration():
s = SOURCE
iterate_star_args(NONSOURCE, NONSOURCE, SOURCE, s)
@expects(6)
def test_deep_callgraph():
# port of python/ql/test/library-tests/taint/general/deep.py
# based on the fact that `test_deep_callgraph_defined_in_module` works the problem
# seems to be that we're defining these functions inside another function and that
# the flow of these function definitions DOESN'T flow into the body of the `f<n>`
# functions (they DO flow into the body of `test_deep_callgraph`, otherwise the
# `f1` call wouldn't work).
def f1(arg):
return arg
def f2(arg):
return f1(arg)
def f3(arg):
return f2(arg)
def f4(arg):
return f3(arg)
def f5(arg):
return f4(arg)
def f6(arg):
return f5(arg)
x = f6(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f5(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f4(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f3(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f2(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f1(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
def wat_f1(arg):
return arg
def wat_f2(arg):
return wat_f1(arg)
def wat_f3(arg):
return wat_f2(arg)
def wat_f4(arg):
return wat_f3(arg)
def wat_f5(arg):
return wat_f4(arg)
def wat_f6(arg):
return wat_f5(arg)
@expects(6)
def test_deep_callgraph_defined_in_module():
x = wat_f6(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = wat_f5(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = wat_f4(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = wat_f3(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = wat_f2(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = wat_f1(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"
@expects(2)
def test_dynamic_tuple_creation_1():
tup = tuple()
tup += (SOURCE,)
tup += (NONSOURCE,)
SINK(tup[0]) #$ MISSING:flow="SOURCE, l:-3 -> tup[0]"
SINK_F(tup[1])
@expects(2)
def test_dynamic_tuple_creation_2():
tup = ()
tup += (SOURCE,)
tup += (NONSOURCE,)
SINK(tup[0]) #$ MISSING:flow="SOURCE, l:-3 -> tup[0]"
SINK_F(tup[1])
@expects(2)
def test_dynamic_tuple_creation_3():
tup1 = (SOURCE,)
tup2 = (NONSOURCE,)
tup = tup1 + tup2
SINK(tup[0]) #$ MISSING:flow="SOURCE, l:-4 -> tup[0]"
SINK_F(tup[1])
# Inspired by FP-report https://github.com/github/codeql/issues/4239
@expects(2)
def test_dynamic_tuple_creation_4():
tup = ()
for item in [SOURCE, NONSOURCE]:
tup += (item,)
SINK(tup[0]) #$ MISSING:flow="SOURCE, l:-3 -> tup[0]"
SINK_F(tup[1])
def return_from_inner_scope(x):
try:
return x[0]
except IndexError:
return SOURCE
def test_return_from_inner_scope():
SINK(return_from_inner_scope([])) #$ flow="SOURCE, l:-3 -> return_from_inner_scope(..)"
# Inspired by reverse read inconsistency check
def insertAtA(d):
d["a"] = SOURCE
def test_reverse_read_subscript():
d = {"a": NONSOURCE}
l = [d]
insertAtA(l[0])
SINK(d["a"]) #$ MISSING:flow="SOURCE, l-6 -> d['a']""
def test_reverse_read_dict_arg():
d = {"a": NONSOURCE}
dd = {"d": d}
insertAtA(**dd)
SINK(d["a"]) #$ MISSING:flow="SOURCE, l-12 -> d['a']""
class WithA:
def setA(self, v):
self.a = v
def __init__(self):
self.a = ""
def test_reverse_read_subscript_cls():
withA = WithA()
l = [withA]
l[0].setA(SOURCE)
SINK(withA.a) #$ MISSING:flow="SOURCE, l:-1 -> self.a"
@expects(3)
def test_with_default_param_value(x=SOURCE, /, y=SOURCE, *, z=SOURCE):
SINK(x) #$ flow="SOURCE, l:-1 -> x"
SINK(y) #$ flow="SOURCE, l:-2 -> y"
SINK(z) #$ flow="SOURCE, l:-3 -> z"

View File

@@ -0,0 +1,360 @@
# This tests some of the common built-in functions and methods.
# We need a decent model of data flow through these in order to
# analyse most programs.
#
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
# This can be checked by running validTest.py.
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
if is_source(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
# Actual tests
## Container constructors
### List
@expects(2)
def test_list_from_list():
l1 = [SOURCE, NONSOURCE]
l2 = list(l1)
SINK(l2[0]) #$ flow="SOURCE, l:-2 -> l2[0]"
SINK_F(l2[1]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l2[1]"
# -- skip list_from_string
@expects(2)
def test_list_from_tuple():
t = (SOURCE, NONSOURCE)
l = list(t)
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
SINK_F(l[1]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[1]"
def test_list_from_set():
s = {SOURCE}
l = list(s)
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
@expects(2)
def test_list_from_dict():
d = {SOURCE: 'v', NONSOURCE: 'v2'}
l = list(d)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]"
SINK_F(l[1]) # expecting FP due to imprecise flow
### Tuple
@expects(2)
def test_tuple_from_list():
l = [SOURCE, NONSOURCE]
t = tuple(l)
SINK(t[0]) #$ MISSING: flow="SOURCE, l:-2 -> t[0]"
SINK_F(t[1])
@expects(2)
def test_tuple_from_tuple():
t0 = (SOURCE, NONSOURCE)
t = tuple(t0)
SINK(t[0]) #$ flow="SOURCE, l:-2 -> t[0]"
SINK_F(t[1])
def test_tuple_from_set():
s = {SOURCE}
t = tuple(s)
SINK(t[0]) #$ MISSING: flow="SOURCE, l:-2 -> t[0]"
@expects(2)
def test_tuple_from_dict():
d = {SOURCE: "v1", NONSOURCE: "v2"}
t = tuple(d)
SINK(t[0]) #$ MISSING: flow="SOURCE, l:-2 -> t[0]"
SINK_F(t[1])
### Set
def test_set_from_list():
l = [SOURCE]
s = set(l)
v = s.pop()
SINK(v) #$ flow="SOURCE, l:-3 -> v"
def test_set_from_tuple():
t = (SOURCE,)
s = set(t)
v = s.pop()
SINK(v) #$ flow="SOURCE, l:-3 -> v"
def test_set_from_set():
s0 = {SOURCE}
s = set(s0)
v = s.pop()
SINK(v) #$ flow="SOURCE, l:-3 -> v"
def test_set_from_dict():
d = {SOURCE: "val"}
s = set(d)
v = s.pop()
SINK(v) #$ MISSING: flow="SOURCE, l:-3 -> v"
### Dict
@expects(2)
def test_dict_from_keyword():
d = dict(k = SOURCE, k1 = NONSOURCE)
SINK(d["k"]) #$ flow="SOURCE, l:-1 -> d['k']"
SINK_F(d["k1"])
@expects(2)
def test_dict_from_list():
d = dict([("k", SOURCE), ("k1", NONSOURCE)])
SINK(d["k"]) #$ MISSING: flow="SOURCE, l:-1 -> d[k]"
SINK_F(d["k1"])
@expects(2)
def test_dict_from_dict():
d1 = {'k': SOURCE, 'k1': NONSOURCE}
d2 = dict(d1)
SINK(d2["k"]) #$ flow="SOURCE, l:-2 -> d2['k']"
SINK_F(d2["k1"])
## Container methods
### List
def test_list_pop():
l = [SOURCE]
v = l.pop()
SINK(v) #$ flow="SOURCE, l:-2 -> v"
def test_list_pop_index():
l = [SOURCE]
v = l.pop(0)
SINK(v) #$ flow="SOURCE, l:-2 -> v"
def test_list_pop_index_imprecise():
l = [SOURCE, NONSOURCE]
v = l.pop(1)
SINK_F(v) #$ SPURIOUS: flow="SOURCE, l:-2 -> v"
@expects(2)
def test_list_copy():
l0 = [SOURCE, NONSOURCE]
l = l0.copy()
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
SINK_F(l[1]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[1]"
def test_list_append():
l = [NONSOURCE]
l.append(SOURCE)
SINK(l[1]) #$ flow="SOURCE, l:-1 -> l[1]"
### Set
def test_set_pop():
s = {SOURCE}
v = s.pop()
SINK(v) #$ flow="SOURCE, l:-2 -> v"
def test_set_copy():
s0 = {SOURCE}
s = s0.copy()
SINK(s.pop()) #$ flow="SOURCE, l:-2 -> s.pop()"
def test_set_add():
s = set([])
s.add(SOURCE)
SINK(s.pop()) #$ flow="SOURCE, l:-1 -> s.pop()"
### Dict
def test_dict_keys():
d = {SOURCE: "value"}
keys = d.keys()
key_list = list(keys)
SINK(key_list[0]) #$ MISSING: flow="SOURCE, l:-3 -> key_list[0]"
def test_dict_values():
d = {'k': SOURCE}
vals = d.values()
val_list = list(vals)
SINK(val_list[0]) #$ flow="SOURCE, l:-3 -> val_list[0]"
@expects(4)
def test_dict_items():
d = {'k': SOURCE, SOURCE: "value"}
items = d.items()
item_list = list(items)
SINK_F(item_list[0][0]) # expecting FP due to imprecise flow
SINK(item_list[0][1]) #$ flow="SOURCE, l:-4 -> item_list[0][1]"
SINK(item_list[1][0]) #$ MISSING: flow="SOURCE, l:-5 -> item_list[1][0]"
SINK_F(item_list[1][1]) #$ SPURIOUS: flow="SOURCE, l:-6 -> item_list[1][1]"
@expects(3)
def test_dict_pop():
d = {'k': SOURCE}
v = d.pop("k")
SINK(v) #$ flow="SOURCE, l:-2 -> v"
v1 = d.pop("k", NONSOURCE)
SINK_F(v1) #$ SPURIOUS: flow="SOURCE, l:-4 -> v1"
v2 = d.pop("non-existing", SOURCE)
SINK(v2) #$ flow="SOURCE, l:-1 -> v2"
@expects(3)
def test_dict_get():
d = {'k': SOURCE}
v = d.get("k")
SINK(v) #$ flow="SOURCE, l:-2 -> v"
v1 = d.get("non-existing", SOURCE)
SINK(v1) #$ flow="SOURCE, l:-1 -> v1"
k = "k"
v2 = d.get(k)
SINK(v2) #$ flow="SOURCE, l:-7 -> v2"
@expects(2)
def test_dict_popitem():
d = {'k': SOURCE}
t = d.popitem() # could be any pair (before 3.7), but we only have one
SINK_F(t[0])
SINK(t[1]) #$ flow="SOURCE, l:-3 -> t[1]"
@expects(2)
def test_dict_copy():
d = {'k': SOURCE, 'k1': NONSOURCE}
d1 = d.copy()
SINK(d1["k"]) #$ flow="SOURCE, l:-2 -> d1['k']"
SINK_F(d1["k1"])
## Functions on containers
### sorted
def test_sorted_list():
l0 = [SOURCE]
l = sorted(l0)
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
def test_sorted_tuple():
t = (SOURCE,)
l = sorted(t)
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
def test_sorted_set():
s = {SOURCE}
l = sorted(s)
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
def test_sorted_dict():
d = {SOURCE: "val"}
l = sorted(d)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]"
### reversed
@expects(2)
def test_reversed_list():
l0 = [SOURCE, NONSOURCE]
r = reversed(l0)
l = list(r)
SINK_F(l[0]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[0]"
SINK(l[1]) #$ flow="SOURCE, l:-4 -> l[1]"
@expects(2)
def test_reversed_tuple():
t = (SOURCE, NONSOURCE)
r = reversed(t)
l = list(r)
SINK_F(l[0]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[0]"
SINK(l[1]) #$ flow="SOURCE, l:-4 -> l[1]"
@expects(2)
def test_reversed_dict():
d = {SOURCE: "v1", NONSOURCE: "v2"}
r = reversed(d)
l = list(r)
SINK_F(l[0])
SINK(l[1]) #$ MISSING: flow="SOURCE, l:-4 -> l[1]"
### iter
def test_iter_list():
l0 = [SOURCE]
i = iter(l0)
l = list(i)
SINK(l[0]) #$ flow="SOURCE, l:-3 -> l[0]"
def test_iter_tuple():
t = (SOURCE,)
i = iter(t)
l = list(i)
SINK(l[0]) #$ flow="SOURCE, l:-3 -> l[0]"
def test_iter_set():
t = {SOURCE}
i = iter(t)
l = list(i)
SINK(l[0]) #$ flow="SOURCE, l:-3 -> l[0]"
def test_iter_dict():
d = {SOURCE: "val"}
i = iter(d)
l = list(i)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-3 -> l[0]"
def test_iter_iter():
# applying iter() to the result of iter() is basically a no-op
l0 = [SOURCE]
i = iter(iter(l0))
l = list(i)
SINK(l[0]) #$ flow="SOURCE, l:-3 -> l[0]"
### next
def test_next_list():
l = [SOURCE]
i = iter(l)
n = next(i)
SINK(n) #$ flow="SOURCE, l:-3 -> n"
def test_next_tuple():
t = (SOURCE,)
i = iter(t)
n = next(i)
SINK(n) #$ flow="SOURCE, l:-3 -> n"
def test_next_set():
s = {SOURCE}
i = iter(s)
n = next(i)
SINK(n) #$ flow="SOURCE, l:-3 -> n"
def test_next_dict():
d = {SOURCE: "val"}
i = iter(d)
n = next(i)
SINK(n) #$ MISSING: flow="SOURCE, l:-3 -> n"

View File

@@ -0,0 +1,31 @@
def_count
| 4 |
def
| def_use_flow.py:10:5:10:5 | Essa node definition |
| def_use_flow.py:17:11:17:11 | Essa node definition |
| def_use_flow.py:19:9:19:9 | Essa node definition |
| def_use_flow.py:21:7:21:7 | Essa node definition |
implicit_use_count
| 0 |
implicit_use
source_use_count
| 3 |
source_use
| def_use_flow.py:28:15:28:15 | ControlFlowNode for x |
| def_use_flow.py:30:13:30:13 | ControlFlowNode for x |
| def_use_flow.py:32:11:32:11 | ControlFlowNode for x |
def_use_edge_count
| 12 |
def_use_edge
| def_use_flow.py:10:5:10:5 | SSA variable x | def_use_flow.py:28:15:28:15 | ControlFlowNode for x |
| def_use_flow.py:10:5:10:5 | SSA variable x | def_use_flow.py:30:13:30:13 | ControlFlowNode for x |
| def_use_flow.py:10:5:10:5 | SSA variable x | def_use_flow.py:32:11:32:11 | ControlFlowNode for x |
| def_use_flow.py:17:11:17:11 | SSA variable x | def_use_flow.py:28:15:28:15 | ControlFlowNode for x |
| def_use_flow.py:17:11:17:11 | SSA variable x | def_use_flow.py:30:13:30:13 | ControlFlowNode for x |
| def_use_flow.py:17:11:17:11 | SSA variable x | def_use_flow.py:32:11:32:11 | ControlFlowNode for x |
| def_use_flow.py:19:9:19:9 | SSA variable x | def_use_flow.py:28:15:28:15 | ControlFlowNode for x |
| def_use_flow.py:19:9:19:9 | SSA variable x | def_use_flow.py:30:13:30:13 | ControlFlowNode for x |
| def_use_flow.py:19:9:19:9 | SSA variable x | def_use_flow.py:32:11:32:11 | ControlFlowNode for x |
| def_use_flow.py:21:7:21:7 | SSA variable x | def_use_flow.py:28:15:28:15 | ControlFlowNode for x |
| def_use_flow.py:21:7:21:7 | SSA variable x | def_use_flow.py:30:13:30:13 | ControlFlowNode for x |
| def_use_flow.py:21:7:21:7 | SSA variable x | def_use_flow.py:32:11:32:11 | ControlFlowNode for x |

View File

@@ -0,0 +1,47 @@
import python
private import semmle.python.dataflow.new.internal.DataFlowPrivate
query int def_count() {
exists(SsaSourceVariable x | x.getName() = "x" |
result = count(EssaNodeDefinition def | def.getSourceVariable() = x)
)
}
query EssaNodeDefinition def() {
exists(SsaSourceVariable x | x.getName() = "x" | result.getSourceVariable() = x)
}
query int implicit_use_count() {
exists(SsaSourceVariable x | x.getName() = "x" | result = count(x.getAnImplicitUse()))
}
query ControlFlowNode implicit_use() {
exists(SsaSourceVariable x | x.getName() = "x" | result = x.getAnImplicitUse())
}
query int source_use_count() {
exists(SsaSourceVariable x | x.getName() = "x" | result = count(x.getASourceUse()))
}
query ControlFlowNode source_use() {
exists(SsaSourceVariable x | x.getName() = "x" | result = x.getASourceUse())
}
query int def_use_edge_count() {
exists(SsaSourceVariable x | x.getName() = "x" |
result =
count(EssaVariable v, NameNode use |
v.getSourceVariable() = x and
use = x.getAUse() and
LocalFlow::defToFirstUse(v, use)
)
)
}
query predicate def_use_edge(EssaVariable v, NameNode use) {
exists(SsaSourceVariable x | x.getName() = "x" |
v.getSourceVariable() = x and
use = x.getAUse() and
LocalFlow::defToFirstUse(v, use)
)
}

View File

@@ -0,0 +1,36 @@
# This test file is inspired by
# `csharp/ql/test/library-tests/dataflow/local/UseUseExplosion.cs`
# but with `n=3` kept small, since we do have the explosion.
cond = ...
# global variables are slightly special,
# so we go into a function scope
def scope():
x = 0
if(cond > 3):
if(cond > 2):
if(cond > 1):
pass
else:
x = 1
else:
x = 2
else:
x = 3
if(cond > 3):
if(cond > 2):
if(cond > 1):
pass
else:
use(x)
else:
use(x)
else:
use(x)
def use(v):
# this could just be `pass` but we do not want it optimized away.
y = v+2

View File

@@ -0,0 +1,24 @@
| class_example.py:0:0:0:0 | Module class_example | class_example.py:1:1:1:3 | ControlFlowNode for wat |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:1:7:1:7 | ControlFlowNode for IntegerLiteral |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:3:1:3:10 | ControlFlowNode for ClassExpr |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:3:7:3:9 | ControlFlowNode for Wat |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:4:5:4:7 | ControlFlowNode for wat |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:4:11:4:11 | ControlFlowNode for IntegerLiteral |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:5:5:9 | ControlFlowNode for print |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:5:5:26 | ControlFlowNode for print() |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:11:5:20 | ControlFlowNode for StringLiteral |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:23:5:25 | ControlFlowNode for wat |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:1:7:5 | ControlFlowNode for print |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:1:7:23 | ControlFlowNode for print() |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:7:7:17 | ControlFlowNode for StringLiteral |
| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:20:7:22 | ControlFlowNode for wat |
| generator.py:0:0:0:0 | Module generator | generator.py:1:1:1:23 | ControlFlowNode for FunctionExpr |
| generator.py:0:0:0:0 | Module generator | generator.py:1:5:1:18 | ControlFlowNode for generator_func |
| generator.py:1:1:1:23 | Function generator_func | generator.py:1:20:1:21 | ControlFlowNode for xs |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for ListComp |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for Yield |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for x |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:19:2:19 | ControlFlowNode for x |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:24:2:25 | ControlFlowNode for xs |

View File

@@ -0,0 +1,6 @@
import python
import semmle.python.dataflow.new.DataFlow
from DataFlow::CfgNode node
where exists(node.getLocation().getFile().getRelativePath())
select node.getEnclosingCallable() as enclosingCallable, node

View File

@@ -0,0 +1,7 @@
wat = 1
class Wat:
wat = 2
print("in class", wat) # prints 2
print("in module", wat) # prints 1

View File

@@ -0,0 +1,2 @@
def generator_func(xs):
return [x for x in xs]

View File

@@ -0,0 +1,3 @@
missingAnnotationOnSink
testFailures
failures

View File

@@ -0,0 +1,2 @@
import python
import experimental.dataflow.TestUtil.NormalDataflowTest

View File

@@ -0,0 +1,35 @@
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
if is_source(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
def test_as_binding():
try:
e_with_source = Exception()
e_with_source.a = SOURCE
raise e_with_source
except Exception as e:
SINK(e.a) # $ MISSING: flow

View File

@@ -0,0 +1,77 @@
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
if is_source(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
def test_as_binding():
try:
e_with_source = Exception()
e_with_source.a = SOURCE
raise e_with_source
except* Exception as eg:
SINK(eg.exceptions[0].a) # $ MISSING: flow
@expects(4)
def test_group():
value_error_with_source = ValueError()
value_error_with_source.a = SOURCE
type_error_without_source = TypeError()
type_error_without_source.a = NONSOURCE
os_error_without_source = OSError()
os_error_without_source.a = NONSOURCE
eg = ExceptionGroup(
"one",
[
type_error_without_source,
ExceptionGroup(
"two",
[type_error_without_source, value_error_with_source]
),
ExceptionGroup(
"three",
[os_error_without_source]
)
]
)
try:
raise eg
except* (TypeError, OSError) as es:
# The matched sub-group, represented by `es` is filtered,
# but the nesting is in place.
SINK_F(es.exceptions[0].a)
SINK_F(es.exceptions[1].exceptions[0].a)
SINK_F(es.exceptions[2].exceptions[0].a)
except* ValueError as es:
# The matched sub-group, represented by `es` is filtered,
# but the nesting is in place.
# So the ValueError was originally found in
# `eg.exceptions[1].exceptions[1].a`
# but now it is found in
# `es.exceptions[0].exceptions[0].a`
SINK(es.exceptions[0].exceptions[0].a) # $ MISSING: flow

View File

@@ -0,0 +1,3 @@
missingAnnotationOnSink
testFailures
failures

View File

@@ -0,0 +1,2 @@
import python
import experimental.dataflow.TestUtil.NormalDataflowTest

View File

@@ -0,0 +1,2 @@
testFailures
failures

View File

@@ -0,0 +1,14 @@
import python
import experimental.dataflow.TestUtil.UnresolvedCalls
private import semmle.python.dataflow.new.DataFlow
module IgnoreDictMethod implements UnresolvedCallExpectationsSig {
predicate unresolvedCall(CallNode call) {
DefaultUnresolvedCallExpectations::unresolvedCall(call) and
not any(DataFlow::MethodCallNode methodCall |
methodCall.getMethodName() in ["get", "setdefault"]
).asCfgNode() = call
}
}
import MakeUnresolvedCallExpectations<IgnoreDictMethod>

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=3

View File

@@ -0,0 +1,618 @@
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__)))) # $ unresolved_call=sys.path.append(..)
from testlib import expects
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x, *, not_present_at_runtime=False):
# not_present_at_runtime supports use-cases where we want flow from data-flow layer
# (so we want to use SINK), but we end up in a siaution where it's not possible to
# actually get flow from a source at runtime. The only use-case is for the
# cross-talk tests, where our ability to use if-then-else is limited because doing
# so would make cfg-splitting kick in, and that would solve the problem trivially
# (by the splitting).
if not_present_at_runtime:
print("OK")
return
if is_source(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
# ------------------------------------------------------------------------------
# Actual tests
# ------------------------------------------------------------------------------
class MyObj(object):
def __init__(self, foo):
self.foo = foo
def setFoo(self, foo):
self.foo = foo
def setFoo(obj, x):
obj.foo = x
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_indirect_assign():
myobj = MyObj(NONSOURCE)
SINK_F(myobj.foo)
setFoo(myobj, SOURCE)
SINK(myobj.foo) # $ flow="SOURCE, l:-1 -> myobj.foo"
setFoo(myobj, NONSOURCE)
SINK_F(myobj.foo) # $ SPURIOUS: flow="SOURCE, l:-4 -> myobj.foo"
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_indirect_assign_method():
myobj = MyObj(NONSOURCE)
SINK_F(myobj.foo)
myobj.setFoo(SOURCE)
SINK(myobj.foo) # $ flow="SOURCE, l:-1 -> myobj.foo"
myobj.setFoo(NONSOURCE)
SINK_F(myobj.foo) # $ SPURIOUS: flow="SOURCE, l:-4 -> myobj.foo"
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_indirect_assign_bound_method():
myobj = MyObj(NONSOURCE)
SINK_F(myobj.foo)
sf = myobj.setFoo
sf(SOURCE)
SINK(myobj.foo) # $ flow="SOURCE, l:-1 -> myobj.foo"
sf(NONSOURCE)
SINK_F(myobj.foo) # $ SPURIOUS: flow="SOURCE, l:-4 -> myobj.foo"
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_direct_assign():
myobj = MyObj(NONSOURCE)
SINK_F(myobj.foo)
myobj.foo = SOURCE
SINK(myobj.foo) # $ flow="SOURCE, l:-1 -> myobj.foo"
myobj.foo = NONSOURCE
SINK_F(myobj.foo)
def test_direct_if_assign(cond = False):
myobj = MyObj(NONSOURCE)
myobj.foo = SOURCE
if cond:
myobj.foo = NONSOURCE
SINK_F(myobj.foo)
SINK(myobj.foo) # $ flow="SOURCE, l:-4 -> myobj.foo"
@expects(2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_direct_if_always_assign(cond = True):
myobj = MyObj(NONSOURCE)
myobj.foo = SOURCE
if cond:
myobj.foo = NONSOURCE
SINK_F(myobj.foo)
else:
myobj.foo = NONSOURCE
SINK_F(myobj.foo)
SINK_F(myobj.foo)
def test_getattr():
myobj = MyObj(NONSOURCE)
myobj.foo = SOURCE
SINK(getattr(myobj, "foo")) # $ flow="SOURCE, l:-1 -> getattr(..)"
def test_setattr():
myobj = MyObj(NONSOURCE)
setattr(myobj, "foo", SOURCE)
SINK(myobj.foo) # $ flow="SOURCE, l:-1 -> myobj.foo"
def test_setattr_getattr():
myobj = MyObj(NONSOURCE)
setattr(myobj, "foo", SOURCE)
SINK(getattr(myobj, "foo")) # $ flow="SOURCE, l:-1 -> getattr(..)"
def test_setattr_getattr_overwrite():
myobj = MyObj(NONSOURCE)
setattr(myobj, "foo", SOURCE)
setattr(myobj, "foo", NONSOURCE)
SINK_F(getattr(myobj, "foo"))
def test_constructor_assign():
obj = MyObj(SOURCE)
SINK(obj.foo) # $ flow="SOURCE, l:-1 -> obj.foo"
def test_constructor_assign_kw():
obj = MyObj(foo=SOURCE)
SINK(obj.foo) # $ flow="SOURCE, l:-1 -> obj.foo"
def fields_with_local_flow(x):
obj = MyObj(x)
a = obj.foo
return a
def test_fields():
SINK(fields_with_local_flow(SOURCE)) # $ flow="SOURCE -> fields_with_local_flow(..)"
def call_with_source(func):
func(SOURCE)
def test_bound_method_passed_as_arg():
myobj = MyObj(NONSOURCE)
call_with_source(myobj.setFoo)
SINK(myobj.foo) # $ MISSING: flow="SOURCE, l:-5 -> foo.x"
# ------------------------------------------------------------------------------
# Nested Object
# ------------------------------------------------------------------------------
class NestedObj(object):
def __init__(self):
self.obj = MyObj("OK")
def getObj(self):
return self.obj
def test_nested_obj():
x = SOURCE
a = NestedObj()
a.obj.foo = x
SINK(a.obj.foo) # $ flow="SOURCE, l:-3 -> a.obj.foo"
def test_nested_obj_method():
x = SOURCE
a = NestedObj()
a.getObj().foo = x
SINK(a.obj.foo) # $ flow="SOURCE, l:-3 -> a.obj.foo"
# ------------------------------------------------------------------------------
# Field access on compound arguments
# ------------------------------------------------------------------------------
# TODO: Add support for this, see https://github.com/github/codeql/pull/10444
@expects(5) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_field_on_compound_arg(cond_true=True, cond_false=False):
class Ex:
def __init__(self):
self.attr = None
def set_attr(obj):
obj.attr = SOURCE
x = Ex()
y = Ex()
set_attr(x if cond_true else y)
SINK(x.attr) # $ MISSING: flow
x = Ex()
y = Ex()
set_attr(x if cond_false else y)
SINK(y.attr) # $ MISSING: flow
x = Ex()
y = Ex()
z = Ex()
set_attr(x if cond_false else (y if cond_true else z))
SINK_F(x.attr) # $ MISSING: flow
SINK(y.attr) # $ MISSING: flow
SINK_F(z.attr) # $ MISSING: flow
# ------------------------------------------------------------------------------
# Content in class attribute
# ------------------------------------------------------------------------------
class WithTuple:
my_tuple = (SOURCE, NONSOURCE)
def test_inst(self):
SINK(self.my_tuple[0]) # $ MISSING: flow
SINK_F(self.my_tuple[1])
def test_inst_no_call(self):
SINK(self.my_tuple[0]) # $ MISSING: flow
SINK_F(self.my_tuple[1])
@classmethod
def test_cm(cls):
SINK(cls.my_tuple[0]) # $ flow="SOURCE, l:-12 -> cls.my_tuple[0]"
SINK_F(cls.my_tuple[1])
@classmethod
def test_cm_no_call(cls):
SINK(cls.my_tuple[0]) # $ MISSING: flow="SOURCE, l:-8 -> cls.my_tuple[0]"
SINK_F(cls.my_tuple[1])
@expects(2*4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_WithTuple():
SINK(WithTuple.my_tuple[0]) # $ flow="SOURCE, l:-23 -> WithTuple.my_tuple[0]"
SINK_F(WithTuple.my_tuple[1])
WithTuple.test_cm()
inst = WithTuple()
inst.test_inst()
SINK(inst.my_tuple[0]) # $ MISSING: flow
SINK_F(inst.my_tuple[1])
@expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_inst_override():
inst = WithTuple()
# setting attribute on instance does not override class attribute, it's only on the
# instance!
inst.my_tuple = (NONSOURCE, SOURCE)
SINK_F(inst.my_tuple[0])
SINK(inst.my_tuple[1]) # $ flow="SOURCE, l:-3 -> inst.my_tuple[1]"
SINK(WithTuple.my_tuple[0]) # $ flow="SOURCE, l:-46 -> WithTuple.my_tuple[0]"
SINK_F(WithTuple.my_tuple[1])
class WithTuple2:
my_tuple = (NONSOURCE,)
def set_to_source():
WithTuple2.my_tuple = (SOURCE,)
@expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_global_flow_to_class_attribute():
inst = WithTuple2()
SINK_F(WithTuple2.my_tuple[0])
SINK_F(inst.my_tuple[0])
set_to_source()
SINK(WithTuple2.my_tuple[0]) # $ MISSING: flow="SOURCE, l:-10 -> WithTuple2.my_tuple[0]"
SINK(inst.my_tuple[0]) # $ MISSING: flow="SOURCE, l:-11 -> inst.my_tuple[0]"
class Outer:
src = SOURCE
class Inner:
src = SOURCE
@expects(2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_nested_class():
SINK(Outer.src) # $ flow="SOURCE, l:-6 -> Outer.src"
SINK(Outer.Inner.src) # $ flow="SOURCE, l:-5 -> Outer.Inner.src"
# --------------------------------------
# unique classes from functions
# --------------------------------------
def make_class():
# a fresh class is returned each time this function is called
class C:
my_tuple = (NONSOURCE,)
return C
@expects(8) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_unique_class():
# This test highlights that if we use the _ClassExpr_ itself as the target/source
# for jumpsteps, we will end up with spurious flow (that is, we will think that
# x_cls and y_cls are the same, so by updating .my_tuple on x_cls we might propagate
# that to y_cls as well -- it might not matter too much in reality, but certainly an
# interesting corner case)
x_cls = make_class()
y_cls = make_class()
assert x_cls != y_cls
x_inst = x_cls()
y_inst = y_cls()
SINK_F(x_cls.my_tuple[0])
SINK_F(x_inst.my_tuple[0])
SINK_F(y_cls.my_tuple[0])
SINK_F(y_inst.my_tuple[0])
x_cls.my_tuple = (SOURCE,)
SINK(x_cls.my_tuple[0]) # $ flow="SOURCE, l:-1 -> x_cls.my_tuple[0]"
SINK(x_inst.my_tuple[0]) # $ MISSING: flow="SOURCE, l:-2 -> x_inst.my_tuple[0]"
SINK_F(y_cls.my_tuple[0])
SINK_F(y_inst.my_tuple[0])
# ------------------------------------------------------------------------------
# Crosstalk test -- using different function based on conditional
# ------------------------------------------------------------------------------
# NOTE: These tests use `SINK(objy.y, not_present_at_runtime=True)` since it's not
# possible to use if-then-else statements, since that would make cfg-splitting kick in,
# and that would solve the problem trivially (by the splitting).
class CrosstalkTestX:
def __init__(self):
self.x = None
self.y = None
def setx(self, value):
self.x = value
def setvalue(self, value):
self.x = value
def do_nothing(self, value):
pass
class CrosstalkTestY:
def __init__(self):
self.x = None
self.y = None
def sety(self ,value):
self.y = value
def setvalue(self, value):
self.y = value
@expects(8) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_no_crosstalk_reference(cond=True):
objx = CrosstalkTestX()
SINK_F(objx.x)
SINK_F(objx.y)
objy = CrosstalkTestY()
SINK_F(objy.x)
SINK_F(objy.y)
if cond:
objx.setvalue(SOURCE)
else:
objy.setvalue(SOURCE)
SINK(objx.x) # $ flow="SOURCE, l:-4 -> objx.x"
SINK_F(objx.y)
SINK_F(objy.x)
SINK(objy.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-5 -> objy.y"
@expects(8) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_potential_crosstalk_different_name(cond=True):
objx = CrosstalkTestX()
SINK_F(objx.x)
SINK_F(objx.y)
objy = CrosstalkTestY()
SINK_F(objy.x)
SINK_F(objy.y)
if cond:
func = objx.setx
else:
func = objy.sety
func(SOURCE)
SINK(objx.x) # $ flow="SOURCE, l:-2 -> objx.x"
SINK_F(objx.y)
SINK_F(objy.x)
SINK(objy.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-5 -> objy.y"
@expects(8) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_potential_crosstalk_same_name(cond=True):
objx = CrosstalkTestX()
SINK_F(objx.x)
SINK_F(objx.y)
objy = CrosstalkTestY()
SINK_F(objy.x)
SINK_F(objy.y)
if cond:
func = objx.setvalue
else:
func = objy.setvalue
func(SOURCE)
SINK(objx.x) # $ flow="SOURCE, l:-2 -> objx.x"
SINK_F(objx.y)
SINK_F(objy.x)
SINK(objy.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-5 -> objy.y"
@expects(10) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_potential_crosstalk_same_name_object_reference(cond=True):
objx = CrosstalkTestX()
SINK_F(objx.x)
SINK_F(objx.y)
objy = CrosstalkTestY()
SINK_F(objy.x)
SINK_F(objy.y)
if cond:
obj = objx
else:
obj = objy
obj.setvalue(SOURCE)
SINK(objx.x) # $ MISSING: flow="SOURCE, l:-2 -> objx.x"
SINK_F(objx.y)
SINK_F(objy.x)
SINK(objy.y, not_present_at_runtime=True) # $ MISSING: flow="SOURCE, l:-5 -> objy.y"
SINK(obj.x) # $ flow="SOURCE, l:-7 -> obj.x"
SINK(obj.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-8 -> obj.y"
@expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_potential_crosstalk_same_class(cond=True):
objx1 = CrosstalkTestX()
SINK_F(objx1.x)
objx2 = CrosstalkTestX()
SINK_F(objx2.x)
if cond:
func = objx1.setvalue
else:
func = objx2.do_nothing
# We want to ensure that objx2.x does not end up getting tainted, since that would
# be cross-talk between the self arguments are their functions.
func(SOURCE)
SINK(objx1.x) # $ flow="SOURCE, l:-2 -> objx1.x"
SINK_F(objx2.x)
class NewTest(object):
def __new__(cls, arg):
cls.foo = arg
return super().__new__(cls) # $ unresolved_call=super().__new__(..)
@expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test__new__():
# we want to make sure that we DON'T pass the synthetic pre-update node for
# the class instance to __new__, like we do for __init__.
nt = NewTest(SOURCE)
# the __new__ implementation sets the foo attribute on THE CLASS itself. The
# attribute lookup on the class instance will go to the class itself when the
# attribute isn't defined on the class instance, so we will actually see `nt.foo`
# contain the source, but the point of this test is that we should see identical
# behavior between NewTest.foo and nt.foo, which we dont!
#
# Also note that we currently (October 2022) dont' model writes to classes very
# well.
SINK(NewTest.foo) # $ MISSING: flow="SOURCE, l:-10 -> NewTest.foo"
SINK(nt.foo) # $ MISSING: flow="SOURCE, l:-11 -> nt.foo"
NewTest.foo = NONSOURCE
SINK_F(NewTest.foo)
SINK_F(nt.foo)
# ------------------------------------------------------------------------------
# Global scope
# ------------------------------------------------------------------------------
# since these are defined on global scope, and we still want to run them with
# `validTest.py`, we have them defined in a different file, and have hardcoded this
# number that reflects how many OK we expect to see ... Not an ideal solution, but at
# least we know that the tests are actually valid.
#
# Notice that since the tests are run in a random order, we cannot split the global
# scope tests into multiple functions, since we wouldn't know which one did the initial
# import that does all the printing :|
@expects(18 + 2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_global_scope():
import fieldflow.test_global
fieldflow.test_global.func_defined_before() # $ unresolved_call=fieldflow.test_global.func_defined_before()
fieldflow.test_global.func_defined_after() # $ unresolved_call=fieldflow.test_global.func_defined_after()
# ------------------------------------------------------------------------------
# Global flow cases that doesn't work in this file, but works in test_global.py
# ------------------------------------------------------------------------------
# --------------------------------------
# method calls _before_ those ifs
# --------------------------------------
# def test_indirect_assign_method():
myobj2 = MyObj("OK")
myobj2.setFoo(SOURCE)
SINK(myobj2.foo) # $ flow="SOURCE, l:-1 -> myobj2.foo"
# def test_nested_obj_method():
x2 = SOURCE
a2 = NestedObj()
a2.getObj().foo = x2
SINK(a2.obj.foo) # $ flow="SOURCE, l:-3 -> a2.obj.foo"
# --------------------------------------
# using constructor
# --------------------------------------
# def test_constructor_assign():
obj2 = MyObj(SOURCE)
SINK(obj2.foo) # $ flow="SOURCE, l:-1 -> obj2.foo"
# apparently these if statements below makes a difference :O
# but one is not enough
cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
if cond:
pass
# def test_constructor_assign():
obj2 = MyObj(SOURCE)
SINK(obj2.foo) # $ flow="SOURCE, l:-1 -> obj2.foo"
if cond:
pass
# def test_constructor_assign():
obj2 = MyObj(SOURCE)
SINK(obj2.foo) # $ flow="SOURCE, l:-1 -> obj2.foo"
# def test_constructor_assign_kw():
obj3 = MyObj(foo=SOURCE)
SINK(obj3.foo) # $ flow="SOURCE, l:-1 -> obj3.foo"
# def test_fields():
SINK(fields_with_local_flow(SOURCE)) # $ flow="SOURCE -> fields_with_local_flow(..)"
# --------------------------------------
# method calls _after_ those ifs
# --------------------------------------
# def test_indirect_assign_method():
myobj2 = MyObj("OK")
myobj2.setFoo(SOURCE)
SINK(myobj2.foo) # $ flow="SOURCE, l:-1 -> myobj2.foo"
# def test_nested_obj_method():
x2 = SOURCE
a2 = NestedObj()
a2.getObj().foo = x2
SINK(a2.obj.foo) # $ flow="SOURCE, l:-3 -> a2.obj.foo"

View File

@@ -0,0 +1,80 @@
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__)))) # $ unresolved_call=sys.path.append(..)
from testlib import expects
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
if is_source(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
# ------------------------------------------------------------------------------
# Actual tests
# ------------------------------------------------------------------------------
@expects(2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_dict_literal():
d = {"key": SOURCE}
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
SINK(d.get("key")) # $ flow="SOURCE, l:-2 -> d.get(..)"
@expects(2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_dict_update():
d = {}
d["key"] = SOURCE
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
SINK(d.get("key")) # $ flow="SOURCE, l:-2 -> d.get(..)"
def test_dict_update_fresh_key():
# we had a regression where we did not create a dictionary element content
# for keys used in "inline update" like this
d = {}
d["fresh_key"] = SOURCE
SINK(d["fresh_key"]) # $ flow="SOURCE, l:-1 -> d['fresh_key']"
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_dict_setdefault():
d = {}
x = d.setdefault("key", SOURCE)
SINK(x) # $ flow="SOURCE, l:-1 -> x"
SINK(d["key"]) # $ flow="SOURCE, l:-2 -> d['key']"
SINK(d.setdefault("key", NONSOURCE)) # $ flow="SOURCE, l:-3 -> d.setdefault(..)"
@expects(2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_dict_override():
d = {}
d["key"] = SOURCE
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
d["key"] = NONSOURCE
SINK_F(d["key"])
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_dict_nonstring_key():
d = {}
d[42] = SOURCE
SINK(d[42]) # $ MISSING: flow
SINK(d.get(42)) # $ MISSING: flow
SINK(d.setdefault(42, NONSOURCE)) # $ MISSING: flow

View File

@@ -0,0 +1,179 @@
"""
This file contains a copy of the tests from `test.py` along with some cases that check
the interaction between global variables and assignment on global scope.
You might think that these are a bit useless since field-flow should work just the same
on global or non-global scope, but then you would be wrong!
"""
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
if is_source(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
# ------------------------------------------------------------------------------
# Actual tests
# ------------------------------------------------------------------------------
class MyObj(object):
def __init__(self, foo):
self.foo = foo
def setFoo(self, foo):
self.foo = foo
def setFoo(obj, x):
SINK_F(obj.foo)
obj.foo = x
# def test_indirect_assign():
myobj1 = MyObj("OK")
setFoo(myobj1, SOURCE)
SINK(myobj1.foo) # $ flow="SOURCE, l:-1 -> myobj1.foo"
# def test_indirect_assign_method():
myobj2 = MyObj("OK")
myobj2.setFoo(SOURCE)
SINK(myobj2.foo) # $ flow="SOURCE, l:-1 -> myobj2.foo"
# def test_direct_assign():
myobj3 = MyObj(NONSOURCE)
myobj3.foo = SOURCE
SINK(myobj3.foo) # $ flow="SOURCE, l:-1 -> myobj3.foo"
# def test_direct_assign_overwrite():
myobj4 = MyObj(NONSOURCE)
myobj4.foo = SOURCE
myobj4.foo = NONSOURCE
SINK_F(myobj4.foo)
# def test_direct_if_assign(cond = False):
# this way, our analysis isn't able to understand that `cond` is just False,
# and therefore isn't able to determine that the if below will not hold.
cond = eval("False")
myobj5 = MyObj(NONSOURCE)
myobj5.foo = SOURCE
if cond:
myobj5.foo = NONSOURCE
SINK_F(myobj5.foo)
# SPLITTING happens here, so in one version there is flow, and in the other there isn't
# that's why it has both a flow and a MISSING: flow annotation
SINK(myobj5.foo) # $ flow="SOURCE, l:-6 -> myobj5.foo" MISSING: flow
# def test_direct_if_always_assign(cond = True):
myobj6 = MyObj(NONSOURCE)
myobj6.foo = SOURCE
if cond:
myobj6.foo = NONSOURCE
SINK_F(myobj6.foo)
else:
myobj6.foo = NONSOURCE
SINK_F(myobj6.foo)
SINK_F(myobj6.foo)
# def test_getattr():
myobj7 = MyObj(NONSOURCE)
myobj7.foo = SOURCE
SINK(getattr(myobj7, "foo")) # $ flow="SOURCE, l:-1 -> getattr(..)"
# def test_setattr():
myobj8 = MyObj(NONSOURCE)
setattr(myobj8, "foo", SOURCE)
SINK(myobj8.foo) # $ flow="SOURCE, l:-1 -> myobj8.foo"
# def test_setattr_getattr():
myobj9 = MyObj(NONSOURCE)
setattr(myobj9, "foo", SOURCE)
SINK(getattr(myobj9, "foo")) # $ flow="SOURCE, l:-1 -> getattr(..)"
# def test_setattr_getattr_overwrite():
myobj10 = MyObj(NONSOURCE)
setattr(myobj10, "foo", SOURCE)
setattr(myobj10, "foo", NONSOURCE)
SINK_F(getattr(myobj10, "foo"))
# def test_constructor_assign():
obj2 = MyObj(SOURCE)
SINK(obj2.foo) # $ flow="SOURCE, l:-1 -> obj2.foo"
# def test_constructor_assign_kw():
obj3 = MyObj(foo=SOURCE)
SINK(obj3.foo) # $ flow="SOURCE, l:-1 -> obj3.foo"
def fields_with_local_flow(x):
obj0 = MyObj(x)
a0 = obj0.foo
return a0
# def test_fields():
SINK(fields_with_local_flow(SOURCE)) # $ flow="SOURCE -> fields_with_local_flow(..)"
# ------------------------------------------------------------------------------
# Nested Object
# ------------------------------------------------------------------------------
class NestedObj(object):
def __init__(self):
self.obj = MyObj("OK")
def getObj(self):
return self.obj
# def test_nested_obj():
x1 = SOURCE
a1 = NestedObj()
a1.obj.foo = x1
SINK(a1.obj.foo) # $ flow="SOURCE, l:-3 -> a1.obj.foo"
# def test_nested_obj_method():
x2 = SOURCE
a2 = NestedObj()
a2.getObj().foo = x2
SINK(a2.obj.foo) # $ flow="SOURCE, l:-3 -> a2.obj.foo"
# ------------------------------------------------------------------------------
# Global scope interaction
# ------------------------------------------------------------------------------
def func_defined_before():
SINK(global_obj.foo) # $ MISSING: flow="SOURCE, l:+3 -> global_obj.foo"
global_obj = MyObj(NONSOURCE)
global_obj.foo = SOURCE
SINK(global_obj.foo) # $ flow="SOURCE, l:-1 -> global_obj.foo"
def func_defined_after():
SINK(global_obj.foo) # $ MISSING: flow="SOURCE, l:-4 -> global_obj.foo"

View File

@@ -0,0 +1,2 @@
testFailures
failures

View File

@@ -0,0 +1,34 @@
import python
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
module GlobalReadTest implements TestSig {
string getARelevantTag() { result = "reads" }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(DataFlow::ModuleVariableNode n, DataFlow::Node read |
read = n.getARead() and
value = n.getVariable().getId() and
value != "print" and
tag = "reads" and
location = read.getLocation() and
element = read.toString()
)
}
}
module GlobalWriteTest implements TestSig {
string getARelevantTag() { result = "writes" }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(DataFlow::ModuleVariableNode n, DataFlow::Node read |
read = n.getAWrite() and
value = n.getVariable().getId() and
tag = "writes" and
location = read.getLocation() and
element = read.toString()
)
}
}
import MakeTest<MergeTests<GlobalReadTest, GlobalWriteTest>>

View File

@@ -0,0 +1 @@
known_attr = [1000] #$ writes=known_attr

View File

@@ -0,0 +1,121 @@
### Tests of global flow
# Simple assignment
g = [5] # $writes=g
# Multiple assignment
g1, g2 = [6], [7] # $writes=g1 writes=g2
# Assignment that's only referenced in this scope. This one will not give rise to a `ModuleVariableNode`.
unreferenced_g = [8]
print(unreferenced_g)
# Testing modifications of globals
# Modification by reassignment
g_mod = []
# This assignment does not produce any flow, since `g_mod` is immediately reassigned.
# The following assignment should not be a `ModuleVariableNode`,
# but currently our analysis thinks `g_mod` might be used in the `print` call
g_mod = [10] # $ SPURIOUS: writes=g_mod
print("foo")
g_mod = [100] # $writes=g_mod
# Modification by mutation
g_ins = [50] # $writes=g_ins
print(g_ins)
g_ins.append(75)
# A global with multiple potential definitions
import unknown_module
if unknown_module.attr:
g_mult = [200] # $writes=g_mult
else:
g_mult = [300] # $writes=g_mult
# A global variable that may be redefined depending on some unknown value
g_redef = [400] # $writes=g_redef
if unknown_module.attr:
g_redef = [500] # $writes=g_redef
def global_access():
l = 5
print(g) # $reads=g
print(g1) # $reads=g1
print(g2) # $reads=g2
print(g_mod) # $reads=g_mod
print(g_ins) # $reads=g_ins
print(g_mult) # $reads=g_mult
print(g_redef) # $reads=g_redef
def print_g_mod(): # $writes=print_g_mod
print(g_mod) # $reads=g_mod
def global_mod():
global g_mod
g_mod += [150] # $reads,writes=g_mod
print_g_mod() # $reads=print_g_mod
def global_inside_local_function():
def local_function():
print(g) # $reads=g
local_function()
## Imports
# Direct imports
import foo_module # $writes=foo_module
def use_foo():
print(foo_module.attr) # $reads=foo_module
# Partial imports
from bar import baz_attr, quux_attr # $writes=baz_attr writes=quux_attr
def use_partial_import():
print(baz_attr, quux_attr) # $reads=baz_attr reads=quux_attr
# Aliased imports
from spam_module import ham_attr as eggs_attr # $writes=eggs_attr
def use_aliased_import():
print(eggs_attr) # $reads=eggs_attr
# Import star (unlikely to work unless we happen to extract/model the referenced module)
# Unknown modules
from unknown import *
def secretly_use_unknown():
print(unknown_attr) # $reads=unknown_attr
# Known modules
from known import *
def secretly_use_known():
print(known_attr) # $reads=known_attr
# Local import in function
def imports_locally():
import mod1
# Global import hidden in function
def imports_stuff():
global mod2
import mod2 # $writes=mod2

View File

@@ -0,0 +1,2 @@
from trois import *
print(foo)

View File

@@ -0,0 +1,22 @@
| test3.py:1:17:1:19 | ControlFlowNode for ImportMember | test3.py:1:17:1:19 | ControlFlowNode for foo |
| test3.py:1:17:1:19 | ControlFlowNode for ImportMember | test3.py:2:7:2:9 | ControlFlowNode for foo |
| test3.py:1:17:1:19 | ControlFlowNode for foo | test3.py:2:7:2:9 | ControlFlowNode for foo |
| three.py:1:1:1:3 | ControlFlowNode for foo | test1.py:2:7:2:9 | ControlFlowNode for foo |
| three.py:1:1:1:3 | ControlFlowNode for foo | test3.py:1:17:1:19 | ControlFlowNode for ImportMember |
| three.py:1:1:1:3 | ControlFlowNode for foo | test3.py:1:17:1:19 | ControlFlowNode for foo |
| three.py:1:1:1:3 | ControlFlowNode for foo | test3.py:2:7:2:9 | ControlFlowNode for foo |
| three.py:1:1:1:3 | ControlFlowNode for foo | two.py:2:7:2:9 | ControlFlowNode for foo |
| three.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | test1.py:2:7:2:9 | ControlFlowNode for foo |
| three.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | test3.py:1:17:1:19 | ControlFlowNode for ImportMember |
| three.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | test3.py:1:17:1:19 | ControlFlowNode for foo |
| three.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | test3.py:2:7:2:9 | ControlFlowNode for foo |
| three.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | three.py:1:1:1:3 | ControlFlowNode for foo |
| three.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | two.py:2:7:2:9 | ControlFlowNode for foo |
| trois.py:1:1:1:3 | ControlFlowNode for foo | deux.py:2:7:2:9 | ControlFlowNode for foo |
| trois.py:1:1:1:3 | ControlFlowNode for foo | test2.py:2:7:2:9 | ControlFlowNode for foo |
| trois.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | deux.py:2:7:2:9 | ControlFlowNode for foo |
| trois.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | test2.py:2:7:2:9 | ControlFlowNode for foo |
| trois.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | trois.py:1:1:1:3 | ControlFlowNode for foo |
| two.py:2:7:2:9 | ControlFlowNode for foo | test3.py:1:17:1:19 | ControlFlowNode for ImportMember |
| two.py:2:7:2:9 | ControlFlowNode for foo | test3.py:1:17:1:19 | ControlFlowNode for foo |
| two.py:2:7:2:9 | ControlFlowNode for foo | test3.py:2:7:2:9 | ControlFlowNode for foo |

View File

@@ -0,0 +1,19 @@
import semmle.python.dataflow.new.DataFlow
/**
* A configuration to find all flows.
* To be used on tiny programs.
*/
module AllFlowsConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { any() }
predicate isSink(DataFlow::Node node) { any() }
}
module AllFlowsFlow = DataFlow::Global<AllFlowsConfig>;
from DataFlow::CfgNode source, DataFlow::CfgNode sink
where
source != sink and
AllFlowsFlow::flow(source, sink)
select source, sink

View File

@@ -0,0 +1 @@
from two import *

View File

@@ -0,0 +1,2 @@
from one import *
print(foo)

View File

@@ -0,0 +1,2 @@
from un import *
print(foo)

Some files were not shown because too many files have changed in this diff Show More