mirror of
https://github.com/github/codeql.git
synced 2025-12-20 18:56:32 +01:00
This commit removes SSA nodes from the data flow graph. Specifically, for a definition and use such as
```python
x = expr
y = x + 2
```
we used to have flow from `expr` to an SSA variable representing `x` and from that SSA variable to the use of `x` in the definition of `y`. Now we instead have flow from `expr` to the control flow node for `x` at line 1 and from there to the control flow node for `x` at line 2.
Specific changes:
- `EssaNode` from the data flow layer no longer exists.
- Several glue steps between `EssaNode`s and `CfgNode`s have been deleted.
- Entry nodes are now admitted as `CfgNodes` in the data flow layer (they were filtered out before).
- Entry nodes now have a new `toString` taking into account that the module name may be ambiguous.
- Some tests have been rewritten to accommodate the changes, but only `python/ql/test/experimental/dataflow/basic/maximalFlowsConfig.qll` should have semantic changes.
- Comments have been updated.
- Test output has been updated, but apart from `python/ql/test/experimental/dataflow/basic/maximalFlows.expected` only `python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py` should have a semantic change. This is a bonus fix, probably meaning that something was never connected up correctly.
146 lines
5.0 KiB
Plaintext
146 lines
5.0 KiB
Plaintext
import python
|
|
import semmle.python.dataflow.new.DataFlow
|
|
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
|
|
import experimental.dataflow.TestUtil.RoutingTest
|
|
|
|
/**
 * Routing test for argument number 1. It reports flow found by
 * `ArgumentRoutingConfig` from an `arg1` source to a `SINK1` sink, together
 * with any flow found by `Argument1ExtraRoutingConfig`.
 */
module Argument1RoutingTest implements RoutingTestSig {
  class Argument = Unit;

  /** Gets the tag `arg1`; `arg` merely has to exist. */
  string flowTag(Argument arg) { exists(arg) and result = "arg1" }

  /**
   * Holds if there is flow from `source` to `sink` according to either
   * `ArgumentRoutingConfig` restricted to argument number 1, or
   * `Argument1ExtraRoutingConfig`.
   */
  predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink, Argument arg) {
    exists(arg) and
    (
      exists(ArgumentRoutingConfig routing |
        routing.isArgSource(source, 1) and
        routing.isGoodSink(sink, 1) and
        routing.hasFlow(source, sink)
      )
      or
      exists(Argument1ExtraRoutingConfig extra | extra.hasFlow(source, sink))
    )
  }
}
|
|
|
|
/** An argument number: an integer from 1 to 7, both inclusive. */
class ArgNumber extends int {
  ArgNumber() { this = [1 .. 7] }
}
|
|
|
|
/**
 * A data-flow configuration whose sources are names of the form `arg<n>` and
 * whose sinks are arguments of calls to `SINK<m>` or `SINK<m>_F`, where the
 * numbers range over `ArgNumber`.
 */
class ArgumentRoutingConfig extends DataFlow::Configuration {
  ArgumentRoutingConfig() { this = "ArgumentRoutingConfig" }

  /** Holds if `node` is a control-flow node for the name `arg<argNumber>`. */
  predicate isArgSource(DataFlow::Node node, ArgNumber argNumber) {
    exists(NameNode name |
      name = node.(DataFlow::CfgNode).getNode() and
      name.getId() = "arg" + argNumber
    )
  }

  override predicate isSource(DataFlow::Node node) { this.isArgSource(node, _) }

  /** Holds if `node` is an argument of a call to `SINK<argNumber>`. */
  predicate isGoodSink(DataFlow::Node node, ArgNumber argNumber) {
    exists(CallNode sinkCall |
      node.(DataFlow::CfgNode).getNode() = sinkCall.getAnArg() and
      sinkCall.getFunction().(NameNode).getId() = "SINK" + argNumber
    )
  }

  /** Holds if `node` is an argument of a call to `SINK<argNumber>_F`. */
  predicate isBadSink(DataFlow::Node node, ArgNumber argNumber) {
    exists(CallNode sinkCall |
      node.(DataFlow::CfgNode).getNode() = sinkCall.getAnArg() and
      sinkCall.getFunction().(NameNode).getId() = "SINK" + argNumber + "_F"
    )
  }

  override predicate isSink(DataFlow::Node node) {
    this.isBadSink(node, _) or this.isGoodSink(node, _)
  }

  /**
   * Blocks flow into source nodes.
   *
   * Tests may use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
   * With use-use flow, the argument of the first call would reach the sink inside
   * the second call, so it would look as if every case were handled even when only
   * the last one is. Preventing flow into sources keeps the test honest.
   */
  override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
}
|
|
|
|
/**
 * A data-flow configuration whose sources are names matching `with_%` that are
 * assigned the result of calling a function whose name matches `With_%`, and
 * whose sinks are arguments of calls to `SINK1`.
 */
class Argument1ExtraRoutingConfig extends DataFlow::Configuration {
  Argument1ExtraRoutingConfig() { this = "Argument1ExtraRoutingConfig" }

  /**
   * Holds if `node` is the control-flow node of a name matching `with_%` that is
   * defined by an assignment whose value is a call to a function whose name
   * matches `With_%`.
   */
  override predicate isSource(DataFlow::Node node) {
    exists(AssignmentDefinition def, DataFlow::CallCfgNode call, NameNode target |
      // Bind the defined name once; it is used both to identify the definition
      // and to check the variable name.
      target = node.(DataFlow::CfgNode).getNode() and
      def.getDefiningNode() = target and
      def.getValue() = call.getNode() and
      call.getFunction().asCfgNode().(NameNode).getId().matches("With\\_%") and
      target.getId().matches("with\\_%")
    )
  }

  /** Holds if `node` is an argument of a call to `SINK1`. */
  override predicate isSink(DataFlow::Node node) {
    exists(CallNode call |
      call.getFunction().(NameNode).getId() = "SINK1" and
      node.(DataFlow::CfgNode).getNode() = call.getAnArg()
    )
  }

  /**
   * Blocks flow into source nodes.
   *
   * Tests may use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
   * With use-use flow, the argument of the first call would reach the sink inside
   * the second call, so it would look as if every case were handled even when only
   * the last one is. Preventing flow into sources keeps the test honest.
   */
  override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
}
|
|
|
|
/**
 * Routing test for argument numbers greater than 1: reports flow found by
 * `ArgumentRoutingConfig` from `arg<n>` to `SINK<n>`.
 */
module RestArgumentRoutingTest implements RoutingTestSig {
  class Argument = ArgNumber;

  /** Gets the tag `arg<n>` for argument number `arg`. */
  string flowTag(Argument arg) { result = "arg" + arg }

  /**
   * Holds if `arg` is greater than 1 and there is flow from the `arg<arg>`
   * source `source` to the `SINK<arg>` sink `sink`.
   */
  predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink, Argument arg) {
    arg > 1 and
    exists(ArgumentRoutingConfig routing |
      routing.isArgSource(source, arg) and
      routing.isGoodSink(sink, arg) and
      routing.hasFlow(source, sink)
    )
  }
}
|
|
|
|
/** Bad flow from `arg<n>` to `SINK<n>_F`. */
module BadArgumentRoutingTestSinkF implements RoutingTestSig {
  class Argument = ArgNumber;

  /** Gets the tag `bad<n>` for argument number `arg`. */
  string flowTag(Argument arg) { result = "bad" + arg }

  /**
   * Holds if there is flow from the `arg<arg>` source `source` to `sink`,
   * where `sink` is an argument of a call to `SINK<arg>_F`.
   */
  predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink, Argument arg) {
    exists(ArgumentRoutingConfig routing |
      routing.isArgSource(source, arg) and
      routing.isBadSink(sink, arg) and
      routing.hasFlow(source, sink)
    )
  }
}
|
|
|
|
/** Bad flow from `arg<n>` to `SINK<m>` or `SINK<m>_F`, where `n != m`. */
module BadArgumentRoutingTestWrongSink implements RoutingTestSig {
  class Argument = ArgNumber;

  /** Gets the tag `bad<m>`, where `arg` is the number of the sink. */
  string flowTag(Argument arg) { result = "bad" + arg }

  /**
   * Holds if there is flow from `source` to `sink`, where `sink` is an argument
   * of a call to `SINK<arg>` or `SINK<arg>_F` and `source` is an `arg<n>` source
   * for some argument number `n` other than `arg`.
   */
  predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink, Argument arg) {
    exists(ArgumentRoutingConfig routing |
      exists(ArgNumber other | other != arg and routing.isArgSource(source, other)) and
      (
        routing.isBadSink(sink, arg)
        or
        routing.isGoodSink(sink, arg)
      ) and
      routing.hasFlow(source, sink)
    )
  }
}
|
|
|
|
import MakeTest<MergeTests4<MakeTestSig<Argument1RoutingTest>, MakeTestSig<RestArgumentRoutingTest>,
|
|
MakeTestSig<BadArgumentRoutingTestSinkF>, MakeTestSig<BadArgumentRoutingTestWrongSink>>>
|