Python: Move test utilities into the query pack.

This commit is contained in:
Michael Nebel
2024-12-11 13:20:41 +01:00
parent c3fe3e468c
commit 1490400ab0
15 changed files with 0 additions and 0 deletions

View File

@@ -1,9 +0,0 @@
/**
* Inline expectation tests for Python.
* See `shared/util/codeql/util/test/InlineExpectationsTest.qll`
*/
private import python as PY
private import codeql.util.test.InlineExpectationsTest
private import internal.InlineExpectationsTestImpl
import Make<Impl>

View File

@@ -1,21 +0,0 @@
/**
* @kind test-postprocess
*/
private import python
private import codeql.util.test.InlineExpectationsTest as T
private import internal.InlineExpectationsTestImpl
import T::TestPostProcessing
import T::TestPostProcessing::Make<Impl, Input>
private module Input implements T::TestPostProcessing::InputSig<Impl> {
string getRelativeUrl(Location location) {
exists(File f, int startline, int startcolumn, int endline, int endcolumn |
location.hasLocationInfo(_, startline, startcolumn, endline, endcolumn) and
f = location.getFile()
|
result =
f.getRelativePath() + ":" + startline + ":" + startcolumn + ":" + endline + ":" + endcolumn
)
}
}

View File

@@ -1,135 +0,0 @@
/**
* A test query that verifies assertions about the API graph embedded in source-code comments.
*
* An assertion is a comment of the form `def=<path>` or `use=<path>`, and asserts that
* there is a def/use feature reachable from the root along the given path, and its
* associated data-flow node must start on the same line as the comment.
*
* We also support negative assertions of the form `MISSING: def <path>` or `MISSING: use <path>`, which assert
* that there _isn't_ a node with the given path on the same line.
*
* The query only produces output for failed assertions, meaning that it should have no output
* under normal circumstances.
*
* The syntax is made to look exactly like inline expectation tests, so that the tests
* can remain consistent with other Python tests.
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs
private DataFlow::Node getNode(API::Node nd, string kind) {
kind = "def" and
result = nd.asSink()
or
kind = "use" and
result = nd.getAValueReachableFromSource()
}
private string getLocStr(Location loc) {
exists(string filepath, int startline |
loc.hasLocationInfo(filepath, startline, _, _, _) and
result = filepath + ":" + startline
)
}
/**
* An assertion matching a data-flow node against an API-graph feature.
*/
class Assertion extends Comment {
string expectedKind;
string expectedLoc;
string path;
string polarity;
Assertion() {
exists(string txt, string rex |
txt = this.getText().trim() and
rex = "#\\$.*?((?:MISSING: )?)(def|use)=([\\w\\(\\)\"\\.]*).*"
|
polarity = txt.regexpCapture(rex, 1) and
expectedKind = txt.regexpCapture(rex, 2) and
path = txt.regexpCapture(rex, 3) and
expectedLoc = getLocStr(this.getLocation())
)
}
string getEdgeLabel(int i) {
// matches a single edge. E.g. `getParameter(1)` or `getMember("foo")`.
// The lookbehind/lookahead ensure that the boundary is correct, that is
// either the edge is next to a ".", or it's the end of the string.
result = path.regexpFind("(?<=\\.|^)([\\w\\(\\)\"]+)(?=\\.|$)", i, _).trim()
}
int getPathLength() { result = max(int i | exists(this.getEdgeLabel(i))) + 1 }
predicate isNegative() { polarity = "MISSING: " }
API::Node lookup(int i) {
i = 0 and
result = API::root()
or
result =
this.lookup(i - 1)
.getASuccessor(any(API::Label::ApiLabel label |
label.toString() = this.getEdgeLabel(i - 1)
))
}
API::Node lookup() { result = this.lookup(this.getPathLength()) }
predicate holds() { getLocStr(getNode(this.lookup(), expectedKind).getLocation()) = expectedLoc }
string tryExplainFailure() {
exists(int i, API::Node nd, string prefix, string suffix |
nd = this.lookup(i) and
i < this.getPathLength() and
not exists(this.lookup([i + 1 .. this.getPathLength()])) and
prefix = nd + " has no outgoing edge labelled " + this.getEdgeLabel(i) + ";" and
if exists(nd.getASuccessor())
then
suffix =
"it does have outgoing edges labelled " +
concat(string lbl |
exists(nd.getASuccessor(any(API::Label::ApiLabel label | label.toString() = lbl)))
|
lbl, ", "
) + "."
else suffix = "it has no outgoing edges at all."
|
result = prefix + " " + suffix
)
or
exists(API::Node nd, string kind | nd = this.lookup() |
exists(getNode(nd, kind)) and
not exists(getNode(nd, expectedKind)) and
result = "Expected " + expectedKind + " node, but found " + kind + " node."
)
or
exists(DataFlow::Node nd | nd = getNode(this.lookup(), expectedKind) |
not getLocStr(nd.getLocation()) = expectedLoc and
result =
"Node not found on this line (but there is one on line " + min(getLocStr(nd.getLocation())) +
")."
)
}
string explainFailure() {
if this.isNegative()
then (
this.holds() and
result = "Negative assertion failed."
) else (
not this.holds() and
(
result = this.tryExplainFailure()
or
not exists(this.tryExplainFailure()) and
result = "Positive assertion failed for unknown reasons."
)
)
}
}
query predicate failed(Assertion a, string explanation) { explanation = a.explainFailure() }

View File

@@ -1,105 +0,0 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
signature module QueryTestSig {
predicate isSink(DataFlow::Node sink);
predicate flowTo(DataFlow::Node sink);
}
module MakeQueryTest<QueryTestSig Impl> {
module DataFlowQueryTest implements TestSig {
string getARelevantTag() { result = "result" }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(DataFlow::Node sink | Impl::flowTo(sink) |
location = sink.getLocation() and
tag = "result" and
value = "BAD" and
element = sink.toString()
)
}
// We allow annotating any sink with `result=OK` to signal
// safe sinks.
// Sometimes a line contains both an alert and a safe sink.
// In this situation, the annotation form `OK(safe sink)`
// can be useful.
predicate hasOptionalResult(Location location, string element, string tag, string value) {
exists(DataFlow::Node sink | Impl::isSink(sink) |
location = sink.getLocation() and
tag = "result" and
value in ["OK", "OK(" + prettyNode(sink) + ")"] and
element = sink.toString()
)
}
}
import MakeTest<DataFlowQueryTest>
query predicate missingAnnotationOnSink(Location location, string error, string element) {
error = "ERROR, you should add `# $ MISSING: result=BAD` or `result=OK` annotation" and
exists(DataFlow::Node sink |
exists(sink.getLocation().getFile().getRelativePath()) and
Impl::isSink(sink) and
location = sink.getLocation() and
element = prettyExpr(sink.asExpr()) and
not Impl::flowTo(sink) and
not exists(FalseNegativeTestExpectation missingResult |
missingResult.getTag() = "result" and
missingResult.getValue() = "BAD" and
missingResult.getLocation().getFile() = location.getFile() and
missingResult.getLocation().getStartLine() = location.getStartLine()
) and
not exists(GoodTestExpectation okResult |
okResult.getTag() = "result" and
okResult.getValue() in ["OK", "OK(" + prettyNode(sink) + ")"] and
okResult.getLocation().getFile() = location.getFile() and
okResult.getLocation().getStartLine() = location.getStartLine()
)
)
}
}
module FromDataFlowConfig<DataFlow::ConfigSig C> {
module Impl implements QueryTestSig {
predicate isSink(DataFlow::Node sink) { C::isSink(sink) }
predicate flowTo(DataFlow::Node sink) { DataFlow::Global<C>::flowTo(sink) }
}
import MakeQueryTest<Impl>
}
module FromDataFlowStateConfig<DataFlow::StateConfigSig C> {
module Impl implements QueryTestSig {
predicate isSink(DataFlow::Node sink) { C::isSink(sink) or C::isSink(sink, _) }
predicate flowTo(DataFlow::Node sink) { DataFlow::GlobalWithState<C>::flowTo(sink) }
}
import MakeQueryTest<Impl>
}
module FromTaintTrackingConfig<DataFlow::ConfigSig C> {
module Impl implements QueryTestSig {
predicate isSink(DataFlow::Node sink) { C::isSink(sink) }
predicate flowTo(DataFlow::Node sink) { TaintTracking::Global<C>::flowTo(sink) }
}
import MakeQueryTest<Impl>
}
module FromTaintTrackingStateConfig<DataFlow::StateConfigSig C> {
module Impl implements QueryTestSig {
predicate isSink(DataFlow::Node sink) { C::isSink(sink) or C::isSink(sink, _) }
predicate flowTo(DataFlow::Node sink) { TaintTracking::GlobalWithState<C>::flowTo(sink) }
}
import MakeQueryTest<Impl>
}

View File

@@ -1,39 +0,0 @@
import python
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
signature module FlowTestSig {
string flowTag();
predicate relevantFlow(DataFlow::Node fromNode, DataFlow::Node toNode);
}
module MakeTestSig<FlowTestSig Impl> implements TestSig {
string getARelevantTag() { result = Impl::flowTag() }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(DataFlow::Node fromNode, DataFlow::Node toNode | Impl::relevantFlow(fromNode, toNode) |
location = toNode.getLocation() and
tag = Impl::flowTag() and
value =
"\"" + prettyNode(fromNode).replaceAll("\"", "'") + lineStr(fromNode, toNode) + " -> " +
prettyNode(toNode).replaceAll("\"", "'") + "\"" and
element = toNode.toString()
)
}
pragma[inline]
private string lineStr(DataFlow::Node fromNode, DataFlow::Node toNode) {
exists(int delta |
delta = fromNode.getLocation().getStartLine() - toNode.getLocation().getStartLine()
|
if delta = 0
then result = ""
else
if delta > 0
then result = ", l:+" + delta.toString()
else result = ", l:" + delta.toString()
)
}
}

View File

@@ -1,13 +0,0 @@
import python
import semmle.python.dataflow.new.DataFlow
import FlowTest
module LocalFlowStepTest implements FlowTestSig {
string flowTag() { result = "step" }
predicate relevantFlow(DataFlow::Node fromNode, DataFlow::Node toNode) {
DataFlow::localFlowStep(fromNode, toNode)
}
}
import MakeTest<MakeTestSig<LocalFlowStepTest>>

View File

@@ -1,42 +0,0 @@
import python
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate
import FlowTest
module MaximalFlowTest implements FlowTestSig {
string flowTag() { result = "flow" }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
source != sink and
MaximalFlows::flow(source, sink)
}
}
import MakeTest<MakeTestSig<MaximalFlowTest>>
/**
* A configuration to find all "maximal" flows.
* To be used on small programs.
*/
module MaximalFlowsConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
exists(node.getLocation().getFile().getRelativePath()) and
not node.asCfgNode() instanceof CallNode and
not node.asCfgNode().getNode() instanceof Return and
not node instanceof DataFlow::ParameterNode and
not node instanceof DataFlow::PostUpdateNode and
// not node.asExpr() instanceof FunctionExpr and
// not node.asExpr() instanceof ClassExpr and
not DataFlow::localFlowStep(_, node)
}
predicate isSink(DataFlow::Node node) {
exists(node.getLocation().getFile().getRelativePath()) and
not any(CallNode c).getArg(_) = node.asCfgNode() and
not node instanceof DataFlow::ArgumentNode and
not node.asCfgNode().(NameNode).getId().matches("SINK%") and
not DataFlow::localFlowStep(node, _)
}
}
module MaximalFlows = DataFlow::Global<MaximalFlowsConfig>;

View File

@@ -1,34 +0,0 @@
import python
import TestUtilities.dataflow.FlowTest
import TestUtilities.dataflow.testConfig
private import semmle.python.dataflow.new.internal.PrintNode
module DataFlowTest implements FlowTestSig {
string flowTag() { result = "flow" }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
TestFlow::flow(source, sink)
}
}
import MakeTest<MakeTestSig<DataFlowTest>>
query predicate missingAnnotationOnSink(Location location, string error, string element) {
error = "ERROR, you should add `# $ MISSING: flow` annotation" and
exists(DataFlow::Node sink |
TestConfig::isSink(sink) and
// note: we only care about `SINK` and not `SINK_F`, so we have to reconstruct manually.
exists(DataFlow::CallCfgNode call |
call.getFunction().asCfgNode().(NameNode).getId() = "SINK" and
(sink = call.getArg(_) or sink = call.getArgByName(_))
) and
location = sink.getLocation() and
element = prettyExpr(sink.asExpr()) and
not TestFlow::flowTo(sink) and
not exists(FalseNegativeTestExpectation missingResult |
missingResult.getTag() = "flow" and
missingResult.getLocation().getFile() = location.getFile() and
missingResult.getLocation().getStartLine() = location.getStartLine()
)
)
}

View File

@@ -1,33 +0,0 @@
import python
import TestUtilities.dataflow.FlowTest
import TestUtilities.dataflow.testTaintConfig
private import semmle.python.dataflow.new.internal.PrintNode
module DataFlowTest implements FlowTestSig {
string flowTag() { result = "flow" }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
TestFlow::flow(source, sink)
}
}
import MakeTest<MakeTestSig<DataFlowTest>>
query predicate missingAnnotationOnSink(Location location, string error, string element) {
error = "ERROR, you should add `# $ MISSING: flow` annotation" and
exists(DataFlow::Node sink |
exists(DataFlow::CallCfgNode call |
// note: we only care about `SINK` and not `SINK_F`, so we have to reconstruct manually.
call.getFunction().asCfgNode().(NameNode).getId() = "SINK" and
(sink = call.getArg(_) or sink = call.getArgByName(_))
) and
location = sink.getLocation() and
element = prettyExpr(sink.asExpr()) and
not TestFlow::flowTo(sink) and
not exists(FalseNegativeTestExpectation missingResult |
missingResult.getTag() = "flow" and
missingResult.getLocation().getFile() = location.getFile() and
missingResult.getLocation().getStartLine() = location.getStartLine()
)
)
}

View File

@@ -1,58 +0,0 @@
import python
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
/**
* A routing test is designed to test that values are routed to the
* correct arguments of the correct functions. It is assumed that
* the functions tested sink their arguments sequentially, that is
* `SINK1(arg1)`, etc.
*/
signature module RoutingTestSig {
class Argument;
string flowTag(Argument arg);
predicate relevantFlow(DataFlow::Node fromNode, DataFlow::Node toNode, Argument arg);
}
module MakeTestSig<RoutingTestSig Impl> implements TestSig {
string getARelevantTag() { result in ["func", Impl::flowTag(_)] }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(DataFlow::Node fromNode, DataFlow::Node toNode, Impl::Argument arg |
Impl::relevantFlow(fromNode, toNode, arg)
|
location = fromNode.getLocation() and
element = fromNode.toString() and
(
tag = Impl::flowTag(arg) and
if "\"" + tag + "\"" = fromValue(fromNode) then value = "" else value = fromValue(fromNode)
or
// only have result for `func` tag if the function where `arg<n>` is used, is
// different from the function name of the call where `arg<n>` was specified as
// an argument
tag = "func" and
value = toFunc(toNode) and
not value = fromFunc(fromNode)
)
)
}
}
pragma[inline]
private string fromValue(DataFlow::Node fromNode) {
result = "\"" + prettyNode(fromNode).replaceAll("\"", "'") + "\""
}
pragma[inline]
private string fromFunc(DataFlow::ArgumentNode fromNode) {
result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
}
pragma[inline]
private string toFunc(DataFlow::Node toNode) {
result = toNode.getEnclosingCallable().getQualifiedName()
}

View File

@@ -1,37 +0,0 @@
import python
private import semmle.python.dataflow.new.internal.PrintNode
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.ApiGraphs
import TestUtilities.InlineExpectationsTest
signature module UnresolvedCallExpectationsSig {
predicate unresolvedCall(CallNode call);
}
module DefaultUnresolvedCallExpectations implements UnresolvedCallExpectationsSig {
predicate unresolvedCall(CallNode call) {
not exists(DataFlowPrivate::DataFlowCall dfc |
exists(dfc.getCallable()) and dfc.getNode() = call
) and
not DataFlowPrivate::resolveClassCall(call, _) and
not call = API::builtin(_).getACall().asCfgNode()
}
}
module MakeUnresolvedCallExpectations<UnresolvedCallExpectationsSig Impl> {
private module UnresolvedCallExpectations implements TestSig {
string getARelevantTag() { result = "unresolved_call" }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(CallNode call | Impl::unresolvedCall(call) |
location = call.getLocation() and
tag = "unresolved_call" and
value = prettyExpr(call.getNode()) and
element = call.toString()
)
}
}
import MakeTest<UnresolvedCallExpectations>
}

View File

@@ -1,22 +0,0 @@
private import python
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
/**
* A configuration to find the call graph edges.
*/
module CallGraphConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
node instanceof DataFlowPrivate::ReturnNode
or
node instanceof DataFlow::ArgumentNode
}
predicate isSink(DataFlow::Node node) {
node instanceof DataFlowPrivate::OutNode
or
node instanceof DataFlow::ParameterNode
}
}
module CallGraphFlow = DataFlow::Global<CallGraphConfig>;

View File

@@ -1,49 +0,0 @@
/**
* Configuration to test selected data flow
* Sources in the source code are denoted by the special name `SOURCE`,
* and sinks are denoted by arguments to the special function `SINK`.
* For example, given the test code
* ```python
* def test():
* s = SOURCE
* SINK(s)
* ```
* `SOURCE` will be a source and the second occurrence of `s` will be a sink.
*
* In order to test literals, alternative sources are defined for each type:
*
* for | use
* ----------
* string | `"source"`
* integer | `42`
* float | `42.0`
* complex | `42j` (not supported yet)
*/
private import python
import semmle.python.dataflow.new.DataFlow
module TestConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "SOURCE"
or
node.(DataFlow::CfgNode).getNode().getNode().(StringLiteral).getS() = "source"
or
node.(DataFlow::CfgNode).getNode().getNode().(IntegerLiteral).getN() = "42"
or
node.(DataFlow::CfgNode).getNode().getNode().(FloatLiteral).getN() = "42.0"
// No support for complex numbers
}
predicate isSink(DataFlow::Node node) {
exists(DataFlow::CallCfgNode call |
call.getFunction().asCfgNode().(NameNode).getId() in ["SINK", "SINK_F"] and
(node = call.getArg(_) or node = call.getArgByName(_)) and
not node = call.getArgByName("not_present_at_runtime")
)
}
predicate isBarrierIn(DataFlow::Node node) { isSource(node) }
}
module TestFlow = DataFlow::Global<TestConfig>;

View File

@@ -1,49 +0,0 @@
/**
* Configuration to test selected data flow
* Sources in the source code are denoted by the special name `SOURCE`,
* and sinks are denoted by arguments to the special function `SINK`.
* For example, given the test code
* ```python
* def test():
* s = SOURCE
* SINK(s)
* ```
* `SOURCE` will be a source and the second occurrence of `s` will be a sink.
*
* In order to test literals, alternative sources are defined for each type:
*
* for | use
* ----------
* string | `"source"`
* integer | `42`
* float | `42.0`
* complex | `42j` (not supported yet)
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
module TestConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "SOURCE"
or
node.(DataFlow::CfgNode).getNode().getNode().(StringLiteral).getS() = "source"
or
node.(DataFlow::CfgNode).getNode().getNode().(IntegerLiteral).getN() = "42"
or
node.(DataFlow::CfgNode).getNode().getNode().(FloatLiteral).getN() = "42.0"
// No support for complex numbers
}
predicate isSink(DataFlow::Node node) {
exists(CallNode call |
call.getFunction().(NameNode).getId() in ["SINK", "SINK_F"] and
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}
predicate isBarrierIn(DataFlow::Node node) { isSource(node) }
}
module TestFlow = TaintTracking::Global<TestConfig>;

View File

@@ -1,12 +0,0 @@
private import python as PY
private import codeql.util.test.InlineExpectationsTest
module Impl implements InlineExpectationsTestSig {
/**
* A class representing line comments in Python. As this is the only form of comment Python
* permits, we simply reuse the `Comment` class.
*/
class ExpectationComment = PY::Comment;
class Location = PY::Location;
}