mirror of
https://github.com/github/codeql.git
synced 2026-06-02 20:30:15 +02:00
Flips the Python dataflow trunk from the legacy CFG (semmle/python/Flow.qll) and legacy ESSA SSA (semmle/python/essa/*) to the new shared CFG facade (semmle.python.controlflow.internal.Cfg) and the new SSA adapter (semmle.python.dataflow.new.internal.SsaImpl), both introduced additively in the preceding PRs in this stack. This is the trunk-flip equivalent of the original draft PR #21894 (kept around as documentation), rebased on top of the four preparatory PRs: P1: Remove AstNode.getAFlowNode() and rewrite callers (#21919). P2: Qualify Flow.qll's AST references with Py:: prefix (#21920). P3: Add new shared-CFG-backed control flow graph (#21921). P4: Add new shared-SSA-backed SSA adapter (#21923). The Python dataflow library (semmle/python/dataflow/new/) now imports the new CFG facade and SSA adapter. All CFG-typed predicates (ControlFlowNode, CallNode, BasicBlock, NameNode, AttrNode, ...) are qualified with the Cfg:: prefix; SSA references switch from EssaVariable/EssaDefinition to SsaImpl::Definition/SourceVariable. GuardNode is redesigned to use the new CFG's outcome-node model (isAfterTrue / isAfterFalse) instead of the legacy ConditionBlock + flipped indirection. Only BarrierGuard<...> is preserved as public API. Framework files (Bottle, FastApi, Django, Tornado, Pyramid, Stdlib, ...) are updated to take CFG nodes from the new facade. A handful of dataflow consistency tweaks for the new CFG: - Augmented-assignment targets are treated as both load and store. - 'from X import *' produces uncertain SSA writes for unknown names. - CFG nodes are canonicalised so dataflow does not see equivalent pre/post-order pairs as distinct nodes. Two AST tweaks for the new CFG: - AstNodeImpl: omit PEP 695 type-parameter names from FunctionDefExpr / ClassDefExpr children. - ImportResolution: drop the legacy essa import. 
Test churn (~175 files): reblessed library- and query-test .expected files reflect slightly different CFG granularity, different toString output, and a handful of true alert deltas in security queries. Verification: all 367 lib + src + consistency-queries compile cleanly. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
141 lines
5.4 KiB
Plaintext
141 lines
5.4 KiB
Plaintext
import python
|
|
import utils.test.InlineExpectationsTest
|
|
private import semmle.python.dataflow.new.internal.DataFlowDispatch as TT
|
|
private import semmle.python.controlflow.internal.Cfg as Cfg
|
|
private import LegacyPointsTo
|
|
|
|
/**
 * Holds when the points-to based call-graph resolves `call` to `callable`.
 *
 * Both `call` and `callable` must be located in a file that has a relative path.
 */
predicate pointsToCallEdge(Call call, Function callable) {
  // Using `viableCallable` from `DataFlowDispatchPointsTo` (from a temporary copy of
  // `dataflow.new.internal` that still uses points-to) instead of calling `getACall()`
  // directly on a Value was tried, but it only added results for `__init__` methods,
  // not for anything else.
  exists(PythonFunctionValue targetValue, CallNode pointsToCall |
    pointsToCall.getNode() = call and
    pointsToCall = targetValue.getACall() and
    targetValue.getScope() = callable
  ) and
  exists(callable.getLocation().getFile().getRelativePath()) and
  exists(call.getLocation().getFile().getRelativePath())
}
|
|
|
|
/**
 * Holds when the type-tracking based call-graph resolves `call` to `callable`,
 * that is, when `TT::viableCallable` yields a callable whose scope is `callable`
 * for a data-flow call whose underlying AST node is `call`.
 *
 * Both `call` and `callable` must be located in a file that has a relative path.
 */
predicate typeTrackerCallEdge(Call call, Function callable) {
  exists(TT::DataFlowCall dataFlowCall, TT::DataFlowCallable resolved |
    call = dataFlowCall.getNode().getNode() and
    resolved = TT::viableCallable(dataFlowCall) and
    callable = resolved.getScope()
  ) and
  exists(callable.getLocation().getFile().getRelativePath()) and
  exists(call.getLocation().getFile().getRelativePath())
}
|
|
|
|
/**
 * Holds if the call edge from `call` to `callable` is from a class call, that is,
 * `TT::resolveCall` resolves it with a call type that is a `TT::CallTypeClass`.
 *
 * Both `call` and `callable` must be located in a file that has a relative path.
 */
predicate typeTrackerClassCall(Call call, Function callable) {
  exists(Cfg::CallNode callNode, TT::TCallType callType |
    callType instanceof TT::CallTypeClass and
    call = callNode.getNode() and
    TT::resolveCall(callNode, callable, callType)
  ) and
  exists(callable.getLocation().getFile().getRelativePath()) and
  exists(call.getLocation().getFile().getRelativePath())
}
|
|
|
|
/**
 * Inline expectations test signature implementation for call-graph edges.
 *
 * The `pt` tag reports edges found by `pointsToCallEdge`, and the `tt` tag reports
 * edges found by `typeTrackerCallEdge`.
 */
module CallGraphTest implements TestSig {
  string getARelevantTag() { result = ["pt", "tt"] }

  predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(Call call, Function target |
      // select the call-graph implementation that belongs to the tag
      tag = "pt" and
      pointsToCallEdge(call, target)
      or
      tag = "tt" and
      typeTrackerCallEdge(call, target)
    |
      value = getCallEdgeValue(call, target) and
      element = call.toString() and
      location = call.getLocation()
    ) and
    exists(location.getFile().getRelativePath())
  }
}
|
|
|
|
import MakeTest<CallGraphTest>
|
|
|
|
/**
 * Gets the expectation value describing the call edge from `call` to `target`.
 *
 * When both are in the same file, this is just `betterQualName(target)`. Otherwise
 * it is the quoted string `"<path>:<name>"`, where `<path>` is the part of the
 * absolute path of the target's file that follows the last `CallGraph*` directory.
 */
bindingset[call, target]
string getCallEdgeValue(Call call, Function target) {
  if target.getLocation().getFile() = call.getLocation().getFile()
  then result = betterQualName(target)
  else
    // the value needs to be enclosed in quotes to allow special characters
    result =
      "\"" +
        target.getLocation().getFile().getAbsolutePath().regexpCapture(".*/CallGraph[^/]*/(.*)", 1)
        + ":" + betterQualName(target) + "\""
}
|
|
|
|
/**
 * Gets a name for `func` that is more useful in test expectations than the plain
 * qualified name.
 *
 * - For a non-lambda function whose qualified name is shared by exactly one function
 *   in its enclosing module, this is the qualified name.
 * - For any other non-lambda function, the start line is prepended: `<line>:<name>`.
 * - For a lambda, this is `lambda[<file short name>:<start line>:<start column>]`.
 */
bindingset[func]
string betterQualName(Function func) {
  // note: `getQualifiedName` for lambdas is just "lambda", which is not very useful,
  // so lambdas are handled separately in the second disjunct.
  not func.isLambda() and
  if
    1 =
      strictcount(Function sameName |
        func.getEnclosingModule() = sameName.getEnclosingModule() and
        func.getQualifiedName() = sameName.getQualifiedName()
      )
  then result = func.getQualifiedName()
  else result = func.getLocation().getStartLine() + ":" + func.getQualifiedName()
  or
  func.isLambda() and
  result =
    "lambda[" + func.getLocation().getFile().getShortName() + ":" +
      func.getLocation().getStartLine() + ":" + func.getLocation().getStartColumn() + "]"
}
|
|
|
|
/**
 * Holds if `callable` is in a file with a relative path, and a different function in
 * the same file has the same `betterQualName`. `message` describes the problem.
 */
query predicate debug_callableNotUnique(Function callable, string message) {
  exists(Function clash |
    callable.getLocation().getFile() = clash.getLocation().getFile() and
    betterQualName(callable) = betterQualName(clash) and
    clash != callable
  ) and
  exists(callable.getLocation().getFile().getRelativePath()) and
  message =
    "Qualified function name '" + callable.getQualifiedName() +
      "' is not unique within its file. Please fix."
}
|
|
|
|
/**
 * Holds if points-to resolves `call` to a target that type-tracking does not find.
 * `qualname` is the call edge value for that target.
 *
 * Edges matched by a `pt` false-positive expectation on the same line are excluded.
 */
query predicate pointsTo_found_typeTracker_notFound(Call call, string qualname) {
  exists(Function target |
    not typeTrackerCallEdge(call, target) and
    pointsToCallEdge(call, target) and
    qualname = getCallEdgeValue(call, target) and
    // ignore SPURIOUS call edges
    not exists(FalsePositiveTestExpectation spurious |
      spurious.getLocation().getStartLine() = call.getLocation().getStartLine() and
      spurious.getLocation().getFile() = call.getLocation().getFile() and
      spurious.getValue() = getCallEdgeValue(call, target) and
      spurious.getTag() = "pt"
    )
  )
}
|
|
|
|
/**
 * Holds if type-tracking resolves `call` to a target that points-to does not find.
 * `qualname` is the call edge value for that target.
 *
 * Class calls and edges matched by a `tt` false-positive expectation on the same line
 * are excluded.
 */
query predicate typeTracker_found_pointsTo_notFound(Call call, string qualname) {
  exists(Function target |
    typeTrackerCallEdge(call, target) and
    not pointsToCallEdge(call, target) and
    // Differences between points-to and type-tracking for class calls are filtered
    // out, since they would otherwise give too much noise (the two analyses simply
    // handle these differently).
    not typeTrackerClassCall(call, target) and
    qualname = getCallEdgeValue(call, target) and
    // ignore SPURIOUS call edges
    not exists(FalsePositiveTestExpectation spurious |
      spurious.getLocation().getStartLine() = call.getLocation().getStartLine() and
      spurious.getLocation().getFile() = call.getLocation().getFile() and
      spurious.getValue() = getCallEdgeValue(call, target) and
      spurious.getTag() = "tt"
    )
  )
}
|