mirror of
https://github.com/github/codeql.git
synced 2026-06-02 20:30:15 +02:00
Flips the Python dataflow trunk from the legacy CFG (semmle/python/Flow.qll) and legacy ESSA SSA (semmle/python/essa/*) to the new shared CFG facade (semmle.python.controlflow.internal.Cfg) and the new SSA adapter (semmle.python.dataflow.new.internal.SsaImpl), both introduced additively in the preceding PRs in this stack. This is the trunk-flip equivalent of the original draft PR #21894 (kept around as documentation), rebased on top of the four preparatory PRs: P1: Remove AstNode.getAFlowNode() and rewrite callers (#21919). P2: Qualify Flow.qll's AST references with Py:: prefix (#21920). P3: Add new shared-CFG-backed control flow graph (#21921). P4: Add new shared-SSA-backed SSA adapter (#21923). The Python dataflow library (semmle/python/dataflow/new/) now imports the new CFG facade and SSA adapter. All CFG-typed predicates (ControlFlowNode, CallNode, BasicBlock, NameNode, AttrNode, ...) are qualified with the Cfg:: prefix; SSA references switch from EssaVariable/EssaDefinition to SsaImpl::Definition/SourceVariable. GuardNode is redesigned to use the new CFG's outcome-node model (isAfterTrue / isAfterFalse) instead of the legacy ConditionBlock + flipped indirection. Only BarrierGuard<...> is preserved as public API. Framework files (Bottle, FastApi, Django, Tornado, Pyramid, Stdlib, ...) are updated to take CFG nodes from the new facade. A handful of dataflow consistency tweaks for the new CFG: - Augmented-assignment targets are treated as both load and store. - 'from X import *' produces uncertain SSA writes for unknown names. - CFG nodes are canonicalised so dataflow does not see equivalent pre/post-order pairs as distinct nodes. Two AST tweaks for the new CFG: - AstNodeImpl: omit PEP 695 type-parameter names from FunctionDefExpr / ClassDefExpr children. - ImportResolution: drop the legacy essa import. 
Test churn (~175 files): reblessed library- and query-test .expected files reflect slightly different CFG granularity, different toString output, and a handful of true alert deltas in security queries. Verification: all 367 lib + src + consistency-queries compile clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
155 lines
6.8 KiB
Plaintext
155 lines
6.8 KiB
Plaintext
/**
|
|
* Provides consistency queries for checking invariants in the language-specific
|
|
* data-flow classes and predicates.
|
|
*/
|
|
|
|
private import python
|
|
import semmle.python.dataflow.new.DataFlow::DataFlow
|
|
private import semmle.python.dataflow.new.internal.DataFlowImplSpecific
|
|
private import semmle.python.dataflow.new.internal.DataFlowDispatch
|
|
private import semmle.python.dataflow.new.internal.TaintTrackingImplSpecific
|
|
private import codeql.dataflow.internal.DataFlowImplConsistency
|
|
private import semmle.python.controlflow.internal.Cfg as Cfg
|
|
|
|
/**
 * Python-specific input to the shared data-flow consistency checks.
 *
 * Each `...Exclude` predicate names the nodes or calls that are deliberately
 * tolerated by the corresponding check.
 */
private module Input implements InputSig<Location, PythonDataFlow> {
  private import Private
  private import Public

  /**
   * Holds if `arg` is the `self` argument of `call`.
   */
  pragma[inline]
  private predicate consistencySelfArgument(ArgumentNode arg, DataFlowCall call) {
    exists(ArgumentPosition selfPos | selfPos.isSelf() | arg = call.getArgument(selfPos))
  }

  /**
   * Holds if `call` is accepted as a call of the bound method read by `attr`.
   *
   * Either the call is in the same scope as `attr` and its function has `attr`
   * as a local source, or the call's scope is not a `Function` with a
   * definition and is transitively nested in the scope of `attr`.
   */
  pragma[inline]
  private predicate consistencyBoundMethodCall(DataFlowCall call, AttrRead attr) {
    // for simple cases we can track the function back to the attr read
    attr.getScope() = call.getScope() and
    exists(CfgNode callee |
      callee.asCfgNode() = call.getNode().(Cfg::CallNode).getFunction() and
      callee.getALocalSource() = attr
    )
    or
    // when the call appears in the body of a list-comprehension we can't do
    // that, and simply allow it instead.
    not exists(call.getScope().(Function).getDefinition()) and
    attr.getScope() = call.getScope().getScope+()
  }

  /** Flow-summary nodes are excluded from this check. */
  predicate postWithInFlowExclude(Node n) { n instanceof FlowSummaryNode }

  /** Flow-summary nodes are excluded from this check. */
  predicate uniqueNodeLocationExclude(Node n) { n instanceof FlowSummaryNode }

  /** Flow-summary nodes are excluded from this check. */
  predicate missingLocationExclude(Node n) { n instanceof FlowSummaryNode }

  /**
   * Excludes `*args` arguments, `**kwargs` arguments, and everything covered
   * by `missingArgumentCallExclude`.
   */
  predicate argHasPostUpdateExclude(ArgumentNode n) {
    // TODO: Implement post-updates for *args, see tests added in https://github.com/github/codeql/pull/14936
    n.argumentOf(_, any(ArgumentPosition starArgs | starArgs.isStarArgs(_)))
    or
    // TODO: Implement post-updates for **kwargs, see tests added in https://github.com/github/codeql/pull/14936
    n.argumentOf(_, any(ArgumentPosition dictSplat | dictSplat.isDictSplat()))
    or
    missingArgumentCallExclude(n)
  }

  /**
   * Excludes the synthetic `**kwargs` parameter node.
   *
   * `self`/`cls` parameters can be marked as implicit argument to `super()`, so
   * they have post-update nodes. There is a read-step from the synthetic
   * `**kwargs` parameter, but the consistency queries should _not_ complain
   * about the missing post-update node for that synthetic parameter.
   */
  predicate reverseReadExclude(Node n) { n = any(SynthDictSplatParameterNode kwargs) }

  /**
   * Excludes parameters that can be passed both positionally and by keyword:
   * such parameters currently get a parameter position for each case.
   *
   * TODO: Figure out how bad breaking this consistency check is
   */
  predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
    exists(Parameter param, Function func |
      param = func.getArg(_) and
      param = func.getArgByName(_) and
      func = c.getScope() and
      parameterNode(param) = p and
      p = c.getParameter(pos)
    )
  }

  /**
   * Excludes nodes for which the enclosing callable of the synthesized capture
   * node is neither a `Function` nor a `Module`.
   *
   * We only have a selection of valid callables.
   * For instance, we do not have classes as `DataFlowCallable`s.
   *
   * NOTE(review): when `n` is not a `SynthCaptureNode` the cast has no value, so
   * the negation holds and `n` is excluded as well. Confirm that excluding
   * every non-capture node is intended.
   */
  predicate uniqueEnclosingCallableExclude(Node n) {
    not (
      n.(SynthCaptureNode).getSynthesizedCaptureNode().getEnclosingCallable() instanceof Function
      or
      n.(SynthCaptureNode).getSynthesizedCaptureNode().getEnclosingCallable() instanceof Module
    )
  }

  /** Excludes calls located in a file that has no relative path. */
  predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) {
    not exists(string relativePath |
      relativePath = call.getLocation().getFile().getRelativePath()
    )
  }

  /** Excludes nodes located in a file that has no relative path. */
  predicate identityLocalStepExclude(Node n) {
    not exists(string relativePath | relativePath = n.getLocation().getFile().getRelativePath())
  }

  /**
   * Excludes the known situations in which one argument node legitimately
   * belongs to more than one call.
   */
  predicate multipleArgumentCallExclude(ArgumentNode arg, DataFlowCall call) {
    // We can have multiple `DataFlowCall`s for one `CallNode` (for example if it
    // can resolve to multiple functions), but only _one_ `ArgumentNode` is made
    // for each argument in the `CallNode`, which violates this consistency check.
    // (see `getCallArg` in DataFlowDispatch.qll)
    exists(Cfg::CallNode sharedCallNode, DataFlowCall sibling |
      sibling != call and
      sharedCallNode = call.getNode() and
      sharedCallNode = sibling.getNode() and
      isArgumentNode(arg, call, _) and
      isArgumentNode(arg, sibling, _)
    )
    or
    // Bound methods that refer to the same self argument.
    // Example: In `bm = self.foo; bm(); bm()` both bm() calls use the same `self`
    // as the (pos self) argument.
    exists(DataFlowCall sibling, AttrRead boundMethodRead |
      sibling != call and
      arg = boundMethodRead.getObject() and
      consistencyBoundMethodCall(call, boundMethodRead) and
      consistencyBoundMethodCall(sibling, boundMethodRead) and
      consistencySelfArgument(arg, call) and
      consistencySelfArgument(arg, sibling)
    )
    or
    // `f = getattr(obj, "foo"); f()` where `obj` is used as (pos self) argument
    // for the `f()` call.
    exists(AttrRead attr, DataFlowCall getAttrCall, DataFlowCall methodCall |
      (call = getAttrCall or call = methodCall) and
      attr.(CfgNode).getNode() = getAttrCall.getNode() and
      arg = attr.getObject() and
      exists(ArgumentPosition first | first.isPositional(0) | arg = getAttrCall.getArgument(first)) and
      consistencySelfArgument(arg, methodCall)
    )
    or
    // In the code `super(Base, self).foo()` `self` is an argument of both the
    // super() call (pos 1) and the .foo() call (pos self).
    exists(DataFlowCall superCall, DataFlowCall methodCall |
      (call = superCall or call = methodCall) and
      exists(ArgumentPosition second | second.isPositional(1) | arg = superCall.getArgument(second)) and
      consistencySelfArgument(arg, methodCall) and
      exists(superCallTwoArgumentTracker(_, arg))
    )
    or
    // In the code `def func(self): super().foo(); super().bar()` `self` is the
    // (pos self) argument of both the .foo() and the .bar() call.
    exists(DataFlowCall sibling, Function f |
      sibling != call and
      arg = exprNode(f.getArg(0)) and
      f = call.getNode().getScope() and
      f = sibling.getNode().getScope() and
      consistencySelfArgument(arg, call) and
      consistencySelfArgument(arg, sibling)
    )
  }

  /**
   * Excludes argument nodes that are known to have no corresponding call.
   */
  predicate missingArgumentCallExclude(ArgumentNode arg) {
    // The argument nodes are overapproximated in order to not rely on the global
    // `getCallArg` predicate. Because of this, the cases where there is an
    // approximation but no actual argument node must be excluded.
    not getCallArg(_, _, _, arg, _) and
    arg = getCallArgApproximation()
    or
    // Likewise, capturing closure arguments do not have corresponding argument
    // nodes in some cases.
    not arg.argumentOf(_, _) and
    arg instanceof SynthCapturedVariablesArgumentNode
  }
}
|
|
|
|
import MakeConsistency<Location, PythonDataFlow, PythonTaintTracking, Input>
|