mirror of
https://github.com/github/codeql.git
synced 2026-07-03 18:45:34 +02:00
Compare commits
3 Commits
yoff/pytho
...
python-two
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cdd557f877 | ||
|
|
2bf6031c0f | ||
|
|
a5444b573a |
@@ -1,2 +0,0 @@
|
||||
import semmle.python.controlflow.internal.AstNodeImpl
|
||||
import ControlFlow::Consistency
|
||||
@@ -1,4 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* A new Python control flow graph implementation has been added under `semmle.python.controlflow.internal.Cfg` (backed by `AstNodeImpl.qll`), built on the shared `codeql.controlflow.ControlFlowGraph` library. It is not yet used by the dataflow library or any production query; the legacy CFG in `semmle/python/Flow.qll` remains the default. The new library is exposed for tests and for upcoming migrations.
|
||||
@@ -1,4 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* A new SSA adapter has been added under `semmle.python.dataflow.new.internal.SsaImpl`, built on the shared `codeql.ssa.Ssa` library and the new shared CFG (`semmle.python.controlflow.internal.Cfg`). It is not yet used by the dataflow library or any production query; the legacy ESSA SSA in `semmle/python/essa/*` remains the default. The new SSA adapter is exposed for tests and for the upcoming dataflow migration.
|
||||
@@ -1,4 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* The new (shared-CFG-based) Python control flow graph now visits parameter and return type annotations as CFG nodes for function definitions, matching the legacy CFG. This restores annotation-based type tracking through framework models such as FastAPI's `Depends()`, Pydantic request models, Starlette `WebSocket` handlers, and any other models that flow a class reference through `Parameter.getAnnotation()` to identify instances of the annotated class.
|
||||
@@ -0,0 +1,5 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
|
||||
- Temporarily disabled the `instanceFieldStep` disjunct of the internal `TypeTrackingInput::levelStepCall` predicate, which was introduced in 7.2.0 and caused catastrophic query slowdowns on some OOP-heavy Python codebases (e.g. `mypy` and `dask`).
|
||||
@@ -1,42 +0,0 @@
|
||||
/**
|
||||
* @name Print CFG
|
||||
* @description Produces a representation of a file's Control Flow Graph.
|
||||
* This query is used by the VS Code extension.
|
||||
* @id py/print-cfg
|
||||
* @kind graph
|
||||
* @tags ide-contextual-queries/print-cfg
|
||||
*/
|
||||
|
||||
import semmle.python.Files as Files
|
||||
// import semmle.python.Scope
|
||||
import semmle.python.controlflow.internal.AstNodeImpl
|
||||
|
||||
external string selectedSourceFile();
|
||||
|
||||
private predicate selectedSourceFileAlias = selectedSourceFile/0;
|
||||
|
||||
external int selectedSourceLine();
|
||||
|
||||
private predicate selectedSourceLineAlias = selectedSourceLine/0;
|
||||
|
||||
external int selectedSourceColumn();
|
||||
|
||||
private predicate selectedSourceColumnAlias = selectedSourceColumn/0;
|
||||
|
||||
module ViewCfgQueryInput implements ControlFlow::ViewCfgQueryInputSig<Files::File> {
|
||||
predicate selectedSourceFile = selectedSourceFileAlias/0;
|
||||
|
||||
predicate selectedSourceLine = selectedSourceLineAlias/0;
|
||||
|
||||
predicate selectedSourceColumn = selectedSourceColumnAlias/0;
|
||||
|
||||
predicate cfgScopeSpan(
|
||||
Ast::Callable scope, Files::File file, int startLine, int startColumn, int endLine,
|
||||
int endColumn
|
||||
) {
|
||||
file = scope.getLocation().getFile() and
|
||||
scope.getLocation().hasLocationInfo(_, startLine, startColumn, endLine, endColumn)
|
||||
}
|
||||
}
|
||||
|
||||
import ControlFlow::ViewCfgQuery<Files::File, ViewCfgQueryInput>
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1138,7 +1138,9 @@ predicate clearsContent(Node n, ContentSet cs) {
|
||||
* Holds if the value that is being tracked is expected to be stored inside content `c`
|
||||
* at node `n`.
|
||||
*/
|
||||
predicate expectsContent(Node n, ContentSet c) { none() }
|
||||
predicate expectsContent(Node n, ContentSet c) {
|
||||
FlowSummaryImpl::Private::Steps::summaryExpectsContent(n.(FlowSummaryNode).getSummaryNode(), c)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if values stored inside attribute `c` are cleared at node `n`.
|
||||
|
||||
@@ -91,6 +91,8 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
|
||||
cs.isAnyTupleOrDictionaryElement() and result = "AnyTupleOrDictionaryElement" and arg = ""
|
||||
}
|
||||
|
||||
string encodeWithContent(ContentSet c, string arg) { result = "With" + encodeContent(c, arg) }
|
||||
|
||||
bindingset[token]
|
||||
ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) {
|
||||
// needed to support `Argument[x..y]` ranges
|
||||
|
||||
@@ -1,547 +0,0 @@
|
||||
/**
|
||||
* Provides the Python SSA implementation built on the new (shared) CFG.
|
||||
*
|
||||
* Mirrors the Java SSA adapter at
|
||||
* `java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll`:
|
||||
* an `InputSig` is defined in terms of positional `(BasicBlock, int)`
|
||||
* variable references, and the shared
|
||||
* `codeql.ssa.Ssa::Make<Location, Cfg, Input>` module is then
|
||||
* instantiated.
|
||||
*
|
||||
* `SourceVariable` is the AST-level `Py::Variable`. Variable references
|
||||
* are looked up via the CFG facade's `NameNode.defines`/`uses`/`deletes`
|
||||
* predicates, which themselves are one-line bridges to AST-level
|
||||
* `Name.defines`/`uses`/`deletes`.
|
||||
*
|
||||
* Implicit-entry definitions are inserted for:
|
||||
* - non-local / global / builtin variables that are read in the scope
|
||||
* but never assigned (no enclosing CFG node defines them),
|
||||
* - captured variables (variables defined in an enclosing scope that
|
||||
* are read inside the scope), and
|
||||
* - parameters, but only if the corresponding parameter name is *not*
|
||||
* itself a CFG node. With the C#-style parameter wiring already
|
||||
* installed in `AstNodeImpl.qll`, parameter names *are* CFG nodes,
|
||||
* so the regular `variableWrite` path handles them — no `i = -1`
|
||||
* entry is needed for ordinary parameters.
|
||||
*/
|
||||
overlay[local?]
|
||||
module;
|
||||
|
||||
private import python as Py
|
||||
private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
|
||||
private import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
private import codeql.ssa.Ssa as SsaImplCommon
|
||||
private import codeql.controlflow.BasicBlock as BB
|
||||
|
||||
/**
|
||||
* Adapts the Python `Cfg` facade to the shared SSA library's `CfgSig`.
|
||||
* All members are inherited from `Cfg::ControlFlowNode` and
|
||||
* `Cfg::BasicBlock`.
|
||||
*/
|
||||
private module CfgForSsa implements BB::CfgSig<Py::Location> {
|
||||
class ControlFlowNode = CfgImpl::ControlFlowNode;
|
||||
|
||||
class BasicBlock = CfgImpl::BasicBlock;
|
||||
|
||||
class EntryBasicBlock = CfgImpl::Cfg::EntryBasicBlock;
|
||||
|
||||
predicate dominatingEdge = CfgImpl::Cfg::dominatingEdge/2;
|
||||
}
|
||||
|
||||
/**
|
||||
* A source variable for SSA, wrapping a Python AST `Variable`.
|
||||
*
|
||||
* We only track variables that are read at least once in their scope —
|
||||
* tracking write-only variables would be unnecessary work — *except*
|
||||
* for module-scope globals, where the "read" can be external (e.g.
|
||||
* `import mymodule; mymodule.x`). Such globals are tracked
|
||||
* unconditionally so that import-resolution can find their defining
|
||||
* write.
|
||||
*/
|
||||
private newtype TSsaSourceVariable =
|
||||
TPyVar(Py::Variable v) {
|
||||
// Has a use somewhere — read-relevant for SSA.
|
||||
exists(Cfg::NameNode n | n.uses(v))
|
||||
or
|
||||
// Or has a deletion (treated as a write that destroys the value).
|
||||
exists(Cfg::NameNode n | n.deletes(v))
|
||||
or
|
||||
// Or is a module-scope global written in this module — must be
|
||||
// tracked even if never read locally, because importers may read
|
||||
// it as an attribute on the module object.
|
||||
v.getScope() instanceof Py::Module and
|
||||
exists(Cfg::NameNode n | n.defines(v))
|
||||
or
|
||||
// Or is a parameter — parameters must always have a
|
||||
// `ParameterDefinition` for dataflow argument-routing to work,
|
||||
// even if the parameter is never read in its scope. Mirrors
|
||||
// legacy ESSA's `ParameterDefinition` (which fired for every
|
||||
// parameter binding regardless of liveness).
|
||||
exists(Py::Parameter p | p.asName() = v.getAStore())
|
||||
}
|
||||
|
||||
/**
|
||||
* A source variable for SSA, wrapping a Python AST `Variable`.
|
||||
*/
|
||||
class SsaSourceVariable extends TSsaSourceVariable {
|
||||
/** Gets the underlying Python AST variable. */
|
||||
Py::Variable getVariable() { this = TPyVar(result) }
|
||||
|
||||
/** Gets the (textual) name of this variable. */
|
||||
string getName() { result = this.getVariable().getId() }
|
||||
|
||||
/** Gets a textual representation of this source variable. */
|
||||
string toString() { result = this.getVariable().toString() }
|
||||
|
||||
/** Gets the location of this source variable. */
|
||||
Py::Location getLocation() { result = this.getVariable().getScope().getLocation() }
|
||||
|
||||
/** Gets the scope in which this variable lives. */
|
||||
Py::Scope getScope() { result = this.getVariable().getScope() }
|
||||
|
||||
/**
|
||||
* Gets a use of this variable as it appears in the source — a `NameNode`
|
||||
* that loads or deletes the variable. Mirrors legacy
|
||||
* `SsaSourceVariable.getASourceUse()`.
|
||||
*/
|
||||
Cfg::ControlFlowNode getASourceUse() {
|
||||
exists(Cfg::NameNode n | result = n |
|
||||
n.uses(this.getVariable()) or n.deletes(this.getVariable())
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an implicit use of this variable. The new SSA does not have
|
||||
* implicit-use refinements, but we keep this for API parity — every
|
||||
* normal-exit of the variable's scope counts as a sink, ensuring
|
||||
* variables stay live to scope exit for taint-tracking.
|
||||
*/
|
||||
Cfg::ControlFlowNode getAnImplicitUse() {
|
||||
result.isNormalExit() and result.getScope() = this.getScope()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a use of this variable — either an explicit source use or an
|
||||
* implicit use at scope exit. Mirrors legacy `SsaSourceVariable.getAUse()`.
|
||||
*/
|
||||
Cfg::ControlFlowNode getAUse() {
|
||||
result = this.getASourceUse() or result = this.getAnImplicitUse()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `v` is a non-local read in scope `s`, in the sense that `s`
|
||||
* uses `v` but does not write it within `s`. This includes globals,
|
||||
* builtins, and variables captured from an enclosing function scope.
|
||||
*
|
||||
* The `Py::Variable` `v` lives in some defining scope (the module for
|
||||
* globals, an outer function for closures, etc.); the reading scope
|
||||
* `s` is the scope where the use of `v` occurs.
|
||||
*/
|
||||
private predicate nonLocalReadIn(Py::Variable v, Py::Scope s) {
|
||||
exists(Cfg::NameNode n |
|
||||
n.uses(v) and
|
||||
n.getScope() = s and
|
||||
not exists(Cfg::NameNode def | def.defines(v) and def.getScope() = s)
|
||||
) and
|
||||
// Match legacy ESSA: only create entry defs for variables that have
|
||||
// at least one defining store somewhere — otherwise the entry def
|
||||
// represents "nothing reaches here", which is the default anyway and
|
||||
// introduces no useful flow. (Legacy's `ModuleVariable` required a
|
||||
// store; this is the closure-aware generalisation.)
|
||||
exists(Cfg::NameNode store | store.defines(v))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `bb` is the entry basic block of a scope where `v` should
|
||||
* have an implicit entry definition. This covers:
|
||||
* - non-local / global / builtin variables read in `s`, and
|
||||
* - captured variables (defined in an enclosing scope but read in `s`).
|
||||
*
|
||||
* Each reading scope gets its own entry def, so a closure variable can
|
||||
* have multiple entry defs across all functions/methods that read it.
|
||||
*
|
||||
* Parameters are *not* included: their bound `Name` is itself a CFG
|
||||
* node (per the C#-style parameter wiring), so `variableWrite` fires at
|
||||
* the parameter's natural CFG index.
|
||||
*/
|
||||
private predicate hasEntryDefIn(SsaSourceVariable v, CfgImpl::BasicBlock bb) {
|
||||
exists(Py::Scope s |
|
||||
nonLocalReadIn(v.getVariable(), s) and
|
||||
bb = entryBlock(s)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the entry basic block of scope `s`, where implicit entry
|
||||
* definitions are placed (at synthetic index `-1`).
|
||||
*/
|
||||
private CfgImpl::BasicBlock entryBlock(Py::Scope s) {
|
||||
exists(CfgImpl::ControlFlowNode entry |
|
||||
entry instanceof CfgImpl::ControlFlow::EntryNode and
|
||||
entry.getEnclosingCallable().asScope() = s and
|
||||
result = entry.getBasicBlock()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* The SSA `InputSig` for Python. References are positional
|
||||
* `(BasicBlock, int)` pairs into the new CFG.
|
||||
*/
|
||||
private module SsaImplInput implements SsaImplCommon::InputSig<Py::Location, CfgImpl::BasicBlock> {
|
||||
class SourceVariable = SsaSourceVariable;
|
||||
|
||||
predicate variableWrite(CfgImpl::BasicBlock bb, int i, SourceVariable v, boolean certain) {
|
||||
// Explicit binding at a CFG node — includes assignments,
|
||||
// parameter Names (wired in via the C# pattern), exception-handler
|
||||
// `as`-bindings, import aliases, and match-pattern captures.
|
||||
exists(Cfg::NameNode n |
|
||||
bb.getNode(i) = n and
|
||||
n.defines(v.getVariable()) and
|
||||
certain = true
|
||||
)
|
||||
or
|
||||
// `del x` — removes the binding. Modelled as a certain write that
|
||||
// makes any subsequent read invalid.
|
||||
exists(Cfg::NameNode n |
|
||||
bb.getNode(i) = n and
|
||||
n.deletes(v.getVariable()) and
|
||||
certain = true
|
||||
)
|
||||
or
|
||||
// Implicit entry definition for non-local / captured / global /
|
||||
// builtin variables read in some scope. Each reading scope's entry
|
||||
// block gets one such write, allowing closures: e.g. when `x` is a
|
||||
// parameter of an outer function and read inside a nested
|
||||
// function, both scopes get entry defs for `x`.
|
||||
hasEntryDefIn(v, bb) and
|
||||
i = -1 and
|
||||
certain = true
|
||||
or
|
||||
// `from X import *` — possibly rebinds every name in the importing
|
||||
// scope. Modelled as an uncertain write at the import-star's CFG
|
||||
// position for every variable that lives in (or is referenced
|
||||
// from) the same scope as the import-star. Mirrors legacy ESSA's
|
||||
// `ImportStarRefinement` (see `essa/SsaDefinitions.qll`'s
|
||||
// `import_star_refinement` predicate). The write is uncertain so
|
||||
// that prior definitions of the variable remain available — the
|
||||
// shared-SSA `SsaUncertainWrite` merges the new value with the
|
||||
// immediately preceding definition.
|
||||
exists(Cfg::ImportStarNode imp |
|
||||
bb.getNode(i) = imp and
|
||||
certain = false and
|
||||
(
|
||||
v.getVariable().getScope() = imp.getScope()
|
||||
or
|
||||
// Variable is defined in some other scope but referenced in
|
||||
// the same scope as the import-star (matches legacy clause 2:
|
||||
// `other.uses(v) and def.getScope() = other.getScope()`).
|
||||
exists(Cfg::NameNode other |
|
||||
other.uses(v.getVariable()) and
|
||||
imp.getScope() = other.getScope()
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
predicate variableRead(CfgImpl::BasicBlock bb, int i, SourceVariable v, boolean certain) {
|
||||
// Explicit source use — a `Name` load or a `del x` of the variable.
|
||||
exists(Cfg::NameNode n |
|
||||
bb.getNode(i) = n and
|
||||
n.uses(v.getVariable()) and
|
||||
certain = true
|
||||
)
|
||||
or
|
||||
// Synthetic use at the normal exit of the variable's defining scope.
|
||||
// This keeps every variable live to scope exit so that callers (e.g.
|
||||
// `module_export` in ImportResolution.qll, or taint-tracking pass-through
|
||||
// through unread locals) can ask "which definition reaches end of
|
||||
// scope?". Mirrors legacy ESSA's `SsaSourceVariable.getAUse()` which
|
||||
// included `getScope().getANormalExit()`.
|
||||
exists(Cfg::ControlFlowNode exit |
|
||||
exit.isNormalExit() and
|
||||
exit.getScope() = v.getVariable().getScope() and
|
||||
bb.getNode(i) = exit and
|
||||
certain = true
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The shared SSA instantiation for Python.
|
||||
*
|
||||
* Members:
|
||||
* - `Definition` — the union of explicit, uncertain, and phi definitions
|
||||
* - `WriteDefinition`, `UncertainWriteDefinition`, `PhiNode`
|
||||
* - the standard SSA predicates (`getAUse`, `getAnUltimateDefinition`, ...).
|
||||
*/
|
||||
module Ssa = SsaImplCommon::Make<Py::Location, CfgForSsa, SsaImplInput>;
|
||||
|
||||
final class Definition = Ssa::Definition;
|
||||
|
||||
final class WriteDefinition = Ssa::WriteDefinition;
|
||||
|
||||
final class UncertainWriteDefinition = Ssa::UncertainWriteDefinition;
|
||||
|
||||
final class PhiNode = Ssa::PhiNode;
|
||||
|
||||
// ===========================================================================
|
||||
// ESSA-shaped adapter layer
|
||||
//
|
||||
// The dataflow library (`python/ql/lib/semmle/python/dataflow/new/`) and
|
||||
// related modules (`ApiGraphs.qll`, etc.) consume the legacy ESSA API
|
||||
// (`EssaVariable`, `EssaDefinition`, `AssignmentDefinition`,
|
||||
// `ScopeEntryDefinition`, `ParameterDefinition`, `WithDefinition`,
|
||||
// `PhiFunction`, plus the `AdjacentUses` module). To migrate them off
|
||||
// the legacy CFG, we expose the same API surface on top of the
|
||||
// shared SSA built above.
|
||||
//
|
||||
// This adapter is intentionally narrow: it covers only the predicates
|
||||
// that new dataflow consumes. The richer legacy ESSA — refinement
|
||||
// nodes, attribute refinements, edge refinements — stays available
|
||||
// via `semmle.python.essa.Essa` for points-to / legacy code.
|
||||
// ===========================================================================
|
||||
/**
|
||||
* Gets the CFG node at which a write definition's binding takes place.
|
||||
*
|
||||
* For ordinary writes (assignment, deletion, parameter) this is the
|
||||
* canonical CFG node of the bound Name. For implicit entry definitions
|
||||
* (synthesised at position `-1` of a scope's entry BB) this is the
|
||||
* scope's entry node.
|
||||
*/
|
||||
private Cfg::ControlFlowNode writeDefNode(Ssa::WriteDefinition def) {
|
||||
exists(CfgImpl::BasicBlock bb, int i | def.definesAt(_, bb, i) |
|
||||
i >= 0 and result = bb.getNode(i)
|
||||
or
|
||||
i = -1 and result = bb.getNode(0)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A write definition whose binding has a corresponding CFG node — i.e.
|
||||
* everything that's not a phi node. Mirrors legacy ESSA's
|
||||
* `EssaNodeDefinition`.
|
||||
*/
|
||||
class EssaNodeDefinition extends Ssa::WriteDefinition {
|
||||
/** Gets the CFG node where this definition's binding takes place. */
|
||||
Cfg::ControlFlowNode getDefiningNode() { result = writeDefNode(this) }
|
||||
|
||||
/** Gets the variable defined here (legacy name). */
|
||||
SsaSourceVariable getVariable() { result = this.getSourceVariable() }
|
||||
|
||||
/** Gets the enclosing scope. */
|
||||
Py::Scope getScope() {
|
||||
exists(Cfg::ControlFlowNode n | n = this.getDefiningNode() | result = n.getScope())
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this definition defines source variable `v` at CFG node
|
||||
* `defNode`. Flatter form of `getSourceVariable()` +
|
||||
* `getDefiningNode()`, matching legacy ESSA's `definedBy`.
|
||||
*/
|
||||
predicate definedBy(SsaSourceVariable v, Cfg::ControlFlowNode defNode) {
|
||||
v = this.getSourceVariable() and defNode = this.getDefiningNode()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An assignment definition: any binding where the value being assigned
|
||||
* is statically known via `Cfg::DefinitionNode.getValue()`. Includes
|
||||
* plain assignments, walrus, annotated assignments, augmented
|
||||
* assignments, import aliases (`import x` / `from m import x [as y]`),
|
||||
* `with ... as x`, and for-target bindings (where `getValue()` returns
|
||||
* the iter expression's CFG node). Excludes parameter bindings —
|
||||
* those are modelled by `ParameterDefinition`.
|
||||
*/
|
||||
class AssignmentDefinition extends EssaNodeDefinition {
|
||||
AssignmentDefinition() {
|
||||
exists(Cfg::NameNode n | n = this.getDefiningNode() |
|
||||
exists(n.(Cfg::DefinitionNode).getValue()) and
|
||||
not n.(Cfg::ControlFlowNode).isParameter()
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the CFG node for the value being assigned, if statically known. */
|
||||
Cfg::ControlFlowNode getValue() {
|
||||
result = this.getDefiningNode().(Cfg::DefinitionNode).getValue()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A parameter definition — the binding of a parameter name in a
|
||||
* function's scope.
|
||||
*/
|
||||
class ParameterDefinition extends EssaNodeDefinition {
|
||||
ParameterDefinition() { this.getDefiningNode().isParameter() }
|
||||
|
||||
/** Gets the AST `Parameter` (a `Py::Name` in param context). */
|
||||
Py::Name getParameter() { result = this.getDefiningNode().getNode() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A definition introduced by a `with ... as x:` clause.
|
||||
*/
|
||||
class WithDefinition extends EssaNodeDefinition {
|
||||
WithDefinition() {
|
||||
exists(Cfg::NameNode n, Py::With w |
|
||||
n = this.getDefiningNode() and
|
||||
w.getOptionalVars() = n.getNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An assignment where the LHS is a tuple/list and the RHS is unpacked:
|
||||
* `a, b = (1, 2)` or `a, *rest = xs`. The SSA def lives at the inner
|
||||
* `Name` CFG node, but for IterableUnpacking integration we expose
|
||||
* the enclosing `StarredNode` as the `getDefiningNode()` for `*rest`
|
||||
* patterns — mirroring legacy ESSA's `multi_assignment_definition`,
|
||||
* which placed the def at the StarredNode CFG node.
|
||||
*/
|
||||
class MultiAssignmentDefinition extends EssaNodeDefinition {
|
||||
MultiAssignmentDefinition() {
|
||||
exists(Cfg::NameNode n | n = super.getDefiningNode() |
|
||||
exists(Py::Assign a, Py::Expr lhs |
|
||||
a.getATarget() = lhs and
|
||||
(lhs instanceof Py::Tuple or lhs instanceof Py::List) and
|
||||
lhs.getASubExpression+() = n.getNode()
|
||||
)
|
||||
or
|
||||
// For-loop with tuple/list target: `for a, b in xs:` —
|
||||
// tuple-unpacking semantics applies to the for-target.
|
||||
exists(Py::For f, Py::Expr lhs |
|
||||
f.getTarget() = lhs and
|
||||
(lhs instanceof Py::Tuple or lhs instanceof Py::List) and
|
||||
lhs.getASubExpression+() = n.getNode()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
override Cfg::ControlFlowNode getDefiningNode() {
|
||||
// Default: the underlying `Name` CFG node (where the SSA def lives).
|
||||
not exists(Cfg::StarredNode s |
|
||||
s.getNode().(Py::Starred).getValue() = super.getDefiningNode().getNode()
|
||||
) and
|
||||
result = super.getDefiningNode()
|
||||
or
|
||||
// Exception: for `*rest`, expose the enclosing `Starred` CFG node
|
||||
// so that `IterableUnpacking::iterableUnpackingStarredElementStoreStep`
|
||||
// can attach the rest-list to it.
|
||||
exists(Cfg::StarredNode s |
|
||||
s.getNode().(Py::Starred).getValue() = super.getDefiningNode().getNode()
|
||||
|
|
||||
result = s
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An implicit entry definition for a non-local / captured / global /
|
||||
* builtin variable read in a scope but not defined there.
|
||||
*
|
||||
* Inherits from `EssaNodeDefinition` and exposes the scope's entry node
|
||||
* as its defining node (matching legacy ESSA semantics).
|
||||
*/
|
||||
class ScopeEntryDefinition extends EssaNodeDefinition {
|
||||
ScopeEntryDefinition() {
|
||||
exists(CfgImpl::BasicBlock bb |
|
||||
this.definesAt(_, bb, -1) and
|
||||
bb instanceof CfgImpl::Cfg::EntryBasicBlock
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the enclosing scope (the scope whose entry block this def is in). */
|
||||
override Py::Scope getScope() {
|
||||
exists(CfgImpl::BasicBlock bb |
|
||||
this.definesAt(_, bb, -1) and
|
||||
result = bb.getNode(0).(Cfg::ControlFlowNode).getScope()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** A phi node (alias matching legacy naming). */
|
||||
class PhiFunction extends PhiNode {
|
||||
/**
|
||||
* Gets an input to this phi function (a definition that flows into
|
||||
* the phi from one of its predecessor blocks). Mirrors legacy
|
||||
* ESSA's `PhiFunction.getAnInput()`.
|
||||
*/
|
||||
Ssa::Definition getAnInput() { Ssa::phiHasInputFromBlock(this, result, _) }
|
||||
}
|
||||
|
||||
/** Base class for all ESSA definitions (legacy-shaped). */
|
||||
class EssaDefinition = Ssa::Definition;
|
||||
|
||||
/**
|
||||
* An adapter representing a single SSA-defined "variable" — wrapping
|
||||
* one `Ssa::Definition`. Mirrors legacy `EssaVariable` API.
|
||||
*/
|
||||
class EssaVariable extends Ssa::Definition {
|
||||
/** Gets the underlying SSA definition (legacy name). */
|
||||
Ssa::Definition getDefinition() { result = this }
|
||||
|
||||
/**
|
||||
* Gets a CFG node where this definition is used. Includes regular
|
||||
* `Name` reads as well as the synthetic scope-exit "use" registered
|
||||
* via `SsaImplInput::variableRead` — mirrors legacy ESSA's
|
||||
* `EssaVariable.getAUse()` which inherited the synthetic exit-use
|
||||
* from `SsaSourceVariable`.
|
||||
*/
|
||||
Cfg::ControlFlowNode getAUse() {
|
||||
exists(CfgImpl::BasicBlock bb, int i |
|
||||
Ssa::ssaDefReachesRead(this.getSourceVariable(), this, bb, i) and
|
||||
bb.getNode(i) = result
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the (textual) name of the underlying variable. */
|
||||
string getName() { result = this.getSourceVariable().getVariable().getId() }
|
||||
|
||||
/** Gets the scope in which this variable lives. */
|
||||
Py::Scope getScope() { result = this.getSourceVariable().getVariable().getScope() }
|
||||
|
||||
/** Gets an ultimate non-phi ancestor of this definition. */
|
||||
EssaVariable getAnUltimateDefinition() {
|
||||
if this instanceof PhiNode
|
||||
then
|
||||
exists(Ssa::Definition input |
|
||||
Ssa::phiHasInputFromBlock(this, input, _) and
|
||||
result = input.(EssaVariable).getAnUltimateDefinition()
|
||||
)
|
||||
else result = this
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adjacent use-use and def-use relations exposed by the shared SSA
|
||||
* library. Provides the same interface as legacy
|
||||
* `semmle.python.essa.SsaCompute::AdjacentUses`.
|
||||
*/
|
||||
module AdjacentUses {
|
||||
/** Holds if `nodeFrom` and `nodeTo` are adjacent uses of the same SSA variable. */
|
||||
predicate adjacentUseUse(Cfg::NameNode nodeFrom, Cfg::NameNode nodeTo) {
|
||||
exists(SsaSourceVariable v, CfgImpl::BasicBlock bb1, int i1, CfgImpl::BasicBlock bb2, int i2 |
|
||||
Ssa::adjacentUseUse(bb1, i1, bb2, i2, v, _) and
|
||||
nodeFrom = bb1.getNode(i1) and
|
||||
nodeTo = bb2.getNode(i2)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `use` is a first use of definition `def`. */
|
||||
predicate firstUse(Ssa::Definition def, Cfg::NameNode use) {
|
||||
exists(CfgImpl::BasicBlock bb, int i |
|
||||
Ssa::firstUse(def, bb, i, _) and
|
||||
use = bb.getNode(i)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `use` is any reachable use of definition `def`. Combines
|
||||
* `firstUse` with transitive use-use adjacency.
|
||||
*/
|
||||
predicate useOfDef(Ssa::Definition def, Cfg::NameNode use) {
|
||||
firstUse(def, use)
|
||||
or
|
||||
exists(Cfg::NameNode mid | useOfDef(def, mid) and adjacentUseUse(mid, use))
|
||||
}
|
||||
}
|
||||
@@ -170,7 +170,13 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
|
||||
|
||||
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */
|
||||
predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) {
|
||||
instanceFieldStep(nodeFrom, nodeTo)
|
||||
// HOTFIX: `instanceFieldStep` is temporarily disabled (via `and none()`).
|
||||
// It uses `classInstanceTracker(cls)` -- itself a type-tracker run --
|
||||
// from inside `levelStepCall`, creating a structural mutual recursion
|
||||
// that causes catastrophic query slowdowns on some OOP-heavy Python
|
||||
// codebases (e.g. mypy and dask). The `and none()` should be removed
|
||||
// once that recursion is redesigned.
|
||||
instanceFieldStep(nodeFrom, nodeTo) and none()
|
||||
or
|
||||
inheritedFieldStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
@@ -4199,11 +4199,9 @@ module StdlibPrivate {
|
||||
// The positional argument contains a mapping.
|
||||
// TODO: these values can be overwritten by keyword arguments
|
||||
// - dict mapping
|
||||
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
|
||||
input = "Argument[0].DictionaryElement[" + key + "]" and
|
||||
output = "ReturnValue.DictionaryElement[" + key + "]" and
|
||||
preservesValue = true
|
||||
)
|
||||
input = "Argument[0].WithAnyDictionaryElement" and
|
||||
output = "ReturnValue" and
|
||||
preservesValue = true
|
||||
or
|
||||
// - list-of-pairs mapping
|
||||
input = "Argument[0].ListElement.TupleElement[1]" and
|
||||
@@ -4240,9 +4238,7 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[0].SetElement"
|
||||
or
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
input = "Argument[0].AnyTupleElement"
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
// Element content is mutated into list element content
|
||||
@@ -4266,11 +4262,9 @@ module StdlibPrivate {
|
||||
}
|
||||
|
||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]" and
|
||||
output = "ReturnValue.TupleElement[" + i.toString() + "]" and
|
||||
preservesValue = true
|
||||
)
|
||||
input = "Argument[0].WithAnyTupleElement" and
|
||||
output = "ReturnValue" and
|
||||
preservesValue = true
|
||||
or
|
||||
input = "Argument[0].ListElement" and
|
||||
output = "ReturnValue" and
|
||||
@@ -4294,9 +4288,7 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[0].SetElement"
|
||||
or
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
input = "Argument[0].AnyTupleElement"
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "ReturnValue.SetElement" and
|
||||
@@ -4342,9 +4334,7 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[0].SetElement"
|
||||
or
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
input = "Argument[0].AnyTupleElement"
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "ReturnValue.ListElement" and
|
||||
@@ -4372,9 +4362,7 @@ module StdlibPrivate {
|
||||
or
|
||||
content = "SetElement"
|
||||
or
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
content = "TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
content = "AnyTupleElement"
|
||||
|
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
input = "Argument[0]." + content and
|
||||
@@ -4404,9 +4392,7 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[0].SetElement"
|
||||
or
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
input = "Argument[0].AnyTupleElement"
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "ReturnValue.ListElement" and
|
||||
@@ -4434,9 +4420,7 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[0].SetElement"
|
||||
or
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
input = "Argument[0].AnyTupleElement"
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "ReturnValue" and
|
||||
@@ -4468,9 +4452,7 @@ module StdlibPrivate {
|
||||
// We reduce generality slightly by not tracking tuple contents on list arguments beyond the first, for performance.
|
||||
// TODO: Once we have TupleElementAny, this generality can be increased.
|
||||
i = 0 and
|
||||
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
|
||||
input = "Argument[1].TupleElement[" + j.toString() + "]"
|
||||
)
|
||||
input = "Argument[1].AnyTupleElement"
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "Argument[0].Parameter[" + i.toString() + "]" and
|
||||
@@ -4499,9 +4481,7 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[1].SetElement"
|
||||
or
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[1].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
input = "Argument[1].AnyTupleElement"
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
(output = "Argument[0].Parameter[0]" or output = "ReturnValue.ListElement") and
|
||||
@@ -4525,9 +4505,7 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[0].SetElement"
|
||||
or
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
input = "Argument[0].AnyTupleElement"
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "ReturnValue.ListElement.TupleElement[1]" and
|
||||
@@ -4552,12 +4530,7 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[" + i.toString() + "].SetElement"
|
||||
or
|
||||
// We reduce generality slightly by not tracking tuple contents on arguments beyond the first two, for performance.
|
||||
// TODO: Once we have TupleElementAny, this generality can be increased.
|
||||
i in [0 .. 1] and
|
||||
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
|
||||
input = "Argument[" + i.toString() + "].TupleElement[" + j.toString() + "]"
|
||||
)
|
||||
input = "Argument[" + i.toString() + "].AnyTupleElement"
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "ReturnValue.ListElement.TupleElement[" + i.toString() + "]" and
|
||||
@@ -4580,12 +4553,6 @@ module StdlibPrivate {
|
||||
override DataFlow::ArgumentNode getACallback() { none() }
|
||||
|
||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||
exists(DataFlow::Content c |
|
||||
input = "Argument[self]." + c.getMaDRepresentation() and
|
||||
output = "ReturnValue." + c.getMaDRepresentation() and
|
||||
preservesValue = true
|
||||
)
|
||||
or
|
||||
input = "Argument[self]" and
|
||||
output = "ReturnValue" and
|
||||
preservesValue = true
|
||||
@@ -4741,12 +4708,10 @@ module StdlibPrivate {
|
||||
override DataFlow::ArgumentNode getACallback() { none() }
|
||||
|
||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
|
||||
input = "Argument[self].DictionaryElement[" + key + "]" and
|
||||
output = "ReturnValue.TupleElement[1]" and
|
||||
preservesValue = true
|
||||
// TODO: put `key` into "ReturnValue.TupleElement[0]"
|
||||
)
|
||||
input = "Argument[self].AnyDictionaryElement" and
|
||||
output = "ReturnValue.TupleElement[1]" and
|
||||
preservesValue = true
|
||||
// TODO: put `key` into "ReturnValue.TupleElement[0]"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4825,11 +4790,9 @@ module StdlibPrivate {
|
||||
}
|
||||
|
||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
|
||||
input = "Argument[self].DictionaryElement[" + key + "]" and
|
||||
output = "ReturnValue.ListElement" and
|
||||
preservesValue = true
|
||||
)
|
||||
input = "Argument[self].AnyDictionaryElement" and
|
||||
output = "ReturnValue.ListElement" and
|
||||
preservesValue = true
|
||||
or
|
||||
input = "Argument[self]" and
|
||||
output = "ReturnValue" and
|
||||
@@ -4876,11 +4839,9 @@ module StdlibPrivate {
|
||||
}
|
||||
|
||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
|
||||
input = "Argument[self].DictionaryElement[" + key + "]" and
|
||||
output = "ReturnValue.ListElement.TupleElement[1]" and
|
||||
preservesValue = true
|
||||
)
|
||||
input = "Argument[self].AnyDictionaryElement" and
|
||||
output = "ReturnValue.ListElement.TupleElement[1]" and
|
||||
preservesValue = true
|
||||
or
|
||||
// TODO: Add the keys to output list
|
||||
input = "Argument[self]" and
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
consistencyOverview
|
||||
| deadEnd | 1 |
|
||||
deadEnd
|
||||
| without_loop.py:7:5:7:9 | Break |
|
||||
@@ -1,32 +0,0 @@
|
||||
/**
|
||||
* Phase -1 of the dataflow CFG migration: verifies that every variable
|
||||
* binding visible to the AST (`Name.defines(v)`) corresponds to a CFG node
|
||||
* in the new CFG (`semmle.python.controlflow.internal.AstNodeImpl`).
|
||||
*
|
||||
* The expected tag is `cfgdefines=<name>`. Each binding annotation in the
|
||||
* test sources looks like `# $ cfgdefines=x` for a binding currently
|
||||
* covered by the new CFG, or `# $ MISSING: cfgdefines=x` for a binding
|
||||
* that is known to be uncovered (a "red" test case that should be
|
||||
* green-flipped once the corresponding `cfg-ext-*` extension lands).
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
|
||||
import utils.test.InlineExpectationsTest
|
||||
|
||||
module CfgBindingsTest implements TestSig {
|
||||
string getARelevantTag() { result = "cfgdefines" }
|
||||
|
||||
predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(Name n, Variable v, CfgImpl::ControlFlowNode cfg |
|
||||
n.defines(v) and
|
||||
cfg.getAstNode().asExpr() = n and
|
||||
location = n.getLocation() and
|
||||
element = n.toString() and
|
||||
tag = "cfgdefines" and
|
||||
value = v.getId()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
import MakeTest<CfgBindingsTest>
|
||||
@@ -1,13 +0,0 @@
|
||||
# Annotated assignment (PEP 526). Both with and without an initializer.
|
||||
|
||||
a: int = 1 # $ cfgdefines=a
|
||||
b: str = "hi" # $ cfgdefines=b
|
||||
|
||||
# Annotation without value: the AST records `c` as defined,
|
||||
# and the new CFG now visits it via the AnnAssignStmt wrapper.
|
||||
c: int # $ cfgdefines=c
|
||||
|
||||
class K: # $ cfgdefines=K
|
||||
field: int = 0 # $ cfgdefines=field
|
||||
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
# Compound (tuple/list) assignment targets — actually wired in the new CFG.
|
||||
|
||||
a, b = (1, 2) # $ cfgdefines=a cfgdefines=b
|
||||
[c, d] = [3, 4] # $ cfgdefines=c cfgdefines=d
|
||||
|
||||
# Nested unpacking.
|
||||
(e, (f, g)) = (1, (2, 3)) # $ cfgdefines=e cfgdefines=f cfgdefines=g
|
||||
|
||||
# Star unpacking.
|
||||
h, *i = [1, 2, 3] # $ cfgdefines=h cfgdefines=i
|
||||
|
||||
# Chained assignment with compound target.
|
||||
j = k, l = (5, 6) # $ cfgdefines=j cfgdefines=k cfgdefines=l
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
# Comprehension and `for` loop targets — wired in the new CFG.
|
||||
# Comprehensions are nested function scopes with a synthetic `.0` parameter
|
||||
# bound to the iterable.
|
||||
|
||||
# Bare-name `for` target.
|
||||
for i in range(3): # $ cfgdefines=i
|
||||
pass
|
||||
|
||||
# Compound `for` target.
|
||||
for k, v in [(1, 2)]: # $ cfgdefines=k cfgdefines=v
|
||||
pass
|
||||
|
||||
# Comprehension targets.
|
||||
_ = [x for x in range(3)] # $ cfgdefines=_ cfgdefines=x cfgdefines=.0
|
||||
_ = {y: z for y, z in []} # $ cfgdefines=_ cfgdefines=y cfgdefines=z cfgdefines=.0
|
||||
_ = (a for a in []) # $ cfgdefines=_ cfgdefines=a cfgdefines=.0
|
||||
|
||||
# Nested comprehensions.
|
||||
_ = [b for c in [] for b in c] # $ cfgdefines=_ cfgdefines=c cfgdefines=b cfgdefines=.0
|
||||
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
# Reachability of code following a try whose body always returns.
|
||||
#
|
||||
# The new CFG models exception edges for raise-prone expressions when
|
||||
# they appear inside a `try` (or `with`) statement, mirroring Java's
|
||||
# `mayThrow`. This means the body of a `try` has both a normal
|
||||
# completion edge and an exception edge to its handlers, so code
|
||||
# following the try-statement is reachable via the except-handler path
|
||||
# even when the try-body would otherwise always return.
|
||||
#
|
||||
# Code that is not reachable under either normal or exception flow
|
||||
# (for example, the `else` clause of a try whose body unconditionally
|
||||
# raises) remains correctly classified as dead.
|
||||
|
||||
|
||||
def f(obj): # $ cfgdefines=f cfgdefines=obj
|
||||
try:
|
||||
return len(obj)
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
# The try-body always returns, but `len(obj)` can raise (it is
|
||||
# inside the try, so we model its exception edge). The
|
||||
# `except TypeError: pass` handler falls through to here, making
|
||||
# the code below reachable.
|
||||
try:
|
||||
hint = type(obj).__length_hint__ # $ cfgdefines=hint
|
||||
except AttributeError:
|
||||
return None
|
||||
return hint
|
||||
|
||||
|
||||
def g(): # $ cfgdefines=g
|
||||
try:
|
||||
raise Exception("inner")
|
||||
except:
|
||||
raise Exception("outer")
|
||||
else:
|
||||
# Unreachable: the inner try body always raises (via an explicit
|
||||
# `raise`, which is modelled unconditionally), so the `else:`
|
||||
# clause never runs.
|
||||
hit_inner_else = True
|
||||
|
||||
|
||||
def h(cache, key): # $ cfgdefines=h cfgdefines=cache cfgdefines=key
|
||||
try:
|
||||
return cache[key]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
# Same pattern as `f`: reachable via the except-handler fall-through.
|
||||
value = compute(key) # $ cfgdefines=value
|
||||
cache[key] = value
|
||||
return value
|
||||
@@ -1,30 +0,0 @@
|
||||
# Decorated `def`/`class` — wired in the new CFG.
|
||||
|
||||
|
||||
def deco(f): # $ cfgdefines=deco cfgdefines=f
|
||||
return f
|
||||
|
||||
|
||||
@deco
|
||||
def decorated_func(): # $ cfgdefines=decorated_func
|
||||
pass
|
||||
|
||||
|
||||
@deco
|
||||
class DecoratedClass: # $ cfgdefines=DecoratedClass
|
||||
pass
|
||||
|
||||
|
||||
# Stacked decorators.
|
||||
@deco
|
||||
@deco
|
||||
def doubly(): # $ cfgdefines=doubly
|
||||
pass
|
||||
|
||||
|
||||
# Inside a class body.
|
||||
class Outer: # $ cfgdefines=Outer
|
||||
@staticmethod
|
||||
def inner(): # $ cfgdefines=inner
|
||||
pass
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
# Exception-handler name bindings. These are already wired in the new
|
||||
# CFG provided the try body can raise; `raise` statements are reliably
|
||||
# treated as exception sources.
|
||||
|
||||
try:
|
||||
raise ValueError("oops")
|
||||
except ValueError as e: # $ cfgdefines=e
|
||||
pass
|
||||
|
||||
try:
|
||||
raise TypeError("oops")
|
||||
except (TypeError, KeyError) as err: # $ cfgdefines=err
|
||||
pass
|
||||
|
||||
# Exception groups (Python 3.11+).
|
||||
try:
|
||||
raise ValueError("oops")
|
||||
except* ValueError as eg: # $ cfgdefines=eg
|
||||
pass
|
||||
@@ -1,14 +0,0 @@
|
||||
# Import aliases — all bound names below are now reachable via the new
|
||||
# CFG's `ImportStmt` wrapper.
|
||||
|
||||
import os # $ cfgdefines=os
|
||||
import os.path # $ cfgdefines=os
|
||||
import os as o # $ cfgdefines=o
|
||||
from os import path # $ cfgdefines=path
|
||||
from os import path as p # $ cfgdefines=p
|
||||
from os import sep, linesep # $ cfgdefines=sep cfgdefines=linesep
|
||||
from os import (
|
||||
getcwd, # $ cfgdefines=getcwd
|
||||
getcwdb, # $ cfgdefines=getcwdb
|
||||
)
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
# Match-statement pattern bindings — wired in the new CFG.
|
||||
|
||||
def f(subject): # $ cfgdefines=f cfgdefines=subject
|
||||
match subject:
|
||||
case x: # $ cfgdefines=x
|
||||
pass
|
||||
case [a, b]: # $ cfgdefines=a cfgdefines=b
|
||||
pass
|
||||
case {"k": v}: # $ cfgdefines=v
|
||||
pass
|
||||
case Point(p, q): # $ cfgdefines=p cfgdefines=q
|
||||
pass
|
||||
case [_, *rest]: # $ cfgdefines=rest
|
||||
pass
|
||||
case (1 | 2) as n: # $ cfgdefines=n
|
||||
pass
|
||||
|
||||
|
||||
class Point: # $ cfgdefines=Point
|
||||
__match_args__ = ("x", "y") # $ cfgdefines=__match_args__
|
||||
x: int # $ cfgdefines=x
|
||||
y: int # $ cfgdefines=y
|
||||
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
# Function parameters.
|
||||
|
||||
def positional(a, b): # $ cfgdefines=positional cfgdefines=a cfgdefines=b
|
||||
pass
|
||||
|
||||
|
||||
def with_default(x=1, y=2): # $ cfgdefines=with_default cfgdefines=x cfgdefines=y
|
||||
pass
|
||||
|
||||
|
||||
def with_vararg(*args): # $ cfgdefines=with_vararg cfgdefines=args
|
||||
pass
|
||||
|
||||
|
||||
def with_kwarg(**kwargs): # $ cfgdefines=with_kwarg cfgdefines=kwargs
|
||||
pass
|
||||
|
||||
|
||||
def with_kwonly(*, k1, k2=5): # $ cfgdefines=with_kwonly cfgdefines=k1 cfgdefines=k2
|
||||
pass
|
||||
|
||||
|
||||
def kitchen_sink(a, b=2, *args, k1, k2=5, **kw): # $ cfgdefines=kitchen_sink cfgdefines=a cfgdefines=b cfgdefines=args cfgdefines=k1 cfgdefines=k2 cfgdefines=kw
|
||||
pass
|
||||
|
||||
|
||||
# Methods get `self` / `cls`.
|
||||
class C: # $ cfgdefines=C
|
||||
def method(self, x): # $ cfgdefines=method cfgdefines=self cfgdefines=x
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def cmethod(cls, x): # $ cfgdefines=cmethod cfgdefines=cls cfgdefines=x
|
||||
pass
|
||||
|
||||
|
||||
# Lambda parameter.
|
||||
_ = lambda p: p + 1 # $ cfgdefines=_ cfgdefines=p
|
||||
|
||||
# PEP 570 positional-only.
|
||||
def pos_only(a, b, /, c): # $ cfgdefines=pos_only cfgdefines=a cfgdefines=b cfgdefines=c
|
||||
pass
|
||||
@@ -1,14 +0,0 @@
|
||||
# Simple bindings that should already work in the new CFG.
|
||||
# No MISSING annotations expected.
|
||||
|
||||
x = 1 # $ cfgdefines=x
|
||||
y = x + 1 # $ cfgdefines=y
|
||||
|
||||
def f(): # $ cfgdefines=f
|
||||
pass
|
||||
|
||||
class C: # $ cfgdefines=C
|
||||
pass
|
||||
|
||||
# Re-assignment.
|
||||
x = 2 # $ cfgdefines=x
|
||||
@@ -1,21 +0,0 @@
|
||||
# PEP 695 type parameters (Python 3.12+).
|
||||
|
||||
# PEP 695 type-param names on `def`/`class` bind in an annotation scope
|
||||
# that nests the function/class body — they have no CFG node in the
|
||||
# enclosing scope (matching the legacy CFG).
|
||||
def func[T](x: T) -> T: # $ cfgdefines=func cfgdefines=x
|
||||
return x
|
||||
|
||||
|
||||
class Box[T]: # $ cfgdefines=Box
|
||||
item: T # $ cfgdefines=item
|
||||
|
||||
|
||||
# Multi-parameter, with bound and variadics.
|
||||
def multi[T: int, *Ts, **P](x: T, *args: *Ts, **kwargs: P.kwargs) -> T: # $ cfgdefines=multi cfgdefines=x cfgdefines=args cfgdefines=kwargs
|
||||
return x
|
||||
|
||||
|
||||
# `type` statement (PEP 695).
|
||||
type Alias[T] = list[T] # $ cfgdefines=Alias cfgdefines=T
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
# Walrus and starred-target edge cases — wired in the new CFG.
|
||||
|
||||
# Walrus in expression context.
|
||||
if (y := 5) > 0: # $ cfgdefines=y
|
||||
pass
|
||||
|
||||
# Walrus in a comprehension. The comprehension introduces a synthetic
|
||||
# `.0` parameter bound to the iterable.
|
||||
_ = [w for _ in range(3) if (w := 1)] # $ cfgdefines=_ cfgdefines=w cfgdefines=.0
|
||||
|
||||
# Starred target in a Tuple LHS.
|
||||
*head, tail = [1, 2, 3] # $ cfgdefines=head cfgdefines=tail
|
||||
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
# `with cm() as x:` bindings — wired in the new CFG.
|
||||
|
||||
class CM: # $ cfgdefines=CM
|
||||
def __enter__(self): return self # $ cfgdefines=__enter__ cfgdefines=self
|
||||
def __exit__(self, *a): pass # $ cfgdefines=__exit__ cfgdefines=self cfgdefines=a
|
||||
|
||||
with CM() as x: # $ cfgdefines=x
|
||||
pass
|
||||
|
||||
# Multiple items.
|
||||
with CM() as a, CM() as b: # $ cfgdefines=a cfgdefines=b
|
||||
pass
|
||||
|
||||
# Parenthesised form (Python 3.10+).
|
||||
with (CM() as p, CM() as q): # $ cfgdefines=p cfgdefines=q
|
||||
pass
|
||||
|
||||
# Compound target in `with`.
|
||||
with CM() as (m, n): # $ cfgdefines=m cfgdefines=n
|
||||
pass
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
/** New-CFG version of AllLiveReachable. */
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerCfgNode a, TestFunction f
|
||||
where allLiveReachable(a, f)
|
||||
select a, "Unreachable live annotation; entry of $@ does not reach this node", f, f.getName()
|
||||
@@ -1 +0,0 @@
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
/**
|
||||
* New-CFG version of AnnotationHasCfgNode.
|
||||
*
|
||||
* Checks that every timer annotation has a corresponding CFG node.
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerAnnotation ann
|
||||
where annotationWithoutCfgNode(ann)
|
||||
select ann, "Annotation in $@ has no CFG node", ann.getTestFunction(),
|
||||
ann.getTestFunction().getName()
|
||||
@@ -1,26 +0,0 @@
|
||||
/**
|
||||
* New-CFG version of BasicBlockAnnotationGap.
|
||||
*
|
||||
* Original:
|
||||
* Checks that within a basic block, if a node is annotated then its
|
||||
* successor is also annotated (or excluded). A gap in annotations
|
||||
* within a basic block indicates a missing annotation, since there
|
||||
* are no branches to justify the gap.
|
||||
*
|
||||
* Nodes with exceptional successors are excluded, as the exception
|
||||
* edge leaves the basic block and the normal successor may be dead.
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerCfgNode a, CfgNode succ
|
||||
where basicBlockAnnotationGap(a, succ)
|
||||
select a, "Annotated node followed by unannotated $@ in the same basic block", succ,
|
||||
succ.getNode().toString()
|
||||
@@ -1,21 +0,0 @@
|
||||
/**
|
||||
* New-CFG version of BasicBlockOrdering.
|
||||
*
|
||||
* Original:
|
||||
* Checks that within a single basic block, annotations appear in
|
||||
* increasing minimum-timestamp order.
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerCfgNode a, TimerCfgNode b, int minA, int minB
|
||||
where basicBlockOrdering(a, b, minA, minB)
|
||||
select a, "Basic block ordering: $@ appears before $@", a.getTimestampExpr(minA),
|
||||
"timestamp " + minA, b.getTimestampExpr(minB), "timestamp " + minB
|
||||
@@ -1,80 +0,0 @@
|
||||
/**
|
||||
* New-CFG version of BranchTimestamps.
|
||||
*
|
||||
* Checks that when a node has both a true and false successor, the
|
||||
* live timestamps on one branch are marked as dead on the other.
|
||||
* This ensures that boolean branches are fully annotated with dead()
|
||||
* markers for the paths not taken.
|
||||
*
|
||||
* Limitation: the `@ t[ts, ...]` / `dead(ts)` annotation scheme can only
|
||||
* model branch-dead-ness for plain boolean control flow that reconverges
|
||||
* linearly after the split — i.e. `if`-with-else and `if`-expression.
|
||||
* It cannot model:
|
||||
*
|
||||
* * loops (`while` / `for`): body timestamps repeat across iterations,
|
||||
* so the loop-exit annotation can't list them as dead;
|
||||
* * `match` statements: each `case` body is a syntactically distinct
|
||||
* sub-tree, and the branches don't reconverge through a common
|
||||
* annotation point in the timeline;
|
||||
* * `try` / `with` and `raise` / `assert`: exception edges are modelled
|
||||
* as true/false but flow to syntactically distinct handlers, with no
|
||||
* reconvergence in the linear annotation order;
|
||||
* * short-circuit `and` / `or` (`BoolExpr`): the branches reconverge at
|
||||
* the BoolExpr's after-node, so timestamps on one branch are live
|
||||
* downstream of the other rather than dead;
|
||||
* * `if` without an `else` clause, and `if`/`elif` chains: the false
|
||||
* branch reconverges with the true branch at the post-if statement
|
||||
* (no-else) or fans out across multiple elif-test annotations,
|
||||
* neither of which fit the binary annotation scheme.
|
||||
*
|
||||
* Branch nodes inside those constructs are therefore whitelisted out
|
||||
* below. The check still fires (and is useful) for plain `if`/`else`
|
||||
* and conditional-expression branching.
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
/**
|
||||
* Holds if `f` contains a construct whose branches the linear-timestamp
|
||||
* annotation scheme cannot describe (see file-level comment).
|
||||
*/
|
||||
private predicate hasUnmodellableBranching(Function f) {
|
||||
exists(AstNode bad |
|
||||
bad.getScope() = f and
|
||||
(
|
||||
bad instanceof While
|
||||
or
|
||||
bad instanceof For
|
||||
or
|
||||
bad instanceof MatchStmt
|
||||
or
|
||||
bad instanceof Try
|
||||
or
|
||||
bad instanceof With
|
||||
or
|
||||
bad instanceof Raise
|
||||
or
|
||||
bad instanceof Assert
|
||||
or
|
||||
bad instanceof BoolExpr
|
||||
or
|
||||
bad instanceof If and
|
||||
(not exists(bad.(If).getAnOrelse()) or bad.(If).isElif())
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
from TimerCfgNode node, int ts, string branch
|
||||
where
|
||||
missingBranchTimestamp(node, ts, branch) and
|
||||
not hasUnmodellableBranching(node.getTestFunction())
|
||||
select node,
|
||||
"Timestamp " + ts + " on true/false branch is missing a dead() annotation on the " + branch +
|
||||
" successor in $@", node.getTestFunction(), node.getTestFunction().getName()
|
||||
@@ -1 +0,0 @@
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
/**
|
||||
* New-CFG version of ConsecutivePredecessorTimestamps.
|
||||
*
|
||||
* Checks that each annotated node (except the minimum timestamp) has
|
||||
* a predecessor annotation with timestamp `a - 1`. This is the reverse
|
||||
* of ConsecutiveTimestamps: it catches nodes that are reachable but
|
||||
* arrived at from the wrong place (skipping an intermediate node).
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerAnnotation ann, int a
|
||||
where consecutivePredecessorTimestamps(ann, a)
|
||||
select ann, "$@ in $@ has no consecutive predecessor (expected " + (a - 1) + ")",
|
||||
ann.getTimestampExpr(a), "Timestamp " + a, ann.getTestFunction(), ann.getTestFunction().getName()
|
||||
@@ -1,29 +0,0 @@
|
||||
/**
|
||||
* New-CFG version of ConsecutiveTimestamps.
|
||||
*
|
||||
* Original:
|
||||
* Checks that consecutive annotated nodes have consecutive timestamps:
|
||||
* for each annotation with timestamp `a`, some CFG node for that annotation
|
||||
* must have a next annotation containing `a + 1`.
|
||||
*
|
||||
* Handles CFG splitting (e.g., finally blocks duplicated for normal/exceptional
|
||||
* flow) by checking that at least one split has the required successor.
|
||||
*
|
||||
* Only applies to functions where all annotations are in the function's
|
||||
* own scope (excludes tests with generators, async, comprehensions, or
|
||||
* lambdas that have annotations in nested scopes).
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerAnnotation ann, int a
|
||||
where consecutiveTimestamps(ann, a)
|
||||
select ann, "$@ in $@ has no consecutive successor (expected " + (a + 1) + ")",
|
||||
ann.getTimestampExpr(a), "Timestamp " + a, ann.getTestFunction(), ann.getTestFunction().getName()
|
||||
@@ -1,120 +0,0 @@
|
||||
/**
|
||||
* Implementation of the evaluation-order CFG signature using the new
|
||||
* shared control flow graph from AstNodeImpl.
|
||||
*/
|
||||
|
||||
private import python as Py
|
||||
import TimerUtils
|
||||
private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
|
||||
private import codeql.controlflow.SuccessorType
|
||||
|
||||
private class NewControlFlowNode = CfgImpl::ControlFlowNode;
|
||||
|
||||
private class NewBasicBlock = CfgImpl::BasicBlock;
|
||||
|
||||
/** New (shared) CFG implementation of the evaluation-order signature. */
|
||||
module NewCfg implements EvalOrderCfgSig {
|
||||
class CfgNode instanceof NewControlFlowNode {
|
||||
// We must pick a *unique* representative CFG node for each AST node. The
|
||||
// shared CFG has several nodes per AST node (before / in-post-order / after
|
||||
// / after-value splits), but the timer test framework keys annotations on
|
||||
// `getNode()` and assumes one CFG node per annotated AST node. Without a
|
||||
// filter, an annotated `f()` would map to both `f()` and `After f()`, which
|
||||
// breaks two framework invariants: (1) the "no shared reachable" check
|
||||
// requires that two distinct nodes sharing a timestamp be mutually
|
||||
// unreachable (true/false branches of a condition), but `Before f()`,
|
||||
// `f()` and `After f()` share the annotation's timestamp *and* lie on one
|
||||
// linear path; and (2) the annotation walk (`nextTimerAnnotation`) halts at
|
||||
// the first reachable representative, so a second node for the same AST
|
||||
// node would stall the walk on the same timestamp instead of advancing to
|
||||
// the next evaluation event.
|
||||
//
|
||||
// We use the "after" node (`isAfter`) rather than the canonical `injects`
|
||||
// node, because `injects` represents short-circuit / conditional
|
||||
// expressions (`and`/`or`/`not`/ternary) by their *before* node, placing
|
||||
// them ahead of their operands — wrong for evaluation order. `isAfter`
|
||||
// instead picks the post-evaluation node: the merged before/after node for
|
||||
// simple leaves, the `TAfterNode` for post-order expressions, and the
|
||||
// `AfterValueNode`(s) for pre-order conditionals, all positioned after the
|
||||
// operands. The two value-split nodes of a conditional are genuinely
|
||||
// distinct evaluation outcomes (handled by `getATrueSuccessor` /
|
||||
// `getAFalseSuccessor`), so they do not violate the uniqueness assumption.
|
||||
CfgNode() { NewControlFlowNode.super.isAfter(_) }
|
||||
|
||||
string toString() { result = NewControlFlowNode.super.toString() }
|
||||
|
||||
Py::Location getLocation() { result = NewControlFlowNode.super.getLocation() }
|
||||
|
||||
Py::AstNode getNode() {
|
||||
result = CfgImpl::astNodeToPyNode(NewControlFlowNode.super.getAstNode())
|
||||
}
|
||||
|
||||
CfgNode getASuccessor() { nextCfgNode(this, result) }
|
||||
|
||||
CfgNode getATrueSuccessor() {
|
||||
NewControlFlowNode.super.isAfterTrue(_) and
|
||||
// Only where there's also a false branch (true boolean split)
|
||||
exists(NewControlFlowNode other | other.isAfterFalse(NewControlFlowNode.super.getAstNode())) and
|
||||
nextCfgNodeFrom(this, result)
|
||||
}
|
||||
|
||||
CfgNode getAFalseSuccessor() {
|
||||
NewControlFlowNode.super.isAfterFalse(_) and
|
||||
// Only where there's also a true branch (true boolean split)
|
||||
exists(NewControlFlowNode other | other.isAfterTrue(NewControlFlowNode.super.getAstNode())) and
|
||||
nextCfgNodeFrom(this, result)
|
||||
}
|
||||
|
||||
CfgNode getAnExceptionalSuccessor() {
|
||||
exists(NewControlFlowNode mid |
|
||||
mid = NewControlFlowNode.super.getAnExceptionSuccessor() and
|
||||
nextCfgNodeFrom(mid, result)
|
||||
)
|
||||
}
|
||||
|
||||
Py::Scope getScope() { result = NewControlFlowNode.super.getEnclosingCallable().asScope() }
|
||||
|
||||
BasicBlock getBasicBlock() {
|
||||
exists(NewBasicBlock bb, int i | bb.getNode(i) = this and result = bb)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `next` is the nearest CfgNode reachable from `n` via
|
||||
* one or more raw CFG successor edges, skipping non-CfgNode intermediaries.
|
||||
*/
|
||||
private predicate nextCfgNodeFrom(NewControlFlowNode n, CfgNode next) {
|
||||
next = n.getASuccessor()
|
||||
or
|
||||
exists(NewControlFlowNode mid |
|
||||
mid = n.getASuccessor() and
|
||||
not mid instanceof CfgNode and
|
||||
nextCfgNodeFrom(mid, next)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `next` is the nearest CfgNode successor of `n`,
|
||||
* skipping synthetic intermediate nodes.
|
||||
*/
|
||||
private predicate nextCfgNode(CfgNode n, CfgNode next) { nextCfgNodeFrom(n, next) }
|
||||
|
||||
class BasicBlock instanceof NewBasicBlock {
|
||||
string toString() { result = NewBasicBlock.super.toString() }
|
||||
|
||||
CfgNode getNode(int n) { result = NewBasicBlock.super.getNode(n) }
|
||||
|
||||
predicate reaches(BasicBlock bb) { this = bb or this.strictlyReaches(bb) }
|
||||
|
||||
predicate strictlyReaches(BasicBlock bb) { NewBasicBlock.super.getASuccessor+() = bb }
|
||||
|
||||
predicate strictlyDominates(BasicBlock bb) { NewBasicBlock.super.strictlyDominates(bb) }
|
||||
}
|
||||
|
||||
CfgNode scopeGetEntryNode(Py::Scope s) {
|
||||
exists(CfgImpl::ControlFlow::EntryNode entry |
|
||||
entry.getEnclosingCallable().asScope() = s and
|
||||
nextCfgNodeFrom(entry, result)
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
/**
|
||||
* New-CFG version of NeverReachable.
|
||||
*
|
||||
* Original:
|
||||
* Checks that expressions annotated with `t.never` either have no CFG
|
||||
* node, or if they do, that the node is not reachable from its scope's
|
||||
* entry (including within the same basic block).
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerAnnotation ann
|
||||
where neverReachable(ann)
|
||||
select ann, "Node annotated with t.never is reachable in $@", ann.getTestFunction(),
|
||||
ann.getTestFunction().getName()
|
||||
@@ -1,22 +0,0 @@
|
||||
/**
|
||||
* New-CFG version of NoBackwardFlow.
|
||||
*
|
||||
* Original:
|
||||
* Checks that time never flows backward between consecutive timer annotations
|
||||
* in the CFG. For each pair of consecutive annotated nodes (A -> B), there must
|
||||
* exist timestamps a in A and b in B with a < b.
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerCfgNode a, TimerCfgNode b, int minA, int maxB
|
||||
where noBackwardFlow(a, b, minA, maxB)
|
||||
select a, "Backward flow: $@ flows to $@ (max timestamp $@)", a.getTimestampExpr(minA),
|
||||
minA.toString(), b, b.getNode().toString(), b.getTimestampExpr(maxB), maxB.toString()
|
||||
@@ -1 +0,0 @@
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
/**
|
||||
* New-CFG version of NoBasicBlock.
|
||||
*
|
||||
* Checks that every annotated CFG node belongs to a basic block.
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
from CfgNode n, TestFunction f
|
||||
where noBasicBlock(n, f)
|
||||
select n, "CFG node in $@ does not belong to any basic block", f, f.getName()
|
||||
@@ -1,21 +0,0 @@
|
||||
/**
|
||||
* New-CFG version of NoSharedReachable.
|
||||
*
|
||||
* Original:
|
||||
* Checks that two annotations sharing a timestamp value are on
|
||||
* mutually exclusive CFG paths (neither can reach the other).
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerCfgNode a, TimerCfgNode b, int ts
|
||||
where noSharedReachable(a, b, ts)
|
||||
select a, "Shared timestamp $@ but this node reaches $@", a.getTimestampExpr(ts), ts.toString(), b,
|
||||
b.getNode().toString()
|
||||
@@ -1,22 +0,0 @@
|
||||
/**
|
||||
* New-CFG version of StrictForward.
|
||||
*
|
||||
* Original:
|
||||
* Stronger version of NoBackwardFlow: for consecutive annotated nodes
|
||||
* A -> B that both have a single timestamp (non-loop code) and B does
|
||||
* NOT dominate A (forward edge), requires max(A) < min(B).
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerCfgNode a, TimerCfgNode b, int maxA, int minB
|
||||
where strictForward(a, b, maxA, minB)
|
||||
select a, "Strict forward violation: $@ flows to $@", a.getTimestampExpr(maxA), "timestamp " + maxA,
|
||||
b.getTimestampExpr(minB), "timestamp " + minB
|
||||
@@ -3,14 +3,14 @@
|
||||
* Python control flow graph.
|
||||
*/
|
||||
|
||||
private import python as Py
|
||||
private import python as PY
|
||||
import TimerUtils
|
||||
|
||||
/** Existing Python CFG implementation of the evaluation-order signature. */
|
||||
module OldCfg implements EvalOrderCfgSig {
|
||||
class CfgNode = Py::ControlFlowNode;
|
||||
class CfgNode = PY::ControlFlowNode;
|
||||
|
||||
class BasicBlock = Py::BasicBlock;
|
||||
class BasicBlock = PY::BasicBlock;
|
||||
|
||||
CfgNode scopeGetEntryNode(Py::Scope s) { result = s.getEntryNode() }
|
||||
CfgNode scopeGetEntryNode(PY::Scope s) { result = s.getEntryNode() }
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ def test_nested_if_else(t):
|
||||
else:
|
||||
z = 2 @ t[dead(4)]
|
||||
else:
|
||||
z = 3 @ t[dead(3), dead(4)]
|
||||
z = 3 @ t[dead(4)]
|
||||
w = 0 @ t[5]
|
||||
|
||||
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
/**
|
||||
* Inline-expectations test for the store/load/delete/parameter
|
||||
* classification predicates on the new-CFG facade.
|
||||
*
|
||||
* Each tag fires when the corresponding predicate (`isLoad`,
|
||||
* `isStore`, `isDelete`, `isParameter`, `isAugLoad`, `isAugStore`)
|
||||
* holds on the canonical CFG node wrapping a `Py::Name` with the
|
||||
* given identifier. Subscript and attribute stores are not covered
|
||||
* by these tags — only the `Name`-typed targets/loads they involve.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
import utils.test.InlineExpectationsTest
|
||||
|
||||
module StoreLoadTest implements TestSig {
|
||||
string getARelevantTag() { result = ["load", "store", "delete", "param", "augload", "augstore"] }
|
||||
|
||||
predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(Cfg::NameNode n |
|
||||
location = n.getLocation() and
|
||||
element = n.toString() and
|
||||
value = n.getId() and
|
||||
(
|
||||
n.isLoad() and not n.isAugLoad() and tag = "load"
|
||||
or
|
||||
n.isStore() and not n.isAugStore() and tag = "store"
|
||||
or
|
||||
n.isDelete() and tag = "delete"
|
||||
or
|
||||
n.isParameter() and tag = "param"
|
||||
or
|
||||
n.isAugLoad() and tag = "augload"
|
||||
or
|
||||
n.isAugStore() and tag = "augstore"
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
import MakeTest<StoreLoadTest>
|
||||
@@ -1,56 +0,0 @@
|
||||
# Store/load/delete/parameter classification on the new-CFG facade.
|
||||
#
|
||||
# Each annotated location carries the (sorted, deduplicated) set of
|
||||
# kinds the CFG facade reports there. Comparing against the legacy
|
||||
# 'semmle.python.Flow' classification is done by the comparison query
|
||||
# 'StoreLoadParity.ql' — annotations here are only the positive
|
||||
# assertions for the new facade.
|
||||
#
|
||||
# Tags:
|
||||
# load=<id> -- isLoad() fires on the Name
|
||||
# store=<id> -- isStore() fires
|
||||
# delete=<id> -- isDelete() fires
|
||||
# param=<id> -- isParameter() fires
|
||||
# augload=<id> -- isAugLoad() fires (the LHS of x += ... when read)
|
||||
# augstore=<id> -- isAugStore() fires (the LHS of x += ... when written)
|
||||
|
||||
|
||||
# --- plain load / store / delete ---
|
||||
|
||||
x = 1 # $ store=x
|
||||
y = x + 1 # $ store=y load=x
|
||||
print(y) # $ load=print load=y
|
||||
del x # $ delete=x
|
||||
|
||||
|
||||
# --- function definitions (parameters) ---
|
||||
|
||||
def f(a, b=2, *args, c, **kwargs): # $ store=f param=a param=b param=args param=c param=kwargs
|
||||
return a + b + c # $ load=a load=b load=c
|
||||
|
||||
|
||||
# --- augmented assignment splits one Name into load + store halves ---
|
||||
|
||||
def aug(): # $ store=aug
|
||||
n = 0 # $ store=n
|
||||
n += 1 # $ augload=n augstore=n
|
||||
return n # $ load=n
|
||||
|
||||
|
||||
# --- subscript / attribute stores ---
|
||||
|
||||
class C: # $ store=C
|
||||
pass
|
||||
|
||||
|
||||
def stores(obj, container, idx): # $ store=stores param=obj param=container param=idx
|
||||
obj.attr = 1 # $ load=obj
|
||||
container[idx] = 2 # $ load=container load=idx
|
||||
return obj # $ load=obj
|
||||
|
||||
|
||||
# --- tuple unpacking ---
|
||||
|
||||
def unpack(pair): # $ store=unpack param=pair
|
||||
a, b = pair # $ store=a store=b load=pair
|
||||
return a + b # $ load=a load=b
|
||||
@@ -1,5 +0,0 @@
|
||||
| def-only-old | $:0:0 |
|
||||
| def-only-old | __name__:0:0 |
|
||||
| def-only-old | __package__:0:0 |
|
||||
| def-only-old | e:37:1 |
|
||||
| def-only-old | x:20:1 |
|
||||
@@ -1,59 +0,0 @@
|
||||
/**
|
||||
* Compares the new-CFG SSA against the legacy ESSA on the same Python
|
||||
* sources. Reports definitions present in one implementation but not
|
||||
* the other, identified by variable name + source position.
|
||||
*
|
||||
* The `.expected` file records the current diff as a snapshot: as the
|
||||
* new SSA matures (closing captured-variable gap, exception bindings,
|
||||
* etc.) and tracks more variables, the snapshot should monotonically
|
||||
* shrink.
|
||||
*
|
||||
* Known categories of `def-only-old` mismatches:
|
||||
* - Function / class / global definitions with no in-scope read
|
||||
* (intentional: SSA is liveness-pruned, write-only variables are
|
||||
* not tracked).
|
||||
* - Captured / closure variables (gap: new SSA does not yet model
|
||||
* closure captures).
|
||||
* - Module variables `__name__`, `__package__`, `$` (legacy ESSA
|
||||
* adds implicit bindings the new SSA does not).
|
||||
* - Exception-handler `as` bindings (depend on raise modelling).
|
||||
*
|
||||
* `def-only-new` mismatches would indicate the new SSA produces spurious
|
||||
* definitions; currently none are expected.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.internal.SsaImpl as NewSsa
|
||||
import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
import semmle.python.essa.Essa
|
||||
|
||||
string newDefSig(NewSsa::EssaNodeDefinition def) {
|
||||
exists(Cfg::ControlFlowNode n | n = def.getDefiningNode() |
|
||||
result =
|
||||
def.getVariable().getVariable().getId() + ":" + n.getLocation().getStartLine() + ":" +
|
||||
n.getLocation().getStartColumn()
|
||||
)
|
||||
}
|
||||
|
||||
string legacyDefSig(EssaNodeDefinition def) {
|
||||
exists(ControlFlowNode n | n = def.getDefiningNode() |
|
||||
result =
|
||||
def.getSourceVariable().getName() + ":" + n.getLocation().getStartLine() + ":" +
|
||||
n.getLocation().getStartColumn()
|
||||
)
|
||||
}
|
||||
|
||||
from string kind, string sig
|
||||
where
|
||||
kind = "def-only-new" and
|
||||
exists(NewSsa::EssaNodeDefinition def |
|
||||
sig = newDefSig(def) and
|
||||
not exists(EssaNodeDefinition legacyDef | sig = legacyDefSig(legacyDef))
|
||||
)
|
||||
or
|
||||
kind = "def-only-old" and
|
||||
exists(EssaNodeDefinition legacyDef |
|
||||
sig = legacyDefSig(legacyDef) and
|
||||
not exists(NewSsa::EssaNodeDefinition def | sig = newDefSig(def))
|
||||
)
|
||||
select kind, sig
|
||||
@@ -1,53 +0,0 @@
|
||||
def simple_assign():
|
||||
x = 1
|
||||
return x
|
||||
|
||||
|
||||
def reassignment():
|
||||
x = 1
|
||||
x = 2
|
||||
return x
|
||||
|
||||
|
||||
def if_else_branch(cond):
|
||||
if cond:
|
||||
x = 1
|
||||
else:
|
||||
x = 2
|
||||
return x
|
||||
|
||||
|
||||
def loop(xs):
|
||||
total = 0
|
||||
for x in xs:
|
||||
total = total + x
|
||||
return total
|
||||
|
||||
|
||||
def parameter(a, b=2, *args, **kwargs):
|
||||
return a + b + sum(args)
|
||||
|
||||
|
||||
def closure(x):
|
||||
def inner():
|
||||
return x
|
||||
return inner
|
||||
|
||||
|
||||
def exception_binding():
|
||||
try:
|
||||
compute()
|
||||
except Exception as e:
|
||||
return e
|
||||
|
||||
|
||||
def with_binding():
|
||||
with open("file") as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
GLOBAL = 1
|
||||
|
||||
|
||||
def read_global():
|
||||
return GLOBAL
|
||||
@@ -1,59 +0,0 @@
|
||||
/**
|
||||
* Inline-expectations test for the new-CFG SSA adapter
|
||||
* (`semmle.python.dataflow.new.internal.SsaImpl`).
|
||||
*
|
||||
* Tags:
|
||||
* - `def=<var>`: there is an SSA write definition of `<var>` at this
|
||||
* line (parameter init, plain assignment, augmented assignment,
|
||||
* exception-handler binding, deletion, etc.).
|
||||
* - `use=<var>`: `<var>` is used at this line, and some SSA definition
|
||||
* of `<var>` reaches the read.
|
||||
* - `phi=<var>`: there is an SSA phi definition of `<var>` whose BB
|
||||
* starts on this line.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl
|
||||
import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
|
||||
import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
import utils.test.InlineExpectationsTest
|
||||
|
||||
module SsaTest implements TestSig {
|
||||
string getARelevantTag() { result = ["def", "use", "phi"] }
|
||||
|
||||
predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
// A `def=<id>` fires when an SSA WriteDefinition is at a CFG node
|
||||
// on the given line.
|
||||
exists(SsaImpl::Ssa::WriteDefinition def, CfgImpl::BasicBlock bb, int i, Cfg::NameNode n |
|
||||
def.definesAt(_, bb, i) and
|
||||
bb.getNode(i) = n and
|
||||
tag = "def" and
|
||||
location = n.getLocation() and
|
||||
element = n.toString() and
|
||||
value = n.getId()
|
||||
)
|
||||
or
|
||||
// A `use=<id>` fires when an SSA Definition reaches a read at this
|
||||
// CFG node.
|
||||
exists(SsaImpl::Ssa::Definition def, CfgImpl::BasicBlock bb, int i, Cfg::NameNode n |
|
||||
SsaImpl::Ssa::ssaDefReachesRead(_, def, bb, i) and
|
||||
bb.getNode(i) = n and
|
||||
tag = "use" and
|
||||
location = n.getLocation() and
|
||||
element = n.toString() and
|
||||
value = n.getId()
|
||||
)
|
||||
or
|
||||
// A `phi=<id>` fires when there is a phi node whose BB's first
|
||||
// CFG node is on the given line.
|
||||
exists(SsaImpl::Ssa::PhiNode phi, CfgImpl::BasicBlock bb |
|
||||
phi.definesAt(_, bb, _) and
|
||||
tag = "phi" and
|
||||
location = bb.getNode(0).getLocation() and
|
||||
element = bb.toString() and
|
||||
value = phi.getSourceVariable().(SsaImpl::SsaSourceVariable).getVariable().getId()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
import MakeTest<SsaTest>
|
||||
@@ -1,48 +0,0 @@
|
||||
# Basic SSA tests for the new-CFG SSA adapter.
|
||||
#
|
||||
# The shared SSA implementation prunes its construction by liveness:
|
||||
# definitions of variables that are not read are never materialised.
|
||||
# This is by design — write-only variables would only bloat the SSA
|
||||
# graph. Tests therefore must always include a read of each variable
|
||||
# being verified.
|
||||
#
|
||||
# Annotations:
|
||||
# def=<var>: there is an SSA write definition of <var> at this line
|
||||
# use=<var>: <var> is used here and the read resolves to some def
|
||||
#
|
||||
# Note: a module-level `def name(...)` statement is itself a write
|
||||
# definition of the module global `name`, which is live (it can be read
|
||||
# externally), so every function below carries a `def=<name>` annotation
|
||||
# on its `def` line.
|
||||
|
||||
|
||||
def basic_param(x): # $ def=basic_param def=x
|
||||
return x # $ use=x
|
||||
|
||||
|
||||
def basic_assign(): # $ def=basic_assign
|
||||
y = 1 # $ def=y
|
||||
return y # $ use=y
|
||||
|
||||
|
||||
def reassignment(): # $ def=reassignment
|
||||
x = 1
|
||||
x = 2 # $ def=x
|
||||
return x # $ use=x
|
||||
|
||||
|
||||
def if_else_phi(cond): # $ def=if_else_phi def=cond
|
||||
if cond: # $ use=cond phi=x
|
||||
x = 1 # $ def=x
|
||||
else:
|
||||
x = 2 # $ def=x
|
||||
return x # $ use=x
|
||||
|
||||
|
||||
# `some_undefined` is never assigned anywhere, so (matching legacy ESSA)
|
||||
# the SSA library creates no entry definition for it: an undefined-name
|
||||
# read resolves to no SSA def, hence there is no `use=` here.
|
||||
def use_global(): # $ def=use_global
|
||||
return some_undefined
|
||||
|
||||
|
||||
@@ -589,11 +589,11 @@ def test_zip_tuple():
|
||||
|
||||
SINK(z[0][0]) # $ flow="SOURCE, l:-7 -> z[0][0]"
|
||||
SINK(z[0][1]) # $ flow="SOURCE, l:-7 -> z[0][1]"
|
||||
SINK_F(z[0][2])
|
||||
SINK_F(z[0][2]) # $ SPURIOUS: flow="SOURCE, l:-7 -> z[0][2]"
|
||||
SINK_F(z[0][3])
|
||||
SINK(z[1][0]) # $ flow="SOURCE, l:-11 -> z[1][0]"
|
||||
SINK_F(z[1][1]) # $ SPURIOUS: flow="SOURCE, l:-11 -> z[1][1]"
|
||||
SINK(z[1][2]) # $ MISSING: flow="SOURCE, l:-11 -> z[1][2]" # Tuple contents are not tracked beyond the first two arguments for performance.
|
||||
SINK(z[1][2]) # $ flow="SOURCE, l:-11 -> z[1][2]"
|
||||
SINK_F(z[1][3])
|
||||
|
||||
@expects(4)
|
||||
|
||||
@@ -157,7 +157,7 @@ class MyClass2(object):
|
||||
print(self.foo) # $ tracked MISSING: tracked=foo
|
||||
|
||||
instance = MyClass2()
|
||||
print(instance.foo) # $ tracked MISSING: tracked=foo
|
||||
print(instance.foo) # $ MISSING: tracked=foo tracked
|
||||
instance.print_foo() # $ MISSING: tracked=foo
|
||||
|
||||
|
||||
@@ -195,7 +195,7 @@ class Sub1(Base1):
|
||||
|
||||
sub1 = Sub1()
|
||||
sub1.read_foo()
|
||||
print(sub1.foo) # $ tracked MISSING: tracked=foo
|
||||
print(sub1.foo) # $ MISSING: tracked=foo tracked
|
||||
|
||||
|
||||
# attribute written in a subclass method, read in an inherited base class method
|
||||
@@ -210,7 +210,7 @@ class Sub2(Base2):
|
||||
|
||||
sub2 = Sub2()
|
||||
sub2.read_bar()
|
||||
print(sub2.bar) # $ tracked MISSING: tracked=bar
|
||||
print(sub2.bar) # $ MISSING: tracked=bar tracked
|
||||
|
||||
|
||||
# attribute written in a base class method, read on an instance of the subclass
|
||||
@@ -223,4 +223,4 @@ class Sub3(Base3):
|
||||
pass
|
||||
|
||||
sub3 = Sub3()
|
||||
print(sub3.baz) # $ tracked MISSING: tracked=baz
|
||||
print(sub3.baz) # $ MISSING: tracked=baz tracked
|
||||
|
||||
@@ -362,7 +362,7 @@ def test_load_in_bulk():
|
||||
# see https://docs.djangoproject.com/en/4.0/ref/models/querysets/#in-bulk
|
||||
d = TestLoad.objects.in_bulk([1])
|
||||
for val in d.values():
|
||||
SINK(val.text) # $ MISSING: flow
|
||||
SINK(val.text) # $ flow="SOURCE, l:-65 -> val.text"
|
||||
SINK(d[1].text) # $ flow="SOURCE, l:-66 -> d[1].text"
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
#select
|
||||
| app.py:23:20:23:24 | ControlFlowNode for query | app.py:20:18:20:21 | ControlFlowNode for name | app.py:23:20:23:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:20:18:20:21 | ControlFlowNode for name | user-provided value |
|
||||
| app.py:30:20:30:24 | ControlFlowNode for query | app.py:27:19:27:22 | ControlFlowNode for name | app.py:30:20:30:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:27:19:27:22 | ControlFlowNode for name | user-provided value |
|
||||
| app.py:37:20:37:24 | ControlFlowNode for query | app.py:34:19:34:22 | ControlFlowNode for name | app.py:37:20:37:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:34:19:34:22 | ControlFlowNode for name | user-provided value |
|
||||
| app.py:44:20:44:24 | ControlFlowNode for query | app.py:41:19:41:22 | ControlFlowNode for name | app.py:44:20:44:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:41:19:41:22 | ControlFlowNode for name | user-provided value |
|
||||
| app.py:51:20:51:24 | ControlFlowNode for query | app.py:48:19:48:22 | ControlFlowNode for name | app.py:51:20:51:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:48:19:48:22 | ControlFlowNode for name | user-provided value |
|
||||
| sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value |
|
||||
@@ -25,8 +24,6 @@ edges
|
||||
| app.py:21:5:21:9 | ControlFlowNode for query | app.py:23:20:23:24 | ControlFlowNode for query | provenance | |
|
||||
| app.py:27:19:27:22 | ControlFlowNode for name | app.py:28:5:28:9 | ControlFlowNode for query | provenance | |
|
||||
| app.py:28:5:28:9 | ControlFlowNode for query | app.py:30:20:30:24 | ControlFlowNode for query | provenance | |
|
||||
| app.py:34:19:34:22 | ControlFlowNode for name | app.py:35:5:35:9 | ControlFlowNode for query | provenance | |
|
||||
| app.py:35:5:35:9 | ControlFlowNode for query | app.py:37:20:37:24 | ControlFlowNode for query | provenance | |
|
||||
| app.py:41:19:41:22 | ControlFlowNode for name | app.py:42:5:42:9 | ControlFlowNode for query | provenance | |
|
||||
| app.py:42:5:42:9 | ControlFlowNode for query | app.py:44:20:44:24 | ControlFlowNode for query | provenance | |
|
||||
| app.py:48:19:48:22 | ControlFlowNode for name | app.py:49:5:49:9 | ControlFlowNode for query | provenance | |
|
||||
@@ -54,9 +51,6 @@ nodes
|
||||
| app.py:27:19:27:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
|
||||
| app.py:28:5:28:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||
| app.py:30:20:30:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||
| app.py:34:19:34:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
|
||||
| app.py:35:5:35:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||
| app.py:37:20:37:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||
| app.py:41:19:41:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
|
||||
| app.py:42:5:42:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||
| app.py:44:20:44:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||
|
||||
@@ -31,10 +31,10 @@ async def unsafe2(name: str): # $ Source
|
||||
cursor.close()
|
||||
|
||||
@app.get("/unsafe3/")
|
||||
async def unsafe3(name: str): # $ Source
|
||||
async def unsafe3(name: str): # $ MISSING: Source
|
||||
query = "select * from users where name=" + name
|
||||
cursor = hdb_con3.cursor()
|
||||
cursor.execute(query) # $ Alert
|
||||
cursor.execute(query) # $ MISSING: Alert
|
||||
cursor.close()
|
||||
|
||||
@app.get("/unsafe4/")
|
||||
|
||||
@@ -28,6 +28,8 @@ nodes
|
||||
| string_flow.rb:227:10:227:10 | a | semmle.label | a |
|
||||
subpaths
|
||||
testFailures
|
||||
| string_flow.rb:85:10:85:10 | a | Unexpected result: hasValueFlow=a |
|
||||
| string_flow.rb:227:10:227:10 | a | Unexpected result: hasValueFlow=a |
|
||||
#select
|
||||
| string_flow.rb:3:10:3:22 | call to new | string_flow.rb:2:9:2:18 | call to source | string_flow.rb:3:10:3:22 | call to new | $@ | string_flow.rb:2:9:2:18 | call to source | call to source |
|
||||
| string_flow.rb:85:10:85:10 | a | string_flow.rb:83:9:83:18 | call to source | string_flow.rb:85:10:85:10 | a | $@ | string_flow.rb:83:9:83:18 | call to source | call to source |
|
||||
|
||||
@@ -82,7 +82,7 @@ end
|
||||
def m_clear
|
||||
a = source "a"
|
||||
a.clear
|
||||
sink a # $ SPURIOUS: hasValueFlow=a
|
||||
sink a
|
||||
end
|
||||
|
||||
# concat and prepend omitted because they clash with the summaries for
|
||||
@@ -224,7 +224,7 @@ def m_replace
|
||||
b = source "b"
|
||||
sink a.replace(b) # $ hasTaintFlow=b
|
||||
# TODO: currently we get value flow for a, because we don't clear content
|
||||
sink a # $ hasTaintFlow=b SPURIOUS: hasValueFlow=a
|
||||
sink a # $ hasTaintFlow=b
|
||||
end
|
||||
|
||||
def m_reverse
|
||||
|
||||
@@ -18,7 +18,7 @@ class OneController < ActionController::Base
|
||||
end
|
||||
|
||||
def c
|
||||
sink @foo # $ hasTaintFlow
|
||||
sink @foo
|
||||
end
|
||||
end
|
||||
|
||||
@@ -35,7 +35,7 @@ class TwoController < ActionController::Base
|
||||
end
|
||||
|
||||
def c
|
||||
sink @foo # $ SPURIOUS: hasTaintFlow
|
||||
sink @foo
|
||||
end
|
||||
end
|
||||
|
||||
@@ -52,7 +52,7 @@ class ThreeController < ActionController::Base
|
||||
end
|
||||
|
||||
def c
|
||||
sink @foo # $ SPURIOUS: hasTaintFlow
|
||||
sink @foo
|
||||
end
|
||||
end
|
||||
|
||||
@@ -68,7 +68,7 @@ class FourController < ActionController::Base
|
||||
end
|
||||
|
||||
def c
|
||||
sink(@foo.bar) # $ hasTaintFlow
|
||||
sink(@foo.bar)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -84,7 +84,7 @@ class FiveController < ActionController::Base
|
||||
end
|
||||
|
||||
def c
|
||||
sink @foo # $ hasTaintFlow
|
||||
sink @foo
|
||||
end
|
||||
|
||||
def taint_foo
|
||||
|
||||
@@ -270,6 +270,11 @@ nodes
|
||||
| params_flow.rb:205:10:205:10 | a | semmle.label | a |
|
||||
subpaths
|
||||
testFailures
|
||||
| filter_flow.rb:21:10:21:13 | @foo | Unexpected result: hasTaintFlow |
|
||||
| filter_flow.rb:38:10:38:13 | @foo | Unexpected result: hasTaintFlow |
|
||||
| filter_flow.rb:55:10:55:13 | @foo | Unexpected result: hasTaintFlow |
|
||||
| filter_flow.rb:71:10:71:17 | call to bar | Unexpected result: hasTaintFlow |
|
||||
| filter_flow.rb:87:11:87:14 | @foo | Unexpected result: hasTaintFlow |
|
||||
#select
|
||||
| filter_flow.rb:21:10:21:13 | @foo | filter_flow.rb:14:12:14:17 | call to params | filter_flow.rb:21:10:21:13 | @foo | $@ | filter_flow.rb:14:12:14:17 | call to params | call to params |
|
||||
| filter_flow.rb:38:10:38:13 | @foo | filter_flow.rb:30:12:30:17 | call to params | filter_flow.rb:38:10:38:13 | @foo | $@ | filter_flow.rb:30:12:30:17 | call to params | call to params |
|
||||
|
||||
@@ -121,3 +121,37 @@ pub fn rule(input: TokenStream) -> TokenStream {
|
||||
Err(err) => err.to_compile_error().into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Define a desugaring rule whose transform is a hand-written Rust block.
|
||||
///
|
||||
/// Use `manual_rule!` when the transform needs control over capture
|
||||
/// translation timing — for example, when an outer rule needs to set
|
||||
/// state in `ctx` (the `BuildCtx`'s user context) before recursive
|
||||
/// translation reaches inner rules that read that state.
|
||||
///
|
||||
/// ```text
|
||||
/// manual_rule!(
|
||||
/// (query_pattern field: (_) @name)
|
||||
/// {
|
||||
/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables
|
||||
/// // (`name: NodeRef`, etc.) are bound from the query.
|
||||
/// let translated = ctx.translate(name)?;
|
||||
/// Ok(translated)
|
||||
/// }
|
||||
/// )
|
||||
/// ```
|
||||
///
|
||||
/// Differences from [`rule!`]:
|
||||
/// - Captures are **not** auto-translated before the body runs; they
|
||||
/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or
|
||||
/// [`BuildCtx::translate_opt`]) to translate them when you choose.
|
||||
/// - The body is plain Rust returning `Result<Vec<Id>, String>` — no
|
||||
/// tree template, no `Ok(...)` wrap.
|
||||
#[proc_macro]
|
||||
pub fn manual_rule(input: TokenStream) -> TokenStream {
|
||||
let input2: TokenStream2 = input.into();
|
||||
match parse::parse_manual_rule_top(input2) {
|
||||
Ok(output) => output.into(),
|
||||
Err(err) => err.to_compile_error().into(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,9 +22,10 @@ pub fn parse_query_top(input: TokenStream) -> Result<TokenStream> {
|
||||
/// Parse a single query node (possibly with a trailing `@capture`).
|
||||
fn parse_query_node(tokens: &mut Tokens) -> Result<TokenStream> {
|
||||
let base = parse_query_atom(tokens)?;
|
||||
// Check for trailing @capture or @@capture
|
||||
// Check for trailing @capture
|
||||
if peek_is_at(tokens) {
|
||||
let capture_name = consume_capture_marker(tokens)?;
|
||||
tokens.next(); // consume @
|
||||
let capture_name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let name_str = capture_name.to_string();
|
||||
Ok(quote! {
|
||||
yeast::query::QueryNode::Capture {
|
||||
@@ -158,7 +159,8 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
|
||||
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
|
||||
} else {
|
||||
let child = if peek_is_at(tokens) {
|
||||
let capture_name = consume_capture_marker(tokens)?;
|
||||
tokens.next();
|
||||
let capture_name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let name_str = capture_name.to_string();
|
||||
quote! {
|
||||
yeast::query::QueryNode::Capture {
|
||||
@@ -648,9 +650,6 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
|
||||
struct CaptureInfo {
|
||||
name: String,
|
||||
multiplicity: CaptureMultiplicity,
|
||||
/// `true` for `@@name` captures: the auto-translate prefix skips them,
|
||||
/// so the bound `NodeRef` refers to the raw (input-schema) node.
|
||||
raw: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, PartialEq)]
|
||||
@@ -709,14 +708,6 @@ fn extract_captures_inner(
|
||||
extract_captures_inner(&mut inner, captures, child_mult);
|
||||
}
|
||||
TokenTree::Punct(p) if p.as_char() == '@' => {
|
||||
// `@@name` marks the capture as raw (skip auto-translate).
|
||||
let raw = matches!(
|
||||
tokens.peek(),
|
||||
Some(TokenTree::Punct(p)) if p.as_char() == '@'
|
||||
);
|
||||
if raw {
|
||||
tokens.next(); // consume the second `@`
|
||||
}
|
||||
if let Some(TokenTree::Ident(name)) = tokens.next() {
|
||||
let mult = if parent_mult == CaptureMultiplicity::Repeated
|
||||
|| last_mult == CaptureMultiplicity::Repeated
|
||||
@@ -732,7 +723,6 @@ fn extract_captures_inner(
|
||||
captures.push(CaptureInfo {
|
||||
name: name.to_string(),
|
||||
multiplicity: mult,
|
||||
raw,
|
||||
});
|
||||
}
|
||||
last_mult = CaptureMultiplicity::Single;
|
||||
@@ -786,14 +776,6 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
// Parse query
|
||||
let query_code = parse_query_top(query_stream.clone())?;
|
||||
|
||||
// Capture names marked `@@name` (raw) — passed to the auto-translate
|
||||
// prefix as a skip list so those captures keep their input-schema ids.
|
||||
let raw_capture_names: Vec<&str> = captures
|
||||
.iter()
|
||||
.filter(|c| c.raw)
|
||||
.map(|c| c.name.as_str())
|
||||
.collect();
|
||||
|
||||
// Generate capture bindings
|
||||
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
|
||||
let bindings: Vec<TokenStream> = captures
|
||||
@@ -909,14 +891,11 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
let __query = #query_code;
|
||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
||||
// Auto-translation prefix: recursively translate every
|
||||
// captured node before invoking the user's transform body,
|
||||
// except for `@@name` captures listed in `__skip` which the
|
||||
// body consumes raw.
|
||||
// captured node before invoking the user's transform body.
|
||||
// For OneShot rules this preserves the legacy behaviour
|
||||
// (input-schema captures translated to output-schema
|
||||
// nodes); for Repeating rules it is a no-op.
|
||||
let __skip: &[&str] = &[#(#raw_capture_names),*];
|
||||
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx, __skip)?;
|
||||
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?;
|
||||
#(#bindings)*
|
||||
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
||||
let __result: Vec<usize> = { #transform_body };
|
||||
@@ -926,6 +905,106 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse `manual_rule!( query { body } )`.
|
||||
///
|
||||
/// Like [`parse_rule_top`] but:
|
||||
/// - Expects a Rust block `{ ... }` after the query (no `=>` arrow).
|
||||
/// - Generates code that does NOT auto-translate captures before
|
||||
/// running the body. Capture variables refer to raw (input-schema)
|
||||
/// nodes; the body is responsible for explicit translation via
|
||||
/// `ctx.translate(...)`.
|
||||
/// - The body is included verbatim and must evaluate to
|
||||
/// `Result<Vec<usize>, String>`.
|
||||
pub fn parse_manual_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
let mut tokens = input.into_iter().peekable();
|
||||
|
||||
// Collect query tokens up to the body block `{ ... }`.
|
||||
let mut query_tokens = Vec::new();
|
||||
loop {
|
||||
match tokens.peek() {
|
||||
None => {
|
||||
return Err(syn::Error::new(
|
||||
Span::call_site(),
|
||||
"expected a Rust block `{ ... }` after the query in manual_rule!",
|
||||
))
|
||||
}
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => break,
|
||||
_ => {
|
||||
query_tokens.push(tokens.next().unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let query_stream: TokenStream = query_tokens.into_iter().collect();
|
||||
|
||||
// Extract captures from the query (same as in `rule!`).
|
||||
let captures = extract_captures(&query_stream);
|
||||
|
||||
// Parse the query into the QueryNode-building expression.
|
||||
let query_code = parse_query_top(query_stream)?;
|
||||
|
||||
// Generate capture bindings (same as in `rule!`).
|
||||
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
|
||||
let bindings: Vec<TokenStream> = captures
|
||||
.iter()
|
||||
.map(|cap| {
|
||||
let name = Ident::new(&cap.name, Span::call_site());
|
||||
let name_str = &cap.name;
|
||||
match cap.multiplicity {
|
||||
CaptureMultiplicity::Repeated => quote! {
|
||||
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
|
||||
.into_iter()
|
||||
.map(yeast::NodeRef)
|
||||
.collect();
|
||||
},
|
||||
CaptureMultiplicity::Optional => quote! {
|
||||
let #name: Option<yeast::NodeRef> =
|
||||
__captures.get_opt(#name_str).map(yeast::NodeRef);
|
||||
},
|
||||
CaptureMultiplicity::Single => quote! {
|
||||
let #name: yeast::NodeRef =
|
||||
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
|
||||
},
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Consume the body block.
|
||||
let body_group = match tokens.next() {
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => g,
|
||||
other => {
|
||||
return Err(syn::Error::new(
|
||||
Span::call_site(),
|
||||
format!(
|
||||
"expected a Rust block `{{ ... }}` after the query in manual_rule!, found: {other:?}"
|
||||
),
|
||||
))
|
||||
}
|
||||
};
|
||||
let body_stream = body_group.stream();
|
||||
|
||||
// No tokens should follow the body.
|
||||
if let Some(tok) = tokens.next() {
|
||||
return Err(syn::Error::new_spanned(
|
||||
tok,
|
||||
"unexpected token after manual_rule! body",
|
||||
));
|
||||
}
|
||||
|
||||
Ok(quote! {
|
||||
{
|
||||
let __query = #query_code;
|
||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
||||
// No auto-translate prefix for manual rules — the body
|
||||
// is responsible for translating captures explicitly.
|
||||
#(#bindings)*
|
||||
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
||||
#body_stream
|
||||
}))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Token utilities
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -934,16 +1013,6 @@ fn peek_is_at(tokens: &mut Tokens) -> bool {
|
||||
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '@')
|
||||
}
|
||||
|
||||
/// Consume an `@` or `@@` capture marker and the following name ident.
|
||||
/// Caller has already verified `peek_is_at(tokens)`.
|
||||
fn consume_capture_marker(tokens: &mut Tokens) -> Result<Ident> {
|
||||
tokens.next(); // consume the first `@`
|
||||
if peek_is_at(tokens) {
|
||||
tokens.next(); // consume the second `@` of `@@`
|
||||
}
|
||||
expect_ident(tokens, "expected capture name after `@` or `@@`")
|
||||
}
|
||||
|
||||
fn peek_is_literal(tokens: &mut Tokens) -> bool {
|
||||
matches!(tokens.peek(), Some(TokenTree::Literal(_)))
|
||||
}
|
||||
@@ -1044,7 +1113,8 @@ fn expect_repetition(tokens: &mut Tokens) -> Result<TokenStream> {
|
||||
|
||||
fn maybe_wrap_capture(tokens: &mut Tokens, base: TokenStream) -> Result<TokenStream> {
|
||||
if peek_is_at(tokens) {
|
||||
let name = consume_capture_marker(tokens)?;
|
||||
tokens.next(); // consume @
|
||||
let name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let name_str = name.to_string();
|
||||
Ok(quote! {
|
||||
yeast::query::QueryNode::Capture {
|
||||
@@ -1071,12 +1141,13 @@ fn maybe_wrap_repetition(tokens: &mut Tokens, single: TokenStream) -> Result<Tok
|
||||
}
|
||||
}
|
||||
|
||||
/// If `@name` (or `@@name`) follows a Repeated list element, wrap each
|
||||
/// child SingleNode inside the repetition with a Capture. This matches
|
||||
/// tree-sitter semantics where `(_)* @name` captures each matched node.
|
||||
/// If `@name` follows a Repeated list element, wrap each child SingleNode
|
||||
/// inside the repetition with a Capture. This matches tree-sitter semantics
|
||||
/// where `(_)* @name` captures each matched node.
|
||||
fn maybe_wrap_list_capture(tokens: &mut Tokens, elem: TokenStream) -> Result<TokenStream> {
|
||||
if peek_is_at(tokens) {
|
||||
let name = consume_capture_marker(tokens)?;
|
||||
tokens.next();
|
||||
let name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let name_str = name.to_string();
|
||||
// Re-parse the element isn't practical, so we generate a wrapper
|
||||
// that creates a new Repeated with each child wrapped in a capture.
|
||||
|
||||
@@ -292,37 +292,6 @@ Inside `rule!`, captures are Rust variables, so `{name}` inserts a
|
||||
single capture (`Id`) and `{..name}` splices a repeated capture
|
||||
(`Vec<Id>`).
|
||||
|
||||
### Raw captures (`@@name`)
|
||||
|
||||
The default `@name` capture marker is *auto-translated*: in OneShot
|
||||
phases the macro recursively translates the captured node before
|
||||
binding it, so `{name}` in the output template splices a node that
|
||||
already conforms to the output schema.
|
||||
|
||||
For rules that need the raw (input-schema) capture — typically to read
|
||||
its source text or to translate it explicitly with mutable context
|
||||
state between calls — use `@@name` instead. The body sees the original
|
||||
input-schema `NodeRef`:
|
||||
|
||||
```rust
|
||||
yeast::rule!(
|
||||
(assignment left: (_) @@raw_lhs right: (_) @rhs)
|
||||
=>
|
||||
{
|
||||
// raw_lhs is untranslated: read its original source text.
|
||||
let text = ctx.ast.source_text(raw_lhs.into());
|
||||
// rhs is already translated by the auto-translate prefix.
|
||||
tree!((call
|
||||
method: (identifier #{text.as_str()})
|
||||
receiver: {rhs}))
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
Mix `@` and `@@` freely in the same rule. In a Repeating phase both
|
||||
markers are equivalent (auto-translation is a no-op for repeating
|
||||
rules).
|
||||
|
||||
## Complete example: for-loop desugaring
|
||||
|
||||
This rule rewrites Ruby's `for pat in val do body end` into
|
||||
|
||||
@@ -80,28 +80,6 @@ impl Captures {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Like [`try_map_all_captures`] but leaves captures whose name appears
|
||||
/// in `skip` untouched. Used by the `rule!` macro to support `@@name`
|
||||
/// (raw) captures alongside the default auto-translated `@name`
|
||||
/// captures.
|
||||
pub fn try_map_captures_except<E>(
|
||||
&mut self,
|
||||
skip: &[&str],
|
||||
mut f: impl FnMut(Id) -> Result<Vec<Id>, E>,
|
||||
) -> Result<(), E> {
|
||||
for (name, ids) in self.captures.iter_mut() {
|
||||
if skip.contains(name) {
|
||||
continue;
|
||||
}
|
||||
let mut new_ids = Vec::with_capacity(ids.len());
|
||||
for &id in ids.iter() {
|
||||
new_ids.extend(f(id)?);
|
||||
}
|
||||
*ids = new_ids;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
|
||||
if let Some(from_ids) = self.captures.get(from) {
|
||||
let new_values = from_ids.iter().copied().map(f).collect();
|
||||
|
||||
@@ -16,7 +16,7 @@ pub mod schema;
|
||||
pub mod tree_builder;
|
||||
mod visitor;
|
||||
|
||||
pub use yeast_macros::{query, rule, tree, trees};
|
||||
pub use yeast_macros::{manual_rule, query, rule, tree, trees};
|
||||
|
||||
use captures::Captures;
|
||||
pub use cursor::Cursor;
|
||||
@@ -48,12 +48,6 @@ impl From<NodeRef> for Id {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Id> for NodeRef {
|
||||
fn from(value: Id) -> Self {
|
||||
NodeRef(value)
|
||||
}
|
||||
}
|
||||
|
||||
/// Like [`std::fmt::Display`], but the formatting routine is given access to
|
||||
/// the [`Ast`] so that node references can resolve to their source text.
|
||||
///
|
||||
@@ -763,14 +757,13 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> {
|
||||
}
|
||||
|
||||
/// Translate every captured node in `captures` in place (OneShot phase
|
||||
/// only), except for captures whose name appears in `skip` — those are
|
||||
/// left as raw (input-schema) ids for the rule body to consume
|
||||
/// directly. In a Repeating phase this is a no-op — Repeating rules
|
||||
/// receive raw captures regardless of `skip`.
|
||||
/// only). In a Repeating phase this is a no-op — Repeating rules
|
||||
/// receive raw captures.
|
||||
///
|
||||
/// Used by the `rule!` macro's generated prefix. `skip` is populated
|
||||
/// from the macro's `@@name` capture markers; for plain `@name`
|
||||
/// captures (and rules with no `@@` markers) it is empty.
|
||||
/// Used by the `rule!` macro's generated prefix to preserve the
|
||||
/// pre-existing "auto-translate captures before running the transform
|
||||
/// body" behavior. Manually-written transforms typically translate
|
||||
/// captures selectively via [`translate`] instead.
|
||||
///
|
||||
/// To avoid infinite recursion, a capture whose id matches the rule's
|
||||
/// matched root (e.g. from a `(_) @_` pattern) is left unchanged.
|
||||
@@ -779,12 +772,11 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> {
|
||||
captures: &mut Captures,
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
skip: &[&str],
|
||||
) -> Result<(), String> {
|
||||
match &self.inner {
|
||||
TranslatorImpl::OneShot { matched_root, .. } => {
|
||||
let root = *matched_root;
|
||||
captures.try_map_captures_except(skip, |cid| {
|
||||
captures.try_map_all_captures(|cid| {
|
||||
if cid == root {
|
||||
Ok(vec![cid])
|
||||
} else {
|
||||
|
||||
@@ -1058,111 +1058,6 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
|
||||
);
|
||||
}
|
||||
|
||||
/// Verify that `@@name` capture markers skip the auto-translate prefix:
|
||||
/// the body sees the *raw* (input-schema) NodeRef and can read its
|
||||
/// source text or call `ctx.translate(...)` explicitly. Compare with
|
||||
/// the bare `@name` form, where the auto-translate prefix runs the
|
||||
/// same translation up front and the body sees the post-translate id.
|
||||
#[test]
|
||||
fn test_raw_capture_marker() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let rules: Vec<Rule> = vec![
|
||||
yeast::rule!(
|
||||
(program (_)* @stmts)
|
||||
=>
|
||||
(program stmt: {..stmts})
|
||||
),
|
||||
// `@@raw_lhs` is untranslated: the body reads its source text
|
||||
// ("x") and embeds it directly as the identifier content. `@rhs`
|
||||
// is auto-translated (rhs already points to (integer "INT")).
|
||||
yeast::rule!(
|
||||
(assignment left: (_) @@raw_lhs right: (_) @rhs)
|
||||
=>
|
||||
{
|
||||
let text = ctx.ast.source_text(raw_lhs.into());
|
||||
tree!((call
|
||||
method: (identifier #{text.as_str()})
|
||||
receiver: {rhs}))
|
||||
}
|
||||
),
|
||||
yeast::rule!((identifier) => (identifier "ID")),
|
||||
yeast::rule!((integer) => (integer "INT")),
|
||||
];
|
||||
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
let dump = dump_ast(&ast, ast.get_root(), input);
|
||||
// `method:` uses the raw source text ("x"); if `@@` were broken and
|
||||
// auto-translation ran on `raw_lhs`, it would still produce the
|
||||
// string "x" (source_text inherits the input range), so the dump
|
||||
// wouldn't change here. The companion test
|
||||
// `test_raw_capture_marker_explicit_translate` exercises the
|
||||
// stronger property that `ctx.translate(raw_lhs)?` succeeds and
|
||||
// produces the translated `(identifier "ID")`.
|
||||
assert_dump_eq(
|
||||
&dump,
|
||||
r#"
|
||||
program
|
||||
stmt:
|
||||
call
|
||||
method: identifier "x"
|
||||
receiver: integer "INT"
|
||||
"#,
|
||||
);
|
||||
}
|
||||
|
||||
/// Companion to `test_raw_capture_marker`: confirms that calling
|
||||
/// `ctx.translate(raw)` on a `@@`-captured NodeRef from the rule body
|
||||
/// produces the correctly-translated output-schema node. With `@`, the
|
||||
/// translation has already happened, so `ctx.translate(...)` inside the
|
||||
/// body would attempt to re-translate an output node (which has no
|
||||
/// matching rule and would error).
|
||||
#[test]
|
||||
fn test_raw_capture_marker_explicit_translate() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let rules: Vec<Rule> = vec![
|
||||
yeast::rule!(
|
||||
(program (_)* @stmts)
|
||||
=>
|
||||
(program stmt: {..stmts})
|
||||
),
|
||||
yeast::rule!(
|
||||
(assignment left: (_) @@raw_lhs right: (_) @rhs)
|
||||
=>
|
||||
{
|
||||
let translated_lhs = ctx.translate(raw_lhs)?;
|
||||
tree!((call
|
||||
method: {..translated_lhs}
|
||||
receiver: {rhs}))
|
||||
}
|
||||
),
|
||||
yeast::rule!((identifier) => (identifier "ID")),
|
||||
yeast::rule!((integer) => (integer "INT")),
|
||||
];
|
||||
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
let dump = dump_ast(&ast, ast.get_root(), input);
|
||||
assert_dump_eq(
|
||||
&dump,
|
||||
r#"
|
||||
program
|
||||
stmt:
|
||||
call
|
||||
method: identifier "ID"
|
||||
receiver: integer "INT"
|
||||
"#,
|
||||
);
|
||||
}
|
||||
|
||||
// ---- Cursor tests ----
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use codeql_extractor::extractor::simple;
|
||||
use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, rule, tree};
|
||||
use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, manual_rule, rule, tree};
|
||||
|
||||
/// User context propagated from outer rules down to the inner rules that
|
||||
/// emit the corresponding output declarations, so that each emitted node
|
||||
@@ -15,26 +15,26 @@ struct SwiftContext {
|
||||
/// (`computed_getter`/`computed_setter`/`computed_modify`/
|
||||
/// `willset_clause`/`didset_clause`/`getter_specifier`/
|
||||
/// `setter_specifier`).
|
||||
property_name: Option<yeast::NodeRef>,
|
||||
property_name: Option<yeast::Id>,
|
||||
/// Translated type node for the property type. Set by the outer
|
||||
/// `property_binding` rule (computed accessors variant) and
|
||||
/// `protocol_property_declaration` when present; read by the
|
||||
/// accessor inner rules.
|
||||
property_type: Option<yeast::NodeRef>,
|
||||
property_type: Option<yeast::Id>,
|
||||
/// Default-value expression for the next translated `parameter`. Set
|
||||
/// by the outer `function_parameter` rule; read by the `parameter`
|
||||
/// rules.
|
||||
default_value: Option<yeast::NodeRef>,
|
||||
default_value: Option<yeast::Id>,
|
||||
/// Translated outer modifiers (e.g. visibility, attributes) to
|
||||
/// attach to each child of a flattening outer rule. Set by
|
||||
/// `property_declaration`, `enum_entry`, and
|
||||
/// `protocol_property_declaration`.
|
||||
outer_modifiers: Vec<yeast::NodeRef>,
|
||||
outer_modifiers: Vec<yeast::Id>,
|
||||
/// The `let`/`var` binding modifier for a `property_declaration`.
|
||||
/// Set by `property_declaration`; read by the inner declaration
|
||||
/// rules (`property_binding` variants, accessor rules) so they
|
||||
/// emit it as part of the output node's `modifier:` field.
|
||||
binding_modifier: Option<yeast::NodeRef>,
|
||||
binding_modifier: Option<yeast::Id>,
|
||||
/// True when the current child of a flattening outer rule is not
|
||||
/// the first one — its inner rule should emit a
|
||||
/// `chained_declaration` modifier so the original grouping can be
|
||||
@@ -45,10 +45,10 @@ struct SwiftContext {
|
||||
/// Build a freshly-created `chained_declaration` modifier node if
|
||||
/// `ctx.is_chained`, else `None`. Used by inner declaration rules to
|
||||
/// emit the chained tag for non-first children of a flattening outer
|
||||
/// rule. Returns `Option<NodeRef>` so it splices via `{..…}` to 0 or 1 ids.
|
||||
fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Option<yeast::NodeRef> {
|
||||
/// rule. Returns `Option<Id>` so it splices via `{..…}` to 0 or 1 ids.
|
||||
fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Option<yeast::Id> {
|
||||
if ctx.is_chained {
|
||||
Some(ctx.literal("modifier", "chained_declaration").into())
|
||||
Some(ctx.literal("modifier", "chained_declaration"))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@@ -192,15 +192,21 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
// this whole property_binding is itself a non-first declarator
|
||||
// of a containing property_declaration); subsequent accessors
|
||||
// always emit `chained_declaration`.
|
||||
rule!(
|
||||
manual_rule!(
|
||||
(property_binding
|
||||
name: @pattern
|
||||
type: _? @ty
|
||||
computed_value: (computed_property accessor: _+ @@accessors))
|
||||
=>
|
||||
{..{
|
||||
ctx.property_name = Some(tree!((identifier #{pattern})).into());
|
||||
ctx.property_type = ty;
|
||||
computed_value: (computed_property accessor: _+ @accessors))
|
||||
{
|
||||
// Translate `ty` first so the context holds an
|
||||
// output-schema node id.
|
||||
let translated_ty = ctx.translate_opt(ty)?;
|
||||
// Build the property-name identifier from the
|
||||
// (untranslated) pattern leaf.
|
||||
let name_id = tree!((identifier #{pattern}));
|
||||
|
||||
ctx.property_name = Some(name_id);
|
||||
ctx.property_type = translated_ty;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for (i, acc) in accessors.into_iter().enumerate() {
|
||||
@@ -209,8 +215,8 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
}
|
||||
result.extend(ctx.translate(acc)?);
|
||||
}
|
||||
result
|
||||
}}
|
||||
Ok(result)
|
||||
}
|
||||
),
|
||||
// Computed property: shorthand getter (no explicit get/set, just
|
||||
// statements) → a single accessor_declaration with kind "get".
|
||||
@@ -242,26 +248,30 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
// The `variable_declaration` itself inherits the outer rule's
|
||||
// chained state; observers always get `chained_declaration`
|
||||
// because they're subsequent outputs of this flattening rule.
|
||||
rule!(
|
||||
manual_rule!(
|
||||
(property_binding
|
||||
name: (pattern bound_identifier: @name)
|
||||
type: _? @ty
|
||||
value: _? @val
|
||||
observers: (willset_didset_block willset: _? @@ws didset: _? @@ds))
|
||||
=>
|
||||
{..{
|
||||
observers: (willset_didset_block willset: _? @ws didset: _? @ds))
|
||||
{
|
||||
// Translate ty and val so the variable_declaration
|
||||
// below contains output-schema nodes.
|
||||
let translated_ty = ctx.translate_opt(ty)?;
|
||||
let translated_val = ctx.translate_opt(val)?;
|
||||
|
||||
let var_decl = tree!(
|
||||
(variable_declaration
|
||||
modifier: {..ctx.binding_modifier}
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
pattern: (name_pattern identifier: (identifier #{name}))
|
||||
type: {..ty}
|
||||
value: {..val})
|
||||
type: {..translated_ty}
|
||||
value: {..translated_val})
|
||||
);
|
||||
|
||||
// Publish the property name for the observer rules.
|
||||
ctx.property_name = Some(tree!((identifier #{name})).into());
|
||||
ctx.property_name = Some(tree!((identifier #{name})));
|
||||
// Observers are subsequent outputs of this flattening
|
||||
// rule, so they always get `chained_declaration`.
|
||||
ctx.is_chained = true;
|
||||
@@ -270,8 +280,8 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
for obs in ws.into_iter().chain(ds) {
|
||||
result.extend(ctx.translate(obs)?);
|
||||
}
|
||||
result
|
||||
}}
|
||||
Ok(result)
|
||||
}
|
||||
),
|
||||
// property_binding with any pattern name (identifier or
|
||||
// destructuring). Reads outer modifiers / chained tag from `ctx`.
|
||||
@@ -299,24 +309,27 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
// inner declaration rules (`property_binding` variants,
|
||||
// accessor inner rules) read these fields and emit complete
|
||||
// `modifier:` lists from the start.
|
||||
rule!(
|
||||
manual_rule!(
|
||||
(property_declaration
|
||||
binding: (value_binding_pattern mutability: @@binding_kind)
|
||||
declarator: _* @@decls
|
||||
binding: (value_binding_pattern mutability: @binding_kind)
|
||||
declarator: _* @decls
|
||||
(modifiers)* @mods)
|
||||
=>
|
||||
{..{
|
||||
let binding_text = ctx.ast.source_text(binding_kind.into());
|
||||
ctx.binding_modifier = Some(ctx.literal("modifier", &binding_text).into());
|
||||
ctx.outer_modifiers = mods;
|
||||
{
|
||||
let binding_text = ctx.ast.source_text(binding_kind.0);
|
||||
ctx.binding_modifier = Some(ctx.literal("modifier", &binding_text));
|
||||
let mut modifiers = Vec::new();
|
||||
for m in mods {
|
||||
modifiers.extend(ctx.translate(m)?);
|
||||
}
|
||||
ctx.outer_modifiers = modifiers;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for (i, decl) in decls.into_iter().enumerate() {
|
||||
ctx.is_chained = i > 0;
|
||||
result.extend(ctx.translate(decl)?);
|
||||
}
|
||||
result
|
||||
}}
|
||||
Ok(result)
|
||||
}
|
||||
),
|
||||
// ---- Enums ----
|
||||
// enum_type_parameter → parameter (with optional name as pattern).
|
||||
@@ -373,19 +386,22 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
// into `ctx` and translate each case with `ctx.is_chained`
|
||||
// toggled per iteration so the inner `enum_case_entry` rules
|
||||
// emit complete `modifier:` lists from the start.
|
||||
rule!(
|
||||
(enum_entry case: _+ @@cases (modifiers)* @mods)
|
||||
=>
|
||||
{..{
|
||||
ctx.outer_modifiers = mods;
|
||||
manual_rule!(
|
||||
(enum_entry case: _+ @cases (modifiers)* @mods)
|
||||
{
|
||||
let mut modifiers = Vec::new();
|
||||
for m in mods {
|
||||
modifiers.extend(ctx.translate(m)?);
|
||||
}
|
||||
ctx.outer_modifiers = modifiers;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for (i, case) in cases.into_iter().enumerate() {
|
||||
ctx.is_chained = i > 0;
|
||||
result.extend(ctx.translate(case)?);
|
||||
}
|
||||
result
|
||||
}}
|
||||
Ok(result)
|
||||
}
|
||||
),
|
||||
// Plain assignment: `x = expr`
|
||||
rule!(
|
||||
@@ -460,13 +476,12 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
// optional default values. Publishes the default value into `ctx`
|
||||
// before translating the inner `parameter` so the `parameter`
|
||||
// rules can include it as a `default:` field directly.
|
||||
rule!(
|
||||
(function_parameter parameter: @@p default_value: _? @def)
|
||||
=>
|
||||
{..{
|
||||
ctx.default_value = def;
|
||||
ctx.translate(p)?
|
||||
}}
|
||||
manual_rule!(
|
||||
(function_parameter parameter: @p default_value: _? @def)
|
||||
{
|
||||
ctx.default_value = ctx.translate_opt(def)?;
|
||||
ctx.translate(p)
|
||||
}
|
||||
),
|
||||
// Parameter with external name and type
|
||||
rule!(
|
||||
@@ -1011,25 +1026,28 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
// inner `getter_specifier`/`setter_specifier` rules emit
|
||||
// complete nodes from the start (including the
|
||||
// `chained_declaration` tag for non-first accessors).
|
||||
rule!(
|
||||
manual_rule!(
|
||||
(protocol_property_declaration
|
||||
name: (pattern bound_identifier: @name)
|
||||
requirements: (protocol_property_requirements accessor: _+ @@accessors)
|
||||
requirements: (protocol_property_requirements accessor: _+ @accessors)
|
||||
type: _? @ty
|
||||
(modifiers)* @mods)
|
||||
=>
|
||||
{..{
|
||||
ctx.property_name = Some(tree!((identifier #{name})).into());
|
||||
ctx.property_type = ty;
|
||||
ctx.outer_modifiers = mods;
|
||||
{
|
||||
ctx.property_name = Some(tree!((identifier #{name})));
|
||||
ctx.property_type = ctx.translate_opt(ty)?;
|
||||
let mut modifiers = Vec::new();
|
||||
for m in mods {
|
||||
modifiers.extend(ctx.translate(m)?);
|
||||
}
|
||||
ctx.outer_modifiers = modifiers;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for (i, acc) in accessors.into_iter().enumerate() {
|
||||
ctx.is_chained = i > 0;
|
||||
result.extend(ctx.translate(acc)?);
|
||||
}
|
||||
result
|
||||
}}
|
||||
Ok(result)
|
||||
}
|
||||
),
|
||||
// getter_specifier / setter_specifier → bodyless accessor_declaration
|
||||
// getter_specifier / setter_specifier → bodyless
|
||||
|
||||
Reference in New Issue
Block a user