From 39e6bfc894d4352779f23cceec84af6fa42ecf88 Mon Sep 17 00:00:00 2001 From: yoff Date: Tue, 2 Jun 2026 14:09:28 +0000 Subject: [PATCH] Python: add shared-CFG AstSig adapter (AstNodeImpl) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preparatory refactor for the shared-CFG dataflow migration. Adds the adapter that mediates between the Python AST and the shared codeql.controlflow.ControlFlowGraph signature, plus the test suites that validate the new CFG directly against this adapter. The public facade is added in the following commit. Library additions: - semmle.python.controlflow.internal.AstNodeImpl — wraps Python's Stmt/Expr/Scope/Pattern and adds two synthetic kinds of node (BlockStmt for body slots, intermediate nodes for multi-operand boolean expressions) to satisfy the shared CFG signature. - lib/printCfgNew.ql — debug/visualisation query for the new CFG. - consistency-queries/CfgConsistency.ql — consistency query running the shared CFG's standard checks against Python. Test additions (all driven directly off AstNodeImpl): - ControlFlow/bindings/* — annotation-driven SSA-binding tests (annassign, compound, comprehension, decorated, except_handler, imports, match_pattern, parameters, simple, type_params, walrus_starred, with_stmt, dead_under_no_raise). - ControlFlow/evaluation-order/NewCfg*.ql — mirrors of the existing OldCfg evaluation-order self-validation suite, run against the new CFG via NewCfgImpl.qll. - Minor extensions to existing test_if.py / test_boolean.py + cosmetic .expected churn on a handful of OldCfg tests. No dataflow, SSA, or production query is migrated yet. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../ql/consistency-queries/CfgConsistency.ql | 2 + python/ql/lib/printCfgNew.ql | 45 + .../controlflow/internal/AstNodeImpl.qll | 1610 +++++++++++++++++ .../CONSISTENCY/CfgConsistency.expected | 4 + .../bindings/BindingsTest.expected | 0 .../ControlFlow/bindings/BindingsTest.ql | 32 + .../ControlFlow/bindings/annassign.py | 13 + .../ControlFlow/bindings/compound.py | 14 + .../ControlFlow/bindings/comprehension.py | 21 + .../bindings/dead_under_no_raise.py | 52 + .../ControlFlow/bindings/decorated.py | 30 + .../ControlFlow/bindings/except_handler.py | 19 + .../ControlFlow/bindings/imports.py | 14 + .../ControlFlow/bindings/match_pattern.py | 24 + .../ControlFlow/bindings/parameters.py | 42 + .../ControlFlow/bindings/simple.py | 14 + .../ControlFlow/bindings/type_params.py | 21 + .../ControlFlow/bindings/walrus_starred.py | 14 + .../ControlFlow/bindings/with_stmt.py | 21 + .../evaluation-order/AllLiveReachable.ql | 2 + .../evaluation-order/AnnotationHasCfgNode.ql | 2 + .../BasicBlockAnnotationGap.ql | 2 + .../BasicBlockOrdering.expected | 2 +- .../evaluation-order/BasicBlockOrdering.ql | 2 + .../evaluation-order/ConsecutiveTimestamps.ql | 2 + .../evaluation-order/ContiguousTimestamps.ql | 1 + .../evaluation-order/NeverReachable.ql | 2 + .../NewCfgAllLiveReachable.expected | 0 .../NewCfgAllLiveReachable.ql | 14 + .../NewCfgAnnotationHasCfgNode.expected | 1 + .../NewCfgAnnotationHasCfgNode.ql | 18 + .../NewCfgBasicBlockAnnotationGap.expected | 0 .../NewCfgBasicBlockAnnotationGap.ql | 26 + .../NewCfgBasicBlockOrdering.expected | 0 .../NewCfgBasicBlockOrdering.ql | 21 + .../NewCfgBranchTimestamps.expected | 0 .../NewCfgBranchTimestamps.ql | 80 + ...gConsecutivePredecessorTimestamps.expected | 1 + .../NewCfgConsecutivePredecessorTimestamps.ql | 22 + .../NewCfgConsecutiveTimestamps.expected | 0 .../NewCfgConsecutiveTimestamps.ql | 29 + .../evaluation-order/NewCfgImpl.qll | 101 ++ .../NewCfgNeverReachable.expected | 0 .../evaluation-order/NewCfgNeverReachable.ql | 21 + .../NewCfgNoBackwardFlow.expected | 0 .../evaluation-order/NewCfgNoBackwardFlow.ql | 22 + .../NewCfgNoBasicBlock.expected | 1 + .../evaluation-order/NewCfgNoBasicBlock.ql | 18 + .../NewCfgNoSharedReachable.expected | 0 .../NewCfgNoSharedReachable.ql | 21 + .../NewCfgStrictForward.expected | 0 .../evaluation-order/NewCfgStrictForward.ql | 22 + .../evaluation-order/NoBackwardFlow.expected | 2 +- .../evaluation-order/NoBackwardFlow.ql | 2 + .../evaluation-order/NoBasicBlock.ql | 2 + .../evaluation-order/NoSharedReachable.ql | 2 + .../evaluation-order/OldCfgImpl.qll | 8 +- .../evaluation-order/StrictForward.expected | 2 +- .../evaluation-order/StrictForward.ql | 2 + .../evaluation-order/test_boolean.py | 2 +- .../ControlFlow/evaluation-order/test_if.py | 2 +- 61 files changed, 2440 insertions(+), 9 deletions(-) create mode 100644 python/ql/consistency-queries/CfgConsistency.ql create mode 100644 python/ql/lib/printCfgNew.ql create mode 100644 python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll create mode 100644 python/ql/test/extractor-tests/syntax_error/CONSISTENCY/CfgConsistency.expected create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.expected create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.ql create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/annassign.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/compound.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/comprehension.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/decorated.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/except_handler.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/imports.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/match_pattern.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/parameters.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/simple.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/type_params.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/walrus_starred.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/with_stmt.py create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgImpl.qll create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.ql diff --git a/python/ql/consistency-queries/CfgConsistency.ql b/python/ql/consistency-queries/CfgConsistency.ql new file mode 100644 index 00000000000..ab13eddf190 --- /dev/null +++ b/python/ql/consistency-queries/CfgConsistency.ql @@ -0,0 +1,2 @@ +import semmle.python.controlflow.internal.AstNodeImpl +import ControlFlow::Consistency diff --git a/python/ql/lib/printCfgNew.ql b/python/ql/lib/printCfgNew.ql new file mode 100644 index 00000000000..ba336de562a --- /dev/null +++ b/python/ql/lib/printCfgNew.ql @@ -0,0 +1,45 @@ +/** + * @name Print CFG (New) + * @description Produces a representation of a file's Control Flow Graph + * using the new shared control flow library. + * This query is used by the VS Code extension. + * @id python/print-cfg + * @kind graph + * @tags ide-contextual-queries/print-cfg + */ + +private import python as Py +import semmle.python.controlflow.internal.AstNodeImpl + +external string selectedSourceFile(); + +private predicate selectedSourceFileAlias = selectedSourceFile/0; + +external int selectedSourceLine(); + +private predicate selectedSourceLineAlias = selectedSourceLine/0; + +external int selectedSourceColumn(); + +private predicate selectedSourceColumnAlias = selectedSourceColumn/0; + +module ViewCfgQueryInput implements ControlFlow::ViewCfgQueryInputSig { + predicate selectedSourceFile = selectedSourceFileAlias/0; + + predicate selectedSourceLine = selectedSourceLineAlias/0; + + predicate selectedSourceColumn = selectedSourceColumnAlias/0; + + predicate cfgScopeSpan( + Ast::Callable callable, Py::File file, int startLine, int startColumn, int endLine, + int endColumn + ) { + exists(Py::Scope scope | + scope = callable.asScope() and + file = scope.getLocation().getFile() and + scope.getLocation().hasLocationInfo(_, startLine, startColumn, endLine, endColumn) + ) + } +} + +import ControlFlow::ViewCfgQuery diff --git a/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll b/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll new file mode 100644 index 00000000000..5dba3d96ea8 --- /dev/null +++ b/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll @@ -0,0 +1,1610 @@ +/** + * Provides classes for the shared control-flow library, mediating between + * the Python AST and `AstSig`. + * + * The `Ast` module wraps Python's `Stmt`, `Expr`, `Scope`, and `Pattern`, + * and adds two synthetic kinds of node: + * - `BlockStmt`, identifying a body slot of a parent AST node (e.g. an + * `if`'s then or else branch). `Py::StmtList` itself is not directly + * wrapped. + * - Intermediate nodes for multi-operand boolean expressions. + */ +overlay[local?] +module; + +private import python as Py +private import codeql.controlflow.ControlFlowGraph +private import codeql.controlflow.SuccessorType +private import codeql.util.Void + +/** + * Gets the bound `Name` of a PEP 695 type parameter (`TypeVar`, + * `ParamSpec`, or `TypeVarTuple`). The base `TypeParameter` class does + * not expose `getName()`; this helper dispatches over the subtypes. + */ +private Py::Name typeParameterName(Py::TypeParameter tp) { + result = tp.(Py::TypeVar).getName() + or + result = tp.(Py::ParamSpec).getName() + or + result = tp.(Py::TypeVarTuple).getName() +} + +/** Provides the Python implementation of the shared CFG `AstSig`. */ +module Ast implements AstSig { + private newtype TAstNode = + TPyStmt(Py::Stmt s) or + TPyExpr(Py::Expr e) { not e instanceof Py::BoolExpr } or + TScope(Py::Scope sc) or + TPattern(Py::Pattern p) or + /** + * A synthetic node representing an operand pair of an `and`/`or` + * expression. For `a and b and c` (operands 0, 1, 2) we model the + * operation as a right-nested tree: pair 0 represents the whole + * expression with left=a and right=pair 1; pair 1 represents + * `b and c` with left=b and right=c. Each Python `Py::BoolExpr` + * with `n` operands has `n - 1` such pairs (indices `0 .. n - 2`). + */ + TBoolExprPair(Py::BoolExpr be, int index) { index = [0 .. count(be.getAValue()) - 2] } or + /** + * A synthetic block statement, wrapping a `Py::StmtList`. Each list of + * statements that represents an imperative block (a function/class/module + * body, an `if`/`while`/`for` branch, a `try`/`except`/`finally` body, + * etc.) becomes one `BlockStmt` node in the CFG. `Py::StmtList`s used + * in other roles - `Try.getHandlers()` (iterated via `getCatch`) and + * `MatchStmt.getCases()` (iterated via `getCase`) - are excluded, as + * the shared library's `Try`/`Switch` logic walks their items + * individually. + */ + TBlockStmt(Py::StmtList sl) { + not sl = any(Py::Try t).getHandlers() and + not sl = any(Py::MatchStmt m).getCases() + } + + /** + * The union of `TPyStmt` (wrapping `Py::Stmt`) and `TBlockStmt` (wrapping + * `Py::StmtList`). Both represent the kinds of node that can appear in + * a `Stmt` position in the CFG. + */ + private class TStmt = TPyStmt or TBlockStmt; + + /** + * The union of `TPyExpr` (wrapping non-boolean `Py::Expr`) and + * `TBoolExprPair` (synthetic operand pairs of `and`/`or` expressions). + * Both represent the kinds of node that can appear in an `Expr` + * position in the CFG. + */ + private class TExpr = TPyExpr or TBoolExprPair; + + /** + * An AST node visible to the shared CFG. + * + * This is the abstract implementation class. It enforces that each + * concrete subclass provides `toString`, `getLocation`, and + * `getEnclosingCallable` (one subclass per `TAstNode` newtype branch). + * The public alias `AstNode` is what users (and the `AstSig` signature) + * see; subclasses inside this module extend `AstNodeImpl` directly. + */ + abstract private class AstNodeImpl extends TAstNode { + /** Gets a textual representation of this AST node. */ + abstract string toString(); + + /** Gets the location of this AST node. */ + abstract Py::Location getLocation(); + + /** Gets the enclosing callable that contains this node, if any. */ + abstract Callable getEnclosingCallable(); + + /** Gets the underlying Python `Stmt`, if this node wraps one. */ + Py::Stmt asStmt() { this = TPyStmt(result) } + + /** + * Gets the underlying Python `Expr`, if this node wraps one. Boolean + * expressions are represented by `TBoolExprPair(_, 0)`; this + * predicate also recovers the underlying `Py::BoolExpr` from such a + * representation. + */ + Py::Expr asExpr() { + this = TPyExpr(result) + or + this = TBoolExprPair(result, 0) + } + + /** Gets the underlying Python `Scope`, if this node wraps one. */ + Py::Scope asScope() { this = TScope(result) } + + /** Gets the underlying Python `Pattern`, if this node wraps one. */ + Py::Pattern asPattern() { this = TPattern(result) } + + /** Gets the underlying Python `StmtList`, if this node is a `BlockStmt`. */ + Py::StmtList asStmtList() { this = TBlockStmt(result) } + + /** + * Gets the child of this AST node at the specified (zero-based) + * index, in evaluation order. Subclasses with children override + * this method. + */ + AstNode getChild(int index) { none() } + } + + /** An AST node visible to the shared CFG. */ + final class AstNode = AstNodeImpl; + + /** Gets the immediately enclosing callable that contains `node`. */ + Callable getEnclosingCallable(AstNode node) { result = node.getEnclosingCallable() } + + /** + * A callable: a function, class, or module scope. + * + * In Python, all three are executable scopes with statement bodies. + */ + class Callable extends AstNodeImpl, TScope { + private Py::Scope sc; + + Callable() { this = TScope(sc) } + + override string toString() { result = sc.toString() } + + override Py::Location getLocation() { result = sc.getLocation() } + + override Callable getEnclosingCallable() { result.asScope() = sc.getEnclosingScope() } + } + + /** Gets the body of callable `c`. */ + AstNode callableGetBody(Callable c) { result.asStmtList() = c.asScope().getBody() } + + /** + * A parameter of a callable. + * + * Modelled per the C# template (`csharp/.../ControlFlowGraph.qll:147-156`): + * each Python parameter (the `Py::Parameter` AST node, which is a `Name` + * or — Python 2 only — a `Tuple` in store context) becomes a CFG node + * at a stable position in the enclosing callable's entry sequence. + * + * Default-value expressions for positional and keyword-only parameters + * are wired separately on the `FunctionDefExpr` / `LambdaExpr` wrappers + * (they evaluate at function-definition time, not at call time). + * `Parameter::getDefaultValue()` returns `none()` here, signalling to + * the shared library that the parameter never falls back to a default + * during call binding. This mirrors C# for non-optional parameters. + */ + class Parameter extends Expr { + private Py::Parameter param; + + Parameter() { this = TPyExpr(param) } + + /** Gets the underlying Python parameter. */ + Py::Parameter asParameter() { result = param } + + /** + * Gets the default-value expression of this parameter, if any. + * + * Returns `none()`: defaults evaluate at function-definition time and + * are wired into the CFG via `FunctionDefExpr.getDefault` / + * `LambdaExpr.getDefault`. The shared library calls this predicate + * to model the "missing argument → evaluate default" fallback during + * call binding, which Python does not model at the CFG level. + */ + Expr getDefaultValue() { none() } + + /** + * Gets the pattern for this parameter. In Python, there is no destructuring + * pattern syntax for parameters, so the pattern is the parameter itself. + */ + AstNode getPattern() { result = this } + } + + /** + * Gets the `index`th parameter of callable `c`, ordered as Python binds + * them at call time: positional, then vararg (`*args`), then + * keyword-only, then kwarg (`**kwargs`). + */ + Parameter callableGetParameter(Callable c, int index) { + exists(Py::Function f | f = c.asScope() | + result.asParameter() = + rank[index + 1](Py::Parameter p, int subOrder, int subIndex | + // positional parameters first + p = f.getArg(subIndex) and subOrder = 0 + or + // then *args + p = f.getVararg() and subOrder = 1 and subIndex = 0 + or + // then keyword-only parameters + p = f.getKeywordOnlyArg(subIndex) and subOrder = 2 + or + // finally **kwargs + p = f.getKwarg() and subOrder = 3 and subIndex = 0 + | + p order by subOrder, subIndex + ) + ) + } + + /** A statement. */ + class Stmt extends AstNodeImpl, TStmt { + // For `TPyStmt` instances, delegate to the wrapped Python statement. + // `BlockStmt` (the only `TBlockStmt` subclass) provides its own overrides. + override string toString() { result = this.asStmt().toString() } + + override Py::Location getLocation() { result = this.asStmt().getLocation() } + + override Callable getEnclosingCallable() { result.asScope() = this.asStmt().getScope() } + } + + /** An expression. */ + class Expr extends AstNodeImpl, TExpr { + // For `TPyExpr` instances, delegate to the wrapped Python expression. + // `BinaryExpr` (the only `TBoolExprPair` subclass) provides its own overrides. + override string toString() { result = this.asExpr().toString() } + + override Py::Location getLocation() { result = this.asExpr().getLocation() } + + override Callable getEnclosingCallable() { result.asScope() = this.asExpr().getScope() } + } + + /** A pattern in a `match` statement. */ + additional class Pattern extends AstNodeImpl, TPattern { + private Py::Pattern p; + + Pattern() { this = TPattern(p) } + + override string toString() { result = p.toString() } + + override Py::Location getLocation() { result = p.getLocation() } + + override Callable getEnclosingCallable() { result.asScope() = p.getScope() } + } + + /** + * A `case x` pattern that binds `x` to the matched value. + */ + additional class MatchCapturePattern extends Pattern { + private Py::MatchCapturePattern cap; + + MatchCapturePattern() { this = TPattern(cap) } + + /** Gets the bound Name expression. */ + Expr getVariable() { result.asExpr() = cap.getVariable() } + + override AstNode getChild(int index) { index = 0 and result = this.getVariable() } + } + + /** + * A `case pattern as name` pattern. + */ + additional class MatchAsPattern extends Pattern { + private Py::MatchAsPattern asp; + + MatchAsPattern() { this = TPattern(asp) } + + /** Gets the inner pattern. */ + AstNode getPattern() { result.asPattern() = asp.getPattern() } + + /** Gets the bound Name expression. */ + Expr getAlias() { result.asExpr() = asp.getAlias() } + + override AstNode getChild(int index) { + index = 0 and result = this.getPattern() + or + index = 1 and result = this.getAlias() + } + } + + /** + * A `case [a, b, *rest]` star pattern. Binds `rest` to the remaining + * elements of the sequence. + */ + additional class MatchStarPattern extends Pattern { + private Py::MatchStarPattern starp; + + MatchStarPattern() { this = TPattern(starp) } + + /** Gets the target Pattern (a `MatchCapturePattern` if `*rest`). */ + AstNode getTarget() { result.asPattern() = starp.getTarget() } + + override AstNode getChild(int index) { index = 0 and result = this.getTarget() } + } + + /** + * A `case [a, b, ...]` sequence pattern. Recurses into the sub-patterns. + */ + additional class MatchSequencePattern extends Pattern { + private Py::MatchSequencePattern seqp; + + MatchSequencePattern() { this = TPattern(seqp) } + + /** Gets the `n`th sub-pattern. */ + AstNode getPattern(int n) { result.asPattern() = seqp.getPattern(n) } + + override AstNode getChild(int index) { result = this.getPattern(index) } + } + + /** + * A `case Cls(a, b, x=y)` class pattern. + */ + additional class MatchClassPattern extends Pattern { + private Py::MatchClassPattern clsp; + + MatchClassPattern() { this = TPattern(clsp) } + + /** Gets the class expression of this class pattern. */ + Expr getClass() { result.asExpr() = clsp.getClass() } + + /** Gets the `n`th positional sub-pattern. */ + AstNode getPositional(int n) { result.asPattern() = clsp.getPositional(n) } + + /** Gets the `n`th keyword sub-pattern. */ + AstNode getKeyword(int n) { result.asPattern() = clsp.getKeyword(n) } + + private int numPositional() { result = count(int i | exists(clsp.getPositional(i))) } + + override AstNode getChild(int index) { + index = 0 and result = this.getClass() + or + result = this.getPositional(index - 1) and index >= 1 + or + result = this.getKeyword(index - 1 - this.numPositional()) and + index >= 1 + this.numPositional() + } + } + + /** + * A `case {k: v}` mapping pattern. + */ + additional class MatchMappingPattern extends Pattern { + private Py::MatchMappingPattern mapp; + + MatchMappingPattern() { this = TPattern(mapp) } + + AstNode getMapping(int n) { result.asPattern() = mapp.getMapping(n) } + + override AstNode getChild(int index) { result = this.getMapping(index) } + } + + /** + * A key-value pair inside a `case {k: v}` mapping pattern. + */ + additional class MatchKeyValuePattern extends Pattern { + private Py::MatchKeyValuePattern kvp; + + MatchKeyValuePattern() { this = TPattern(kvp) } + + AstNode getKey() { result.asPattern() = kvp.getKey() } + + AstNode getValue() { result.asPattern() = kvp.getValue() } + + override AstNode getChild(int index) { + index = 0 and result = this.getKey() + or + index = 1 and result = this.getValue() + } + } + + /** + * A `case Cls(name=value)` keyword sub-pattern. + */ + additional class MatchKeywordPattern extends Pattern { + private Py::MatchKeywordPattern kwp; + + MatchKeywordPattern() { this = TPattern(kwp) } + + Expr getAttribute() { result.asExpr() = kwp.getAttribute() } + + AstNode getValue() { result.asPattern() = kwp.getValue() } + + override AstNode getChild(int index) { + index = 0 and result = this.getAttribute() + or + index = 1 and result = this.getValue() + } + } + + /** A `case **rest` double-star mapping sub-pattern. */ + additional class MatchDoubleStarPattern extends Pattern { + private Py::MatchDoubleStarPattern dsp; + + MatchDoubleStarPattern() { this = TPattern(dsp) } + + AstNode getTarget() { result.asPattern() = dsp.getTarget() } + + override AstNode getChild(int index) { index = 0 and result = this.getTarget() } + } + + /** A `case p1 | p2 | …` or-pattern. */ + additional class MatchOrPattern extends Pattern { + private Py::MatchOrPattern orp; + + MatchOrPattern() { this = TPattern(orp) } + + AstNode getPattern(int n) { result.asPattern() = orp.getPattern(n) } + + override AstNode getChild(int index) { result = this.getPattern(index) } + } + + /** A `case 1` literal pattern. */ + additional class MatchLiteralPattern extends Pattern { + private Py::MatchLiteralPattern litp; + + MatchLiteralPattern() { this = TPattern(litp) } + + Expr getLiteral() { result.asExpr() = litp.getLiteral() } + + override AstNode getChild(int index) { index = 0 and result = this.getLiteral() } + } + + /** A `case Cls.NAME` value pattern. */ + additional class MatchValuePattern extends Pattern { + private Py::MatchValuePattern vp; + + MatchValuePattern() { this = TPattern(vp) } + + Expr getValue() { result.asExpr() = vp.getValue() } + + override AstNode getChild(int index) { index = 0 and result = this.getValue() } + } + + /** + * A block statement, modeling the body of a parent AST node as a + * sequence of statements. + */ + class BlockStmt extends Stmt, TBlockStmt { + private Py::StmtList sl; + + BlockStmt() { this = TBlockStmt(sl) } + + /** Gets the `n`th (zero-based) statement in this block. */ + Stmt getStmt(int n) { result.asStmt() = sl.getItem(n) } + + /** Gets the last statement in this block. */ + Stmt getLastStmt() { result.asStmt() = sl.getLastItem() } + + override string toString() { result = sl.toString() } + + // `Py::StmtList` has no native location; approximate with the first + // item's location. + override Py::Location getLocation() { result = sl.getItem(0).getLocation() } + + override Callable getEnclosingCallable() { + result.asScope() = sl.getParent().(Py::Scope) + or + result.asScope() = sl.getParent().(Py::Stmt).getScope() + } + + override AstNode getChild(int index) { result = this.getStmt(index) } + } + + /** An expression statement. */ + class ExprStmt extends Stmt { + private Py::ExprStmt exprStmt; + + ExprStmt() { this = TPyStmt(exprStmt) } + + /** Gets the expression in this expression statement. */ + Expr getExpr() { result.asExpr() = exprStmt.getValue() } + + override AstNode getChild(int index) { index = 0 and result = this.getExpr() } + } + + /** An assignment statement (`x = y = expr`). */ + additional class AssignStmt extends Stmt { + private Py::Assign assign; + + AssignStmt() { this = TPyStmt(assign) } + + Expr getValue() { result.asExpr() = assign.getValue() } + + Expr getTarget(int n) { result.asExpr() = assign.getTarget(n) } + + int getNumberOfTargets() { result = count(assign.getATarget()) } + + override AstNode getChild(int index) { + index = 0 and result = this.getValue() + or + result = this.getTarget(index - 1) and index >= 1 + } + } + + /** An augmented assignment statement (`x += expr`). */ + additional class AugAssignStmt extends Stmt { + private Py::AugAssign augAssign; + + AugAssignStmt() { this = TPyStmt(augAssign) } + + Expr getOperation() { result.asExpr() = augAssign.getOperation() } + + override AstNode getChild(int index) { index = 0 and result = this.getOperation() } + } + + /** + * An annotated assignment statement (`x: T = expr`, or `x: T` without + * value). The evaluation order follows CPython: annotation first, then + * the optional value, then the target binding. + */ + additional class AnnAssignStmt extends Stmt { + private Py::AnnAssign annAssign; + + AnnAssignStmt() { this = TPyStmt(annAssign) } + + Expr getAnnotation() { result.asExpr() = annAssign.getAnnotation() } + + Expr getValue() { result.asExpr() = annAssign.getValue() } + + Expr getTarget() { result.asExpr() = annAssign.getTarget() } + + override AstNode getChild(int index) { + index = 0 and result = this.getAnnotation() + or + index = 1 and result = this.getValue() + or + index = 2 and result = this.getTarget() + } + } + + /** An assignment expression / walrus operator (`x := expr`). */ + additional class NamedExpr extends Expr { + private Py::AssignExpr assignExpr; + + NamedExpr() { this = TPyExpr(assignExpr) } + + Expr getValue() { result.asExpr() = assignExpr.getValue() } + + Expr getTarget() { result.asExpr() = assignExpr.getTarget() } + + override AstNode getChild(int index) { + index = 0 and result = this.getValue() + or + index = 1 and result = this.getTarget() + } + } + + /** + * An `if` statement. + * + * Python's `elif` chains are represented as nested `If` nodes in the + * else branch's `StmtList`. The shared CFG library handles this + * naturally: `getElse()` returns the `BlockStmt` wrapping the else + * branch, and if that block contains a single `If`, the result is + * a chained conditional. + */ + class IfStmt extends Stmt { + private Py::If ifStmt; + + IfStmt() { this = TPyStmt(ifStmt) } + + /** Gets the underlying Python `If` statement. */ + Py::If asIf() { result = ifStmt } + + /** Gets the condition of this `if` statement. */ + Expr getCondition() { result.asExpr() = ifStmt.getTest() } + + /** Gets the `then` (true) branch of this `if` statement. */ + Stmt getThen() { result.asStmtList() = ifStmt.getBody() } + + /** Gets the `else` (false) branch, if any. */ + Stmt getElse() { result.asStmtList() = ifStmt.getOrelse() } + + override AstNode getChild(int index) { + index = 0 and result = this.getCondition() + or + index = 1 and result = this.getThen() + or + index = 2 and result = this.getElse() + } + } + + /** A loop statement. */ + class LoopStmt extends Stmt { + LoopStmt() { + this = TPyStmt(any(Py::While w)) + or + this = TPyStmt(any(Py::For f)) + } + + /** Gets the body of this loop statement. */ + Stmt getBody() { none() } + } + + /** A `while` loop statement. */ + class WhileStmt extends LoopStmt { + private Py::While whileStmt; + + WhileStmt() { this = TPyStmt(whileStmt) } + + /** Gets the boolean condition of this `while` loop. */ + Expr getCondition() { result.asExpr() = whileStmt.getTest() } + + override Stmt getBody() { result.asStmtList() = whileStmt.getBody() } + + /** Gets the `else` branch of this `while` loop, if any. */ + Stmt getElse() { result.asStmtList() = whileStmt.getOrelse() } + + override AstNode getChild(int index) { + index = 0 and result = this.getCondition() + or + index = 1 and result = this.getBody() + or + index = 2 and result = this.getElse() + } + } + + /** + * A `do-while` loop statement. Python has no do-while construct. + */ + class DoStmt extends LoopStmt { + DoStmt() { none() } + + Expr getCondition() { none() } + } + + /** An `until` loop. Python has no `until` loop. */ + class UntilStmt extends LoopStmt { + UntilStmt() { none() } + + Expr getCondition() { none() } + } + + /** A C-style `for` loop. Python has no C-style for loop. */ + class ForStmt extends LoopStmt { + ForStmt() { none() } + + AstNode getInit(int index) { none() } + + Expr getCondition() { none() } + + AstNode getUpdate(int index) { none() } + } + + /** A for-each loop (`for x in iterable:`). */ + class ForeachStmt extends LoopStmt { + private Py::For forStmt; + + ForeachStmt() { this = TPyStmt(forStmt) } + + /** Gets the loop variable. */ + Expr getVariable() { result.asExpr() = forStmt.getTarget() } + + /** Gets the collection being iterated. */ + Expr getCollection() { result.asExpr() = forStmt.getIter() } + + override Stmt getBody() { result.asStmtList() = forStmt.getBody() } + + /** Gets the `else` branch of this `for` loop, if any. */ + Stmt getElse() { result.asStmtList() = forStmt.getOrelse() } + + override AstNode getChild(int index) { + index = 0 and result = this.getCollection() + or + index = 1 and result = this.getVariable() + or + index = 2 and result = this.getBody() + or + index = 3 and result = this.getElse() + } + } + + /** A `break` statement. */ + class BreakStmt extends Stmt { + BreakStmt() { this = TPyStmt(any(Py::Break b)) } + } + + /** A `continue` statement. */ + class ContinueStmt extends Stmt { + ContinueStmt() { this = TPyStmt(any(Py::Continue c)) } + } + + /** A `goto` statement. Python has no goto. */ + class GotoStmt extends Stmt { + GotoStmt() { none() } + } + + /** A `return` statement. */ + class ReturnStmt extends Stmt { + private Py::Return ret; + + ReturnStmt() { this = TPyStmt(ret) } + + /** Gets the expression being returned, if any. */ + Expr getExpr() { result.asExpr() = ret.getValue() } + + override AstNode getChild(int index) { index = 0 and result = this.getExpr() } + } + + /** A `raise` statement (mapped to `Throw`). */ + class Throw extends Stmt { + private Py::Raise raise; + + Throw() { this = TPyStmt(raise) } + + /** Gets the expression being raised. */ + Expr getExpr() { result.asExpr() = raise.getException() } + + /** Gets the cause of this `raise`, if any. */ + Expr getCause() { result.asExpr() = raise.getCause() } + + override AstNode getChild(int index) { + index = 0 and result = this.getExpr() + or + index = 1 and result = this.getCause() + } + } + + /** + * An `import` statement (`import a, b` or `from m import a, b`). + * + * Each alias contributes two children in evaluation order: first the + * value expression (which performs the import side-effect), then the + * bound `asname` Name (the in-scope binding). This makes both reachable + * from the CFG and allows `Name.defines(v)` for `asname` Names to have + * corresponding CFG nodes — which is essential for SSA to see import + * bindings. + */ + additional class ImportStmt extends Stmt { + private Py::Import imp; + + ImportStmt() { this = TPyStmt(imp) } + + /** Gets the value (module/member expression) of the `n`th alias. */ + Expr getValue(int n) { result.asExpr() = imp.getName(n).getValue() } + + /** Gets the bound `asname` of the `n`th alias. */ + Expr getAsname(int n) { result.asExpr() = imp.getName(n).getAsname() } + + /** Gets the number of aliases in this import statement. */ + int getNumberOfAliases() { result = count(int i | exists(imp.getName(i))) } + + override AstNode getChild(int index) { + exists(int i | + index = 2 * i and result = this.getValue(i) + or + index = 2 * i + 1 and result = this.getAsname(i) + ) + } + } + + /** + * A `from m import *` statement. Evaluates the module expression but + * binds no name (the bindings happen by side-effect at runtime, which + * is not modelled at the CFG level). + */ + additional class ImportStarStmt extends Stmt { + private Py::ImportStar imp; + + ImportStarStmt() { this = TPyStmt(imp) } + + Expr getModule() { result.asExpr() = imp.getModule() } + + override AstNode getChild(int index) { index = 0 and result = this.getModule() } + } + + /** A `with` statement. */ + additional class WithStmt extends Stmt { + private Py::With withStmt; + + WithStmt() { this = TPyStmt(withStmt) } + + Expr getContextExpr() { result.asExpr() = withStmt.getContextExpr() } + + Expr getOptionalVars() { result.asExpr() = withStmt.getOptionalVars() } + + Stmt getBody() { result.asStmtList() = withStmt.getBody() } + + override AstNode getChild(int index) { + index = 0 and result = this.getContextExpr() + or + index = 1 and result = this.getOptionalVars() + or + index = 2 and result = this.getBody() + } + } + + /** An `assert` statement. */ + additional class AssertStmt extends Stmt { + private Py::Assert assertStmt; + + AssertStmt() { this = TPyStmt(assertStmt) } + + Expr getTest() { result.asExpr() = assertStmt.getTest() } + + Expr getMsg() { result.asExpr() = assertStmt.getMsg() } + + override AstNode getChild(int index) { + index = 0 and result = this.getTest() + or + index = 1 and result = this.getMsg() + } + } + + /** A `delete` statement. */ + additional class DeleteStmt extends Stmt { + private Py::Delete del; + + DeleteStmt() { this = TPyStmt(del) } + + Expr getTarget(int n) { result.asExpr() = del.getTarget(n) } + + override AstNode getChild(int index) { result = this.getTarget(index) } + } + + /** + * A PEP 695 `type` statement (`type Alias[T1, T2] = value`). + * + * The type parameters bind at statement-evaluation time. The value + * expression is captured for lazy evaluation but the alias `Name` + * itself binds the resulting `TypeAliasType` object — so the CFG must + * visit at minimum the type-parameter names and the alias name. + */ + additional class TypeAliasStmt extends Stmt { + private Py::TypeAlias ta; + + TypeAliasStmt() { this = TPyStmt(ta) } + + /** Gets the alias `Name` bound by this statement. */ + Expr getName() { result.asExpr() = ta.getName() } + + /** + * Gets the `n`th PEP 695 type-parameter name (a `Name` in store + * context), in declaration order. + */ + Expr getTypeParamName(int n) { result.asExpr() = typeParameterName(ta.getTypeParameter(n)) } + + int getNumberOfTypeParams() { result = count(ta.getATypeParameter()) } + + override AstNode getChild(int index) { + result = this.getTypeParamName(index) + or + index = this.getNumberOfTypeParams() and result = this.getName() + } + } + + /** A `try` statement. */ + class TryStmt extends Stmt { + private Py::Try tryStmt; + + TryStmt() { this = TPyStmt(tryStmt) } + + AstNode getBody(int index) { index = 0 and result.asStmtList() = tryStmt.getBody() } + + /** Gets the `else` branch of this `try` statement, if any. */ + Stmt getElse() { result.asStmtList() = tryStmt.getOrelse() } + + Stmt getFinally() { result.asStmtList() = tryStmt.getFinalbody() } + + CatchClause getCatch(int index) { result.asStmt() = tryStmt.getHandler(index) } + + override AstNode getChild(int index) { + index = 0 and result = this.getBody(0) + or + result = this.getCatch(index - 1) and index >= 1 + or + index = -1 and result = this.getFinally() + or + index = -2 and result = this.getElse() + } + } + + /** + * Gets the `else` branch of `try` statement `try`, if any. + */ + AstNode getTryElse(TryStmt try) { result = try.getElse() } + + /** + * Gets the `else` branch of loop `loop`, if any. + * + * Python's `while`/`for` loops may have an `else` block that runs when the + * loop completes without `break`. + */ + AstNode getLoopElse(LoopStmt loop) { + result = loop.(WhileStmt).getElse() + or + result = loop.(ForeachStmt).getElse() + } + + /** An exception handler (`except` or `except*`). */ + class CatchClause extends Stmt { + private Py::ExceptionHandler handler; + + CatchClause() { this = TPyStmt(handler) } + + /** Gets the type expression of this exception handler. */ + Expr getType() { result.asExpr() = handler.getType() } + + /** Gets the variable name of this exception handler, if any. */ + AstNode getVariable() { result.asExpr() = handler.getName() } + + /** Holds: catch clauses do not have a `Condition` in Python's model. */ + Expr getCondition() { none() } + + /** Gets the body of this exception handler. */ + Stmt getBody() { + result.asStmtList() = handler.(Py::ExceptStmt).getBody() + or + result.asStmtList() = handler.(Py::ExceptGroupStmt).getBody() + } + + override AstNode getChild(int index) { + index = 0 and result = this.getType() + or + index = 1 and result = this.getVariable() + or + index = 2 and result = this.getBody() + } + } + + /** A `match` statement, mapped to the shared CFG's `Switch`. */ + class Switch extends Stmt { + private Py::MatchStmt matchStmt; + + Switch() { this = TPyStmt(matchStmt) } + + Expr getExpr() { result.asExpr() = matchStmt.getSubject() } + + Case getCase(int index) { result.asStmt() = matchStmt.getCase(index) } + + Stmt getStmt(int index) { none() } + + override AstNode getChild(int index) { + index = 0 and result = this.getExpr() + or + result = this.getCase(index - 1) and index >= 1 + } + } + + /** A `case` clause in a match statement. */ + class Case extends Stmt { + private Py::Case caseStmt; + + Case() { this = TPyStmt(caseStmt) } + + AstNode getPattern(int index) { index = 0 and result.asPattern() = caseStmt.getPattern() } + + Expr getGuard() { result.asExpr() = caseStmt.getGuard().(Py::Guard).getTest() } + + AstNode getBody() { result.asStmtList() = caseStmt.getBody() } + + /** Holds if this case is a wildcard pattern (`case _:`). */ + predicate isWildcard() { caseStmt.getPattern() instanceof Py::MatchWildcardPattern } + + override AstNode getChild(int index) { + index = 0 and result = this.getPattern(0) + or + index = 1 and result = this.getGuard() + or + index = 2 and result = this.getBody() + } + } + + /** A wildcard case (`case _:`). */ + class DefaultCase extends Case { + DefaultCase() { this.isWildcard() } + } + + /** A conditional expression (`x if cond else y`). */ + class ConditionalExpr extends Expr { + private Py::IfExp ifExp; + + ConditionalExpr() { this = TPyExpr(ifExp) } + + /** Gets the condition of this expression. */ + Expr getCondition() { result.asExpr() = ifExp.getTest() } + + /** Gets the true branch of this expression. */ + Expr getThen() { result.asExpr() = ifExp.getBody() } + + /** Gets the false branch of this expression. */ + Expr getElse() { result.asExpr() = ifExp.getOrelse() } + + override AstNode getChild(int index) { + index = 0 and result = this.getCondition() + or + index = 1 and result = this.getThen() + or + index = 2 and result = this.getElse() + } + } + + /** + * A binary expression for the shared CFG. In Python, this covers all + * `and`/`or` expression operand pairs. + */ + class BinaryExpr extends Expr, TBoolExprPair { + private Py::BoolExpr be; + private int index; + + BinaryExpr() { this = TBoolExprPair(be, index) } + + /** Gets the underlying Python `BoolExpr`. */ + Py::BoolExpr getBoolExpr() { result = be } + + /** Gets the (zero-based) index of this pair within its `BoolExpr`. */ + int getIndex() { result = index } + + override string toString() { result = be.getOperator() } + + override Py::Location getLocation() { result = be.getValue(index).getLocation() } + + override Callable getEnclosingCallable() { result.asScope() = be.getScope() } + + /** Gets the left operand of this binary expression. */ + Expr getLeftOperand() { result.asExpr() = be.getValue(index) } + + /** Gets the right operand of this binary expression. */ + Expr getRightOperand() { + // Last pair: right operand is the final value. + index = count(be.getAValue()) - 2 and result.asExpr() = be.getValue(index + 1) + or + // Non-last pair: right operand is the next synthetic pair. + index < count(be.getAValue()) - 2 and + exists(BinaryExpr next | + next.getBoolExpr() = be and next.getIndex() = index + 1 and result = next + ) + } + + override AstNode getChild(int childIndex) { + childIndex = 0 and result = this.getLeftOperand() + or + childIndex = 1 and result = this.getRightOperand() + } + } + + /** A short-circuiting logical `and` expression. */ + class LogicalAndExpr extends BinaryExpr { + LogicalAndExpr() { this.getBoolExpr().getOp() instanceof Py::And } + } + + /** A short-circuiting logical `or` expression. */ + class LogicalOrExpr extends BinaryExpr { + LogicalOrExpr() { this.getBoolExpr().getOp() instanceof Py::Or } + } + + /** A null-coalescing expression. Python has no null-coalescing operator. */ + class NullCoalescingExpr extends BinaryExpr { + NullCoalescingExpr() { none() } + } + + /** + * A unary expression. Currently only used for the `not` subclass. + */ + class UnaryExpr extends Expr { + UnaryExpr() { exists(Py::UnaryExpr u | this = TPyExpr(u) and u.getOp() instanceof Py::Not) } + + /** Gets the operand of this unary expression. */ + Expr getOperand() { result.asExpr() = this.asExpr().(Py::UnaryExpr).getOperand() } + + override AstNode getChild(int index) { index = 0 and result = this.getOperand() } + } + + /** A logical `not` expression. */ + class LogicalNotExpr extends UnaryExpr { } + + /** + * An assignment expression. + * + * Empty in Python: `x = y` and `x += y` are statements (`AssignStmt` and + * `AugAssignStmt`), not expressions, and the walrus `x := y` is modeled + * separately as `NamedExpr`. The shared library's `Assignment` extends + * `BinaryExpr`, so it cannot share instances with our `Stmt`-based + * assignment forms. + */ + class Assignment extends BinaryExpr { + Assignment() { none() } + } + + /** A simple assignment expression. Empty in Python (see `Assignment`). */ + class AssignExpr extends Assignment { } + + /** A compound assignment expression. Empty in Python (see `Assignment`). */ + class CompoundAssignment extends Assignment { } + + /** + * A short-circuiting logical AND compound assignment expression (`&&=`). + * Python has no such operator. + */ + class AssignLogicalAndExpr extends CompoundAssignment { } + + /** + * A short-circuiting logical OR compound assignment expression (`||=`). + * Python has no such operator. + */ + class AssignLogicalOrExpr extends CompoundAssignment { } + + /** + * A short-circuiting null-coalescing compound assignment expression + * (`??=`). Python has no such operator. + */ + class AssignNullCoalescingExpr extends CompoundAssignment { } + + /** A boolean literal expression (`True` or `False`). */ + class BooleanLiteral extends Expr { + BooleanLiteral() { this = TPyExpr(any(Py::True t)) or this = TPyExpr(any(Py::False f)) } + + /** Gets the boolean value of this literal. */ + boolean getValue() { + this.asExpr() instanceof Py::True and result = true + or + this.asExpr() instanceof Py::False and result = false + } + } + + /** A pattern match expression. Python has no `instanceof`-style pattern match expression. */ + class PatternMatchExpr extends Expr { + PatternMatchExpr() { none() } + + Expr getExpr() { none() } + + AstNode getPattern() { none() } + } + + // ===== Python-specific expression classes (used by `getChild`) ===== + /** A Python binary expression (arithmetic, bitwise, matmul, etc.). */ + additional class ArithBinaryExpr extends Expr { + private Py::BinaryExpr binExpr; + + ArithBinaryExpr() { this = TPyExpr(binExpr) } + + Expr getLeft() { result.asExpr() = binExpr.getLeft() } + + Expr getRight() { result.asExpr() = binExpr.getRight() } + + override AstNode getChild(int index) { + index = 0 and result = this.getLeft() + or + index = 1 and result = this.getRight() + } + } + + /** A call expression (`func(args...)`). */ + additional class CallExpr extends Expr { + private Py::Call call; + + CallExpr() { this = TPyExpr(call) } + + Expr getFunc() { result.asExpr() = call.getFunc() } + + Expr getPositionalArg(int n) { result.asExpr() = call.getPositionalArg(n) } + + int getNumberOfPositionalArgs() { result = count(call.getAPositionalArg()) } + + Expr getKeywordValue(int n) { + result.asExpr() = call.getNamedArg(n).(Py::Keyword).getValue() + or + result.asExpr() = call.getNamedArg(n).(Py::DictUnpacking).getValue() + } + + int getNumberOfNamedArgs() { result = count(call.getANamedArg()) } + + override AstNode getChild(int index) { + index = 0 and result = this.getFunc() + or + result = this.getPositionalArg(index - 1) and index >= 1 + or + result = this.getKeywordValue(index - 1 - this.getNumberOfPositionalArgs()) and + index >= 1 + this.getNumberOfPositionalArgs() + } + } + + /** A subscript expression (`obj[index]`). */ + additional class SubscriptExpr extends Expr { + private Py::Subscript sub; + + SubscriptExpr() { this = TPyExpr(sub) } + + Expr getObject() { result.asExpr() = sub.getObject() } + + Expr getIndex() { result.asExpr() = sub.getIndex() } + + override AstNode getChild(int index) { + index = 0 and result = this.getObject() + or + index = 1 and result = this.getIndex() + } + } + + /** An attribute access (`obj.name`). */ + additional class AttributeExpr extends Expr { + private Py::Attribute attr; + + AttributeExpr() { this = TPyExpr(attr) } + + Expr getObject() { result.asExpr() = attr.getObject() } + + override AstNode getChild(int index) { index = 0 and result = this.getObject() } + } + + /** + * An `import x.y` module expression. Modelled as a leaf — the dotted + * name is just a string. + */ + additional class ImportExpression extends Expr { + ImportExpression() { this.asExpr() instanceof Py::ImportExpr } + } + + /** + * A `from m import x` member access. The module sub-expression is a + * child so that the CFG visits both the module load and this + * attribute selection. + */ + additional class ImportMemberExpr extends Expr { + private Py::ImportMember im; + + ImportMemberExpr() { this = TPyExpr(im) } + + /** Gets the module expression `m` in `from m import x`. */ + Expr getModule() { result.asExpr() = im.getModule() } + + override AstNode getChild(int index) { index = 0 and result = this.getModule() } + } + + /** A tuple literal. */ + additional class TupleExpr extends Expr { + private Py::Tuple tuple; + + TupleExpr() { this = TPyExpr(tuple) } + + Expr getElt(int n) { result.asExpr() = tuple.getElt(n) } + + override AstNode getChild(int index) { result = this.getElt(index) } + } + + /** A list literal. */ + additional class ListExpr extends Expr { + private Py::List list; + + ListExpr() { this = TPyExpr(list) } + + Expr getElt(int n) { result.asExpr() = list.getElt(n) } + + override AstNode getChild(int index) { result = this.getElt(index) } + } + + /** A set literal. */ + additional class SetExpr extends Expr { + private Py::Set set; + + SetExpr() { this = TPyExpr(set) } + + Expr getElt(int n) { result.asExpr() = set.getElt(n) } + + override AstNode getChild(int index) { result = this.getElt(index) } + } + + /** A dict literal. */ + additional class DictExpr extends Expr { + private Py::Dict dict; + + DictExpr() { this = TPyExpr(dict) } + + /** + * Gets the key of the `n`th item (at child index `2*n`); the value is + * at child index `2*n + 1`. + */ + Expr getKey(int n) { result.asExpr() = dict.getItem(n).(Py::KeyValuePair).getKey() } + + Expr getValue(int n) { result.asExpr() = dict.getItem(n).(Py::KeyValuePair).getValue() } + + int getNumberOfItems() { result = count(dict.getAnItem()) } + + override AstNode getChild(int index) { + exists(int item | + index = 2 * item and result = this.getKey(item) + or + index = 2 * item + 1 and result = this.getValue(item) + ) + } + } + + /** A unary expression other than `not` (e.g., `-x`, `+x`, `~x`). */ + additional class ArithUnaryExpr extends Expr { + private Py::UnaryExpr unaryExpr; + + ArithUnaryExpr() { this = TPyExpr(unaryExpr) and not unaryExpr.getOp() instanceof Py::Not } + + Expr getOperand() { result.asExpr() = unaryExpr.getOperand() } + + override AstNode getChild(int index) { index = 0 and result = this.getOperand() } + } + + /** + * A comprehension or generator expression. The iterable is evaluated in + * the enclosing scope; the body runs in a nested synthetic function + * scope handled by its own CFG. + */ + additional class Comprehension extends Expr { + private Py::Expr iterable; + + Comprehension() { + exists(Py::Expr c | this = TPyExpr(c) | + iterable = c.(Py::ListComp).getIterable() + or + iterable = c.(Py::SetComp).getIterable() + or + iterable = c.(Py::DictComp).getIterable() + or + iterable = c.(Py::GeneratorExp).getIterable() + ) + } + + Expr getIterable() { result.asExpr() = iterable } + + override AstNode getChild(int index) { index = 0 and result = this.getIterable() } + } + + /** A comparison expression (`a < b`, `a < b < c`, etc.). */ + additional class CompareExpr extends Expr { + private Py::Compare cmp; + + CompareExpr() { this = TPyExpr(cmp) } + + Expr getLeft() { result.asExpr() = cmp.getLeft() } + + Expr getComparator(int n) { result.asExpr() = cmp.getComparator(n) } + + override AstNode getChild(int index) { + index = 0 and result = this.getLeft() + or + result = this.getComparator(index - 1) and index >= 1 + } + } + + /** A slice expression (`start:stop:step`). */ + additional class SliceExpr extends Expr { + private Py::Slice slice; + + SliceExpr() { this = TPyExpr(slice) } + + Expr getStart() { result.asExpr() = slice.getStart() } + + Expr getStop() { result.asExpr() = slice.getStop() } + + Expr getStep() { result.asExpr() = slice.getStep() } + + override AstNode getChild(int index) { + index = 0 and result = this.getStart() + or + index = 1 and result = this.getStop() + or + index = 2 and result = this.getStep() + } + } + + /** A starred expression (`*x`). */ + additional class StarredExpr extends Expr { + private Py::Starred starred; + + StarredExpr() { this = TPyExpr(starred) } + + Expr getValue() { result.asExpr() = starred.getValue() } + + override AstNode getChild(int index) { index = 0 and result = this.getValue() } + } + + /** A formatted string literal (`f"...{expr}..."`). */ + additional class FstringExpr extends Expr { + private Py::Fstring fstring; + + FstringExpr() { this = TPyExpr(fstring) } + + Expr getValue(int n) { result.asExpr() = fstring.getValue(n) } + + override AstNode getChild(int index) { result = this.getValue(index) } + } + + /** A formatted value inside an f-string (`{expr}` or `{expr:spec}`). */ + additional class FormattedValueExpr extends Expr { + private Py::FormattedValue fv; + + FormattedValueExpr() { this = TPyExpr(fv) } + + Expr getValue() { result.asExpr() = fv.getValue() } + + Expr getFormatSpec() { result.asExpr() = fv.getFormatSpec() } + + override AstNode getChild(int index) { + index = 0 and result = this.getValue() + or + index = 1 and result = this.getFormatSpec() + } + } + + /** A `yield` expression. */ + additional class YieldExpr extends Expr { + private Py::Yield yield; + + YieldExpr() { this = TPyExpr(yield) } + + Expr getValue() { result.asExpr() = yield.getValue() } + + override AstNode getChild(int index) { index = 0 and result = this.getValue() } + } + + /** A `yield from` expression. */ + additional class YieldFromExpr extends Expr { + private Py::YieldFrom yieldFrom; + + YieldFromExpr() { this = TPyExpr(yieldFrom) } + + Expr getValue() { result.asExpr() = yieldFrom.getValue() } + + override AstNode getChild(int index) { index = 0 and result = this.getValue() } + } + + /** An `await` expression. */ + additional class AwaitExpr extends Expr { + private Py::Await await; + + AwaitExpr() { this = TPyExpr(await) } + + Expr getValue() { result.asExpr() = await.getValue() } + + override AstNode getChild(int index) { index = 0 and result = this.getValue() } + } + + /** + * A class definition expression (visits bases, but NOT PEP 695 type + * parameters — those bind in an annotation scope that nests the class + * body, so they belong to the inner scope's CFG, not the enclosing + * scope's; the legacy CFG also omitted them). + */ + additional class ClassDefExpr extends Expr { + private Py::ClassExpr classExpr; + + ClassDefExpr() { this = TPyExpr(classExpr) } + + Expr getBase(int n) { result.asExpr() = classExpr.getBase(n) } + + override AstNode getChild(int index) { result = this.getBase(index) } + } + + /** + * A function definition expression (visits positional and keyword + * defaults, but NOT PEP 695 type parameters — those bind in an + * annotation scope that nests the function body, so they belong to + * the inner scope's CFG, not the enclosing scope's; the legacy CFG + * also omitted them). + */ + additional class FunctionDefExpr extends Expr { + private Py::FunctionExpr funcExpr; + + FunctionDefExpr() { this = TPyExpr(funcExpr) } + + /** + * Gets the `n`th default for a positional argument, in evaluation + * order. Note that `Args.getDefault(int)` is indexed by argument + * position (with gaps for arguments without defaults), so we must + * renumber here to obtain contiguous indices. + */ + Expr getDefault(int n) { + result.asExpr() = + rank[n + 1](Py::Expr d, int i | d = funcExpr.getArgs().getDefault(i) | d order by i) + } + + /** Gets the `n`th default for a keyword-only argument, in evaluation order. */ + Expr getKwDefault(int n) { + result.asExpr() = + rank[n + 1](Py::Expr d, int i | d = funcExpr.getArgs().getKwDefault(i) | d order by i) + } + + int getNumberOfDefaults() { result = count(funcExpr.getArgs().getADefault()) } + + override AstNode getChild(int index) { + result = this.getDefault(index) + or + result = this.getKwDefault(index - this.getNumberOfDefaults()) + } + } + + /** A lambda expression (has default args evaluated at definition time). */ + additional class LambdaExpr extends Expr { + private Py::Lambda lambda; + + LambdaExpr() { this = TPyExpr(lambda) } + + /** Gets the `n`th default for a positional argument, in evaluation order. */ + Expr getDefault(int n) { + result.asExpr() = + rank[n + 1](Py::Expr d, int i | d = lambda.getArgs().getDefault(i) | d order by i) + } + + /** Gets the `n`th default for a keyword-only argument, in evaluation order. */ + Expr getKwDefault(int n) { + result.asExpr() = + rank[n + 1](Py::Expr d, int i | d = lambda.getArgs().getKwDefault(i) | d order by i) + } + + int getNumberOfDefaults() { result = count(lambda.getArgs().getADefault()) } + + override AstNode getChild(int index) { + result = this.getDefault(index) + or + result = this.getKwDefault(index - this.getNumberOfDefaults()) + } + } + + /** Gets the child of `n` at the specified (zero-based) index. */ + AstNode getChild(AstNode n, int index) { result = n.getChild(index) } +} + +private module Cfg0 = Make0; + +private import Cfg0 + +private module Cfg1 = Make1; + +private import Cfg1 + +private module Cfg2 = Make2; + +private import Cfg2 + +private module Input implements InputSig1, InputSig2 { + predicate cfgCachedStageRef() { CfgCachedStage::ref() } + + private newtype TLabel = TNone() + + class Label extends TLabel { + string toString() { result = "label" } + } + + class CallableContext = Void; + + predicate inConditionalContext(Ast::AstNode n, ConditionKind kind) { + kind.isBoolean() and + n = any(Ast::AssertStmt a).getTest() + } + + private string assertThrowTag() { result = "[assert-throw]" } + + predicate additionalNode(Ast::AstNode n, string tag, NormalSuccessor t) { + n instanceof Ast::AssertStmt and tag = assertThrowTag() and t instanceof DirectSuccessor + } + + predicate beginAbruptCompletion( + Ast::AstNode ast, PreControlFlowNode n, AbruptCompletion c, boolean always + ) { + ast instanceof Ast::AssertStmt and + n.isAdditional(ast, assertThrowTag()) and + c.asSimpleAbruptCompletion() instanceof ExceptionSuccessor and + always = true + } + + predicate endAbruptCompletion(Ast::AstNode ast, PreControlFlowNode n, AbruptCompletion c) { + none() + } + + predicate step(PreControlFlowNode n1, PreControlFlowNode n2) { + exists(Ast::AssertStmt assertStmt | + n1.isBefore(assertStmt) and + n2.isBefore(assertStmt.getTest()) + or + n1.isAfterTrue(assertStmt.getTest()) and + n2.isAfter(assertStmt) + or + n1.isAfterFalse(assertStmt.getTest()) and + ( + n2.isBefore(assertStmt.getMsg()) + or + not exists(assertStmt.getMsg()) and + n2.isAdditional(assertStmt, assertThrowTag()) + ) + or + n1.isAfter(assertStmt.getMsg()) and + n2.isAdditional(assertStmt, assertThrowTag()) + ) + } +} + +import CfgCachedStage +import Public + +/** + * Maps a CFG AST wrapper node to the corresponding Python AST node, if any. + * Entry, exit, and synthetic nodes have no corresponding Python AST node. + */ +Py::AstNode astNodeToPyNode(Ast::AstNode n) { + result = n.asExpr() + or + result = n.asStmt() + or + result = n.asScope() + or + result = n.asPattern() +} diff --git a/python/ql/test/extractor-tests/syntax_error/CONSISTENCY/CfgConsistency.expected b/python/ql/test/extractor-tests/syntax_error/CONSISTENCY/CfgConsistency.expected new file mode 100644 index 00000000000..91a01a3a3d9 --- /dev/null +++ b/python/ql/test/extractor-tests/syntax_error/CONSISTENCY/CfgConsistency.expected @@ -0,0 +1,4 @@ +consistencyOverview +| deadEnd | 1 | +deadEnd +| without_loop.py:7:5:7:9 | Break | diff --git a/python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.expected b/python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.ql b/python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.ql new file mode 100644 index 00000000000..a507878911b --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.ql @@ -0,0 +1,32 @@ +/** + * Phase -1 of the dataflow CFG migration: verifies that every variable + * binding visible to the AST (`Name.defines(v)`) corresponds to a CFG node + * in the new CFG (`semmle.python.controlflow.internal.AstNodeImpl`). + * + * The expected tag is `cfgdefines=`. Each binding annotation in the + * test sources looks like `# $ cfgdefines=x` for a binding currently + * covered by the new CFG, or `# $ MISSING: cfgdefines=x` for a binding + * that is known to be uncovered (a "red" test case that should be + * green-flipped once the corresponding `cfg-ext-*` extension lands). + */ + +import python +import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl +import utils.test.InlineExpectationsTest + +module CfgBindingsTest implements TestSig { + string getARelevantTag() { result = "cfgdefines" } + + predicate hasActualResult(Location location, string element, string tag, string value) { + exists(Name n, Variable v, CfgImpl::ControlFlowNode cfg | + n.defines(v) and + cfg.getAstNode().asExpr() = n and + location = n.getLocation() and + element = n.toString() and + tag = "cfgdefines" and + value = v.getId() + ) + } +} + +import MakeTest diff --git a/python/ql/test/library-tests/ControlFlow/bindings/annassign.py b/python/ql/test/library-tests/ControlFlow/bindings/annassign.py new file mode 100644 index 00000000000..7a9ae3ab6c7 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/annassign.py @@ -0,0 +1,13 @@ +# Annotated assignment (PEP 526). Both with and without an initializer. + +a: int = 1 # $ cfgdefines=a +b: str = "hi" # $ cfgdefines=b + +# Annotation without value: the AST records `c` as defined, +# and the new CFG now visits it via the AnnAssignStmt wrapper. +c: int # $ cfgdefines=c + +class K: # $ cfgdefines=K + field: int = 0 # $ cfgdefines=field + + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/compound.py b/python/ql/test/library-tests/ControlFlow/bindings/compound.py new file mode 100644 index 00000000000..cb2f36f12ff --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/compound.py @@ -0,0 +1,14 @@ +# Compound (tuple/list) assignment targets — actually wired in the new CFG. + +a, b = (1, 2) # $ cfgdefines=a cfgdefines=b +[c, d] = [3, 4] # $ cfgdefines=c cfgdefines=d + +# Nested unpacking. +(e, (f, g)) = (1, (2, 3)) # $ cfgdefines=e cfgdefines=f cfgdefines=g + +# Star unpacking. +h, *i = [1, 2, 3] # $ cfgdefines=h cfgdefines=i + +# Chained assignment with compound target. +j = k, l = (5, 6) # $ cfgdefines=j cfgdefines=k cfgdefines=l + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/comprehension.py b/python/ql/test/library-tests/ControlFlow/bindings/comprehension.py new file mode 100644 index 00000000000..6b5f722c1f7 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/comprehension.py @@ -0,0 +1,21 @@ +# Comprehension and `for` loop targets — wired in the new CFG. +# Comprehensions are nested function scopes with a synthetic `.0` parameter +# bound to the iterable. + +# Bare-name `for` target. +for i in range(3): # $ cfgdefines=i + pass + +# Compound `for` target. +for k, v in [(1, 2)]: # $ cfgdefines=k cfgdefines=v + pass + +# Comprehension targets. +_ = [x for x in range(3)] # $ cfgdefines=_ cfgdefines=x cfgdefines=.0 +_ = {y: z for y, z in []} # $ cfgdefines=_ cfgdefines=y cfgdefines=z cfgdefines=.0 +_ = (a for a in []) # $ cfgdefines=_ cfgdefines=a cfgdefines=.0 + +# Nested comprehensions. +_ = [b for c in [] for b in c] # $ cfgdefines=_ cfgdefines=c cfgdefines=b cfgdefines=.0 + + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py b/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py new file mode 100644 index 00000000000..dbfb857b536 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py @@ -0,0 +1,52 @@ +# Dead bindings under the "no expressions raise" CFG abstraction. +# +# The new CFG does not currently model raise edges from arbitrary +# expressions. As a consequence, code that is only reachable through +# exception flow is (correctly) classified as dead and has no CFG node. +# Variable bindings in dead code do not need CFG nodes - SSA / dataflow +# over dead code is moot. +# +# These tests act as a regression guard: the bindings below intentionally +# have no `cfgdefines=` annotations. If raise modelling is later added, +# the BindingsTest infrastructure will surface the new CFG nodes as +# unexpected results, and this file will need to be revisited. + + +def f(obj): # $ cfgdefines=f cfgdefines=obj + try: + return len(obj) + except TypeError: + pass + + # The first try's body always returns; its except handler does not + # raise or otherwise transfer control, so under "no expressions + # raise" the only paths out of the try-statement are dead. Everything + # below is unreachable. + try: + hint = type(obj).__length_hint__ + except AttributeError: + return None + return hint + + +def g(): # $ cfgdefines=g + try: + raise Exception("inner") + except: + raise Exception("outer") + else: + # Unreachable: the inner try body always raises, so the `else:` + # clause never runs. + hit_inner_else = True + + +def h(cache, key): # $ cfgdefines=h cfgdefines=cache cfgdefines=key + try: + return cache[key] + except KeyError: + pass + + # Same pattern as `f`: dead under "no expressions raise". + value = compute(key) + cache[key] = value + return value diff --git a/python/ql/test/library-tests/ControlFlow/bindings/decorated.py b/python/ql/test/library-tests/ControlFlow/bindings/decorated.py new file mode 100644 index 00000000000..9b93c166ace --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/decorated.py @@ -0,0 +1,30 @@ +# Decorated `def`/`class` — wired in the new CFG. + + +def deco(f): # $ cfgdefines=deco cfgdefines=f + return f + + +@deco +def decorated_func(): # $ cfgdefines=decorated_func + pass + + +@deco +class DecoratedClass: # $ cfgdefines=DecoratedClass + pass + + +# Stacked decorators. +@deco +@deco +def doubly(): # $ cfgdefines=doubly + pass + + +# Inside a class body. +class Outer: # $ cfgdefines=Outer + @staticmethod + def inner(): # $ cfgdefines=inner + pass + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/except_handler.py b/python/ql/test/library-tests/ControlFlow/bindings/except_handler.py new file mode 100644 index 00000000000..57b6c99fe9b --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/except_handler.py @@ -0,0 +1,19 @@ +# Exception-handler name bindings. These are already wired in the new +# CFG provided the try body can raise; `raise` statements are reliably +# treated as exception sources. + +try: + raise ValueError("oops") +except ValueError as e: # $ cfgdefines=e + pass + +try: + raise TypeError("oops") +except (TypeError, KeyError) as err: # $ cfgdefines=err + pass + +# Exception groups (Python 3.11+). +try: + raise ValueError("oops") +except* ValueError as eg: # $ cfgdefines=eg + pass diff --git a/python/ql/test/library-tests/ControlFlow/bindings/imports.py b/python/ql/test/library-tests/ControlFlow/bindings/imports.py new file mode 100644 index 00000000000..c8834b5332a --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/imports.py @@ -0,0 +1,14 @@ +# Import aliases — all bound names below are now reachable via the new +# CFG's `ImportStmt` wrapper. + +import os # $ cfgdefines=os +import os.path # $ cfgdefines=os +import os as o # $ cfgdefines=o +from os import path # $ cfgdefines=path +from os import path as p # $ cfgdefines=p +from os import sep, linesep # $ cfgdefines=sep cfgdefines=linesep +from os import ( + getcwd, # $ cfgdefines=getcwd + getcwdb, # $ cfgdefines=getcwdb +) + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/match_pattern.py b/python/ql/test/library-tests/ControlFlow/bindings/match_pattern.py new file mode 100644 index 00000000000..0868a2680d0 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/match_pattern.py @@ -0,0 +1,24 @@ +# Match-statement pattern bindings — wired in the new CFG. + +def f(subject): # $ cfgdefines=f cfgdefines=subject + match subject: + case x: # $ cfgdefines=x + pass + case [a, b]: # $ cfgdefines=a cfgdefines=b + pass + case {"k": v}: # $ cfgdefines=v + pass + case Point(p, q): # $ cfgdefines=p cfgdefines=q + pass + case [_, *rest]: # $ cfgdefines=rest + pass + case (1 | 2) as n: # $ cfgdefines=n + pass + + +class Point: # $ cfgdefines=Point + __match_args__ = ("x", "y") # $ cfgdefines=__match_args__ + x: int # $ cfgdefines=x + y: int # $ cfgdefines=y + + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/parameters.py b/python/ql/test/library-tests/ControlFlow/bindings/parameters.py new file mode 100644 index 00000000000..7fe5e01e4c4 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/parameters.py @@ -0,0 +1,42 @@ +# Function parameters. + +def positional(a, b): # $ cfgdefines=positional cfgdefines=a cfgdefines=b + pass + + +def with_default(x=1, y=2): # $ cfgdefines=with_default cfgdefines=x cfgdefines=y + pass + + +def with_vararg(*args): # $ cfgdefines=with_vararg cfgdefines=args + pass + + +def with_kwarg(**kwargs): # $ cfgdefines=with_kwarg cfgdefines=kwargs + pass + + +def with_kwonly(*, k1, k2=5): # $ cfgdefines=with_kwonly cfgdefines=k1 cfgdefines=k2 + pass + + +def kitchen_sink(a, b=2, *args, k1, k2=5, **kw): # $ cfgdefines=kitchen_sink cfgdefines=a cfgdefines=b cfgdefines=args cfgdefines=k1 cfgdefines=k2 cfgdefines=kw + pass + + +# Methods get `self` / `cls`. +class C: # $ cfgdefines=C + def method(self, x): # $ cfgdefines=method cfgdefines=self cfgdefines=x + pass + + @classmethod + def cmethod(cls, x): # $ cfgdefines=cmethod cfgdefines=cls cfgdefines=x + pass + + +# Lambda parameter. +_ = lambda p: p + 1 # $ cfgdefines=_ cfgdefines=p + +# PEP 570 positional-only. +def pos_only(a, b, /, c): # $ cfgdefines=pos_only cfgdefines=a cfgdefines=b cfgdefines=c + pass diff --git a/python/ql/test/library-tests/ControlFlow/bindings/simple.py b/python/ql/test/library-tests/ControlFlow/bindings/simple.py new file mode 100644 index 00000000000..51cb7d828c9 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/simple.py @@ -0,0 +1,14 @@ +# Simple bindings that should already work in the new CFG. +# No MISSING annotations expected. + +x = 1 # $ cfgdefines=x +y = x + 1 # $ cfgdefines=y + +def f(): # $ cfgdefines=f + pass + +class C: # $ cfgdefines=C + pass + +# Re-assignment. +x = 2 # $ cfgdefines=x diff --git a/python/ql/test/library-tests/ControlFlow/bindings/type_params.py b/python/ql/test/library-tests/ControlFlow/bindings/type_params.py new file mode 100644 index 00000000000..2bd34dc3f0e --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/type_params.py @@ -0,0 +1,21 @@ +# PEP 695 type parameters (Python 3.12+). + +# PEP 695 type-param names on `def`/`class` bind in an annotation scope +# that nests the function/class body — they have no CFG node in the +# enclosing scope (matching the legacy CFG). +def func[T](x: T) -> T: # $ cfgdefines=func cfgdefines=x + return x + + +class Box[T]: # $ cfgdefines=Box + item: T # $ cfgdefines=item + + +# Multi-parameter, with bound and variadics. +def multi[T: int, *Ts, **P](x: T, *args: *Ts, **kwargs: P.kwargs) -> T: # $ cfgdefines=multi cfgdefines=x cfgdefines=args cfgdefines=kwargs + return x + + +# `type` statement (PEP 695). +type Alias[T] = list[T] # $ cfgdefines=Alias cfgdefines=T + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/walrus_starred.py b/python/ql/test/library-tests/ControlFlow/bindings/walrus_starred.py new file mode 100644 index 00000000000..5c0c1bd8319 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/walrus_starred.py @@ -0,0 +1,14 @@ +# Walrus and starred-target edge cases — wired in the new CFG. + +# Walrus in expression context. +if (y := 5) > 0: # $ cfgdefines=y + pass + +# Walrus in a comprehension. The comprehension introduces a synthetic +# `.0` parameter bound to the iterable. +_ = [w for _ in range(3) if (w := 1)] # $ cfgdefines=_ cfgdefines=w cfgdefines=.0 + +# Starred target in a Tuple LHS. +*head, tail = [1, 2, 3] # $ cfgdefines=head cfgdefines=tail + + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/with_stmt.py b/python/ql/test/library-tests/ControlFlow/bindings/with_stmt.py new file mode 100644 index 00000000000..5fffe46c5d4 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/with_stmt.py @@ -0,0 +1,21 @@ +# `with cm() as x:` bindings — wired in the new CFG. + +class CM: # $ cfgdefines=CM + def __enter__(self): return self # $ cfgdefines=__enter__ cfgdefines=self + def __exit__(self, *a): pass # $ cfgdefines=__exit__ cfgdefines=self cfgdefines=a + +with CM() as x: # $ cfgdefines=x + pass + +# Multiple items. +with CM() as a, CM() as b: # $ cfgdefines=a cfgdefines=b + pass + +# Parenthesised form (Python 3.10+). +with (CM() as p, CM() as q): # $ cfgdefines=p cfgdefines=q + pass + +# Compound target in `with`. +with CM() as (m, n): # $ cfgdefines=m cfgdefines=n + pass + diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/AllLiveReachable.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/AllLiveReachable.ql index 886ccb4c348..de44daa3e2c 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/AllLiveReachable.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/AllLiveReachable.ql @@ -5,6 +5,8 @@ * have separate CFGs and are excluded from this check. */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/AnnotationHasCfgNode.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/AnnotationHasCfgNode.ql index 04c01abf8a6..5311d118576 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/AnnotationHasCfgNode.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/AnnotationHasCfgNode.ql @@ -2,6 +2,8 @@ * Checks that every timer annotation has a corresponding CFG node. */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockAnnotationGap.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockAnnotationGap.ql index 691144e06e4..0a2b08ff3fd 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockAnnotationGap.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockAnnotationGap.ql @@ -8,6 +8,8 @@ * edge leaves the basic block and the normal successor may be dead. */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.expected index 910fd3c8a80..c5ef1ba9394 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.expected +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.expected @@ -1,7 +1,7 @@ | test_boolean.py:9:10:9:43 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:9:59:9:59 | IntegerLiteral | timestamp 2 | test_boolean.py:9:19:9:19 | IntegerLiteral | timestamp 0 | | test_boolean.py:15:10:15:43 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:15:50:15:50 | IntegerLiteral | timestamp 1 | test_boolean.py:15:20:15:20 | IntegerLiteral | timestamp 0 | | test_boolean.py:21:10:21:42 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:21:49:21:49 | IntegerLiteral | timestamp 1 | test_boolean.py:21:19:21:19 | IntegerLiteral | timestamp 0 | -| test_boolean.py:27:10:27:34 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:27:50:27:50 | IntegerLiteral | timestamp 2 | test_boolean.py:27:20:27:20 | IntegerLiteral | timestamp 0 | +| test_boolean.py:27:10:27:43 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:27:59:27:59 | IntegerLiteral | timestamp 2 | test_boolean.py:27:20:27:20 | IntegerLiteral | timestamp 0 | | test_boolean.py:40:10:40:61 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:40:86:40:86 | IntegerLiteral | timestamp 3 | test_boolean.py:40:16:40:16 | IntegerLiteral | timestamp 0 | | test_boolean.py:46:10:46:61 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:46:86:46:86 | IntegerLiteral | timestamp 3 | test_boolean.py:46:16:46:16 | IntegerLiteral | timestamp 0 | | test_boolean.py:52:10:52:95 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:52:120:52:120 | IntegerLiteral | timestamp 4 | test_boolean.py:52:20:52:20 | IntegerLiteral | timestamp 0 | diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.ql index 6c08d44a5a5..30697f1403e 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.ql @@ -3,6 +3,8 @@ * increasing minimum-timestamp order. */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/ConsecutiveTimestamps.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/ConsecutiveTimestamps.ql index 01ff59b49bf..709fd5665ea 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/ConsecutiveTimestamps.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/ConsecutiveTimestamps.ql @@ -11,6 +11,8 @@ * lambdas that have annotations in nested scopes). */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/ContiguousTimestamps.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/ContiguousTimestamps.ql index f18c52750b5..456ebf447da 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/ContiguousTimestamps.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/ContiguousTimestamps.ql @@ -4,6 +4,7 @@ * in at least one annotation (live or dead). */ +import python import TimerUtils from TestFunction f, int missing, int maxTs, TimerAnnotation maxAnn diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NeverReachable.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NeverReachable.ql index 9fbb9115814..b09a936a0a4 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/NeverReachable.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NeverReachable.ql @@ -4,6 +4,8 @@ * entry (including within the same basic block). */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.ql new file mode 100644 index 00000000000..75f02d14a9c --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.ql @@ -0,0 +1,14 @@ +/** New-CFG version of AllLiveReachable. */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerCfgNode a, TestFunction f +where allLiveReachable(a, f) +select a, "Unreachable live annotation; entry of $@ does not reach this node", f, f.getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.expected new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.expected @@ -0,0 +1 @@ + diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.ql new file mode 100644 index 00000000000..4b1d82e27e6 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.ql @@ -0,0 +1,18 @@ +/** + * New-CFG version of AnnotationHasCfgNode. + * + * Checks that every timer annotation has a corresponding CFG node. + */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils::CfgTests + +from TimerAnnotation ann +where annotationWithoutCfgNode(ann) +select ann, "Annotation in $@ has no CFG node", ann.getTestFunction(), + ann.getTestFunction().getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.ql new file mode 100644 index 00000000000..80dd759a365 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.ql @@ -0,0 +1,26 @@ +/** + * New-CFG version of BasicBlockAnnotationGap. + * + * Original: + * Checks that within a basic block, if a node is annotated then its + * successor is also annotated (or excluded). A gap in annotations + * within a basic block indicates a missing annotation, since there + * are no branches to justify the gap. + * + * Nodes with exceptional successors are excluded, as the exception + * edge leaves the basic block and the normal successor may be dead. + */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerCfgNode a, CfgNode succ +where basicBlockAnnotationGap(a, succ) +select a, "Annotated node followed by unannotated $@ in the same basic block", succ, + succ.getNode().toString() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.ql new file mode 100644 index 00000000000..f06d08d937e --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.ql @@ -0,0 +1,21 @@ +/** + * New-CFG version of BasicBlockOrdering. + * + * Original: + * Checks that within a single basic block, annotations appear in + * increasing minimum-timestamp order. + */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerCfgNode a, TimerCfgNode b, int minA, int minB +where basicBlockOrdering(a, b, minA, minB) +select a, "Basic block ordering: $@ appears before $@", a.getTimestampExpr(minA), + "timestamp " + minA, b.getTimestampExpr(minB), "timestamp " + minB diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.ql new file mode 100644 index 00000000000..cfd8ffb4e4b --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.ql @@ -0,0 +1,80 @@ +/** + * New-CFG version of BranchTimestamps. + * + * Checks that when a node has both a true and false successor, the + * live timestamps on one branch are marked as dead on the other. + * This ensures that boolean branches are fully annotated with dead() + * markers for the paths not taken. + * + * Limitation: the `@ t[ts, ...]` / `dead(ts)` annotation scheme can only + * model branch-dead-ness for plain boolean control flow that reconverges + * linearly after the split — i.e. `if`-with-else and `if`-expression. + * It cannot model: + * + * * loops (`while` / `for`): body timestamps repeat across iterations, + * so the loop-exit annotation can't list them as dead; + * * `match` statements: each `case` body is a syntactically distinct + * sub-tree, and the branches don't reconverge through a common + * annotation point in the timeline; + * * `try` / `with` and `raise` / `assert`: exception edges are modelled + * as true/false but flow to syntactically distinct handlers, with no + * reconvergence in the linear annotation order; + * * short-circuit `and` / `or` (`BoolExpr`): the branches reconverge at + * the BoolExpr's after-node, so timestamps on one branch are live + * downstream of the other rather than dead; + * * `if` without an `else` clause, and `if`/`elif` chains: the false + * branch reconverges with the true branch at the post-if statement + * (no-else) or fans out across multiple elif-test annotations, + * neither of which fit the binary annotation scheme. + * + * Branch nodes inside those constructs are therefore whitelisted out + * below. The check still fires (and is useful) for plain `if`/`else` + * and conditional-expression branching. + */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +/** + * Holds if `f` contains a construct whose branches the linear-timestamp + * annotation scheme cannot describe (see file-level comment). + */ +private predicate hasUnmodellableBranching(Function f) { + exists(AstNode bad | + bad.getScope() = f and + ( + bad instanceof While + or + bad instanceof For + or + bad instanceof MatchStmt + or + bad instanceof Try + or + bad instanceof With + or + bad instanceof Raise + or + bad instanceof Assert + or + bad instanceof BoolExpr + or + bad instanceof If and + (not exists(bad.(If).getAnOrelse()) or bad.(If).isElif()) + ) + ) +} + +from TimerCfgNode node, int ts, string branch +where + missingBranchTimestamp(node, ts, branch) and + not hasUnmodellableBranching(node.getTestFunction()) +select node, + "Timestamp " + ts + " on true/false branch is missing a dead() annotation on the " + branch + + " successor in $@", node.getTestFunction(), node.getTestFunction().getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.expected new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.expected @@ -0,0 +1 @@ + diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.ql new file mode 100644 index 00000000000..3feacae264e --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.ql @@ -0,0 +1,22 @@ +/** + * New-CFG version of ConsecutivePredecessorTimestamps. + * + * Checks that each annotated node (except the minimum timestamp) has + * a predecessor annotation with timestamp `a - 1`. This is the reverse + * of ConsecutiveTimestamps: it catches nodes that are reachable but + * arrived at from the wrong place (skipping an intermediate node). + */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerAnnotation ann, int a +where consecutivePredecessorTimestamps(ann, a) +select ann, "$@ in $@ has no consecutive predecessor (expected " + (a - 1) + ")", + ann.getTimestampExpr(a), "Timestamp " + a, ann.getTestFunction(), ann.getTestFunction().getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.ql new file mode 100644 index 00000000000..8e52663d6ea --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.ql @@ -0,0 +1,29 @@ +/** + * New-CFG version of ConsecutiveTimestamps. + * + * Original: + * Checks that consecutive annotated nodes have consecutive timestamps: + * for each annotation with timestamp `a`, some CFG node for that annotation + * must have a next annotation containing `a + 1`. + * + * Handles CFG splitting (e.g., finally blocks duplicated for normal/exceptional + * flow) by checking that at least one split has the required successor. + * + * Only applies to functions where all annotations are in the function's + * own scope (excludes tests with generators, async, comprehensions, or + * lambdas that have annotations in nested scopes). + */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerAnnotation ann, int a +where consecutiveTimestamps(ann, a) +select ann, "$@ in $@ has no consecutive successor (expected " + (a + 1) + ")", + ann.getTimestampExpr(a), "Timestamp " + a, ann.getTestFunction(), ann.getTestFunction().getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgImpl.qll b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgImpl.qll new file mode 100644 index 00000000000..1da80d2ee0d --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgImpl.qll @@ -0,0 +1,101 @@ +/** + * Implementation of the evaluation-order CFG signature using the new + * shared control flow graph from AstNodeImpl. + */ + +private import python as Py +import TimerUtils +private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl +private import codeql.controlflow.SuccessorType + +private class NewControlFlowNode = CfgImpl::ControlFlowNode; + +private class NewBasicBlock = CfgImpl::BasicBlock; + +/** New (shared) CFG implementation of the evaluation-order signature. */ +module NewCfg implements EvalOrderCfgSig { + class CfgNode instanceof NewControlFlowNode { + // Use the post-order representative for each AST node: the "after" node. + // For simple leaf nodes this is the merged before/after node. For + // post-order expressions this is the TAstNode. For pre-order expressions + // (and/or/not/ternary) this uses an AfterValueNode, which places the + // expression after its operands — matching the timer test expectations. + CfgNode() { NewControlFlowNode.super.isAfter(_) } + + string toString() { result = NewControlFlowNode.super.toString() } + + Py::Location getLocation() { result = NewControlFlowNode.super.getLocation() } + + Py::AstNode getNode() { + result = CfgImpl::astNodeToPyNode(NewControlFlowNode.super.getAstNode()) + } + + CfgNode getASuccessor() { nextCfgNode(this, result) } + + CfgNode getATrueSuccessor() { + NewControlFlowNode.super.isAfterTrue(_) and + // Only where there's also a false branch (true boolean split) + exists(NewControlFlowNode other | other.isAfterFalse(NewControlFlowNode.super.getAstNode())) and + nextCfgNodeFrom(this, result) + } + + CfgNode getAFalseSuccessor() { + NewControlFlowNode.super.isAfterFalse(_) and + // Only where there's also a true branch (true boolean split) + exists(NewControlFlowNode other | other.isAfterTrue(NewControlFlowNode.super.getAstNode())) and + nextCfgNodeFrom(this, result) + } + + CfgNode getAnExceptionalSuccessor() { + exists(NewControlFlowNode mid | + mid = NewControlFlowNode.super.getAnExceptionSuccessor() and + nextCfgNodeFrom(mid, result) + ) + } + + Py::Scope getScope() { result = NewControlFlowNode.super.getEnclosingCallable().asScope() } + + BasicBlock getBasicBlock() { + exists(NewBasicBlock bb, int i | bb.getNode(i) = this and result = bb) + } + } + + /** + * Holds if `next` is the nearest CfgNode reachable from `n` via + * one or more raw CFG successor edges, skipping non-CfgNode intermediaries. + */ + private predicate nextCfgNodeFrom(NewControlFlowNode n, CfgNode next) { + next = n.getASuccessor() + or + exists(NewControlFlowNode mid | + mid = n.getASuccessor() and + not mid instanceof CfgNode and + nextCfgNodeFrom(mid, next) + ) + } + + /** + * Holds if `next` is the nearest CfgNode successor of `n`, + * skipping synthetic intermediate nodes. + */ + private predicate nextCfgNode(CfgNode n, CfgNode next) { nextCfgNodeFrom(n, next) } + + class BasicBlock instanceof NewBasicBlock { + string toString() { result = NewBasicBlock.super.toString() } + + CfgNode getNode(int n) { result = NewBasicBlock.super.getNode(n) } + + predicate reaches(BasicBlock bb) { this = bb or this.strictlyReaches(bb) } + + predicate strictlyReaches(BasicBlock bb) { NewBasicBlock.super.getASuccessor+() = bb } + + predicate strictlyDominates(BasicBlock bb) { NewBasicBlock.super.strictlyDominates(bb) } + } + + CfgNode scopeGetEntryNode(Py::Scope s) { + exists(CfgImpl::ControlFlow::EntryNode entry | + entry.getEnclosingCallable().asScope() = s and + nextCfgNodeFrom(entry, result) + ) + } +} diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.ql new file mode 100644 index 00000000000..6949b2cc6e9 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.ql @@ -0,0 +1,21 @@ +/** + * New-CFG version of NeverReachable. + * + * Original: + * Checks that expressions annotated with `t.never` either have no CFG + * node, or if they do, that the node is not reachable from its scope's + * entry (including within the same basic block). + */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils::CfgTests + +from TimerAnnotation ann +where neverReachable(ann) +select ann, "Node annotated with t.never is reachable in $@", ann.getTestFunction(), + ann.getTestFunction().getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.ql new file mode 100644 index 00000000000..442ca5f5456 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.ql @@ -0,0 +1,22 @@ +/** + * New-CFG version of NoBackwardFlow. + * + * Original: + * Checks that time never flows backward between consecutive timer annotations + * in the CFG. For each pair of consecutive annotated nodes (A -> B), there must + * exist timestamps a in A and b in B with a < b. + */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerCfgNode a, TimerCfgNode b, int minA, int maxB +where noBackwardFlow(a, b, minA, maxB) +select a, "Backward flow: $@ flows to $@ (max timestamp $@)", a.getTimestampExpr(minA), + minA.toString(), b, b.getNode().toString(), b.getTimestampExpr(maxB), maxB.toString() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.expected new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.expected @@ -0,0 +1 @@ + diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.ql new file mode 100644 index 00000000000..e07890f7250 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.ql @@ -0,0 +1,18 @@ +/** + * New-CFG version of NoBasicBlock. + * + * Checks that every annotated CFG node belongs to a basic block. + */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from CfgNode n, TestFunction f +where noBasicBlock(n, f) +select n, "CFG node in $@ does not belong to any basic block", f, f.getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.ql new file mode 100644 index 00000000000..5a1a1aba2a7 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.ql @@ -0,0 +1,21 @@ +/** + * New-CFG version of NoSharedReachable. + * + * Original: + * Checks that two annotations sharing a timestamp value are on + * mutually exclusive CFG paths (neither can reach the other). + */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerCfgNode a, TimerCfgNode b, int ts +where noSharedReachable(a, b, ts) +select a, "Shared timestamp $@ but this node reaches $@", a.getTimestampExpr(ts), ts.toString(), b, + b.getNode().toString() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.ql new file mode 100644 index 00000000000..ebbc60346db --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.ql @@ -0,0 +1,22 @@ +/** + * New-CFG version of StrictForward. + * + * Original: + * Stronger version of NoBackwardFlow: for consecutive annotated nodes + * A -> B that both have a single timestamp (non-loop code) and B does + * NOT dominate A (forward edge), requires max(A) < min(B). + */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerCfgNode a, TimerCfgNode b, int maxA, int minB +where strictForward(a, b, maxA, minB) +select a, "Strict forward violation: $@ flows to $@", a.getTimestampExpr(maxA), "timestamp " + maxA, + b.getTimestampExpr(minB), "timestamp " + minB diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.expected index 6e8ea12c9dd..775cc7bdbbf 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.expected +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.expected @@ -1,7 +1,7 @@ | test_boolean.py:9:10:9:43 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:9:59:9:59 | IntegerLiteral | 2 | test_boolean.py:9:10:9:13 | ControlFlowNode for True | True | test_boolean.py:9:19:9:19 | IntegerLiteral | 0 | | test_boolean.py:15:10:15:43 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:15:50:15:50 | IntegerLiteral | 1 | test_boolean.py:15:10:15:14 | ControlFlowNode for False | False | test_boolean.py:15:20:15:20 | IntegerLiteral | 0 | | test_boolean.py:21:10:21:42 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:21:49:21:49 | IntegerLiteral | 1 | test_boolean.py:21:10:21:13 | ControlFlowNode for True | True | test_boolean.py:21:19:21:19 | IntegerLiteral | 0 | -| test_boolean.py:27:10:27:34 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:27:50:27:50 | IntegerLiteral | 2 | test_boolean.py:27:10:27:14 | ControlFlowNode for False | False | test_boolean.py:27:20:27:20 | IntegerLiteral | 0 | +| test_boolean.py:27:10:27:43 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:27:59:27:59 | IntegerLiteral | 2 | test_boolean.py:27:10:27:14 | ControlFlowNode for False | False | test_boolean.py:27:20:27:20 | IntegerLiteral | 0 | | test_boolean.py:40:10:40:61 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:40:86:40:86 | IntegerLiteral | 3 | test_boolean.py:40:10:40:10 | ControlFlowNode for IntegerLiteral | IntegerLiteral | test_boolean.py:40:16:40:16 | IntegerLiteral | 0 | | test_boolean.py:46:10:46:61 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:46:86:46:86 | IntegerLiteral | 3 | test_boolean.py:46:10:46:10 | ControlFlowNode for IntegerLiteral | IntegerLiteral | test_boolean.py:46:16:46:16 | IntegerLiteral | 0 | | test_boolean.py:52:10:52:95 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:52:120:52:120 | IntegerLiteral | 4 | test_boolean.py:52:11:52:47 | ControlFlowNode for BoolExpr | BoolExpr | test_boolean.py:52:63:52:63 | IntegerLiteral | 2 | diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.ql index e9926284295..4acf45db3cd 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.ql @@ -4,6 +4,8 @@ * exist timestamps a in A and b in B with a < b. */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBasicBlock.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBasicBlock.ql index 82d9589a975..5568bd2a9a4 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBasicBlock.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBasicBlock.ql @@ -2,6 +2,8 @@ * Checks that every annotated CFG node belongs to a basic block. */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoSharedReachable.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoSharedReachable.ql index e9f685e8ffa..1fcceb2aca9 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoSharedReachable.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoSharedReachable.ql @@ -3,6 +3,8 @@ * mutually exclusive CFG paths (neither can reach the other). */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/OldCfgImpl.qll b/python/ql/test/library-tests/ControlFlow/evaluation-order/OldCfgImpl.qll index cb7bbb495b8..fc52c8dd3ed 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/OldCfgImpl.qll +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/OldCfgImpl.qll @@ -3,14 +3,14 @@ * Python control flow graph. */ -private import python as PY +private import python as Py import TimerUtils /** Existing Python CFG implementation of the evaluation-order signature. */ module OldCfg implements EvalOrderCfgSig { - class CfgNode = PY::ControlFlowNode; + class CfgNode = Py::ControlFlowNode; - class BasicBlock = PY::BasicBlock; + class BasicBlock = Py::BasicBlock; - CfgNode scopeGetEntryNode(PY::Scope s) { result = s.getEntryNode() } + CfgNode scopeGetEntryNode(Py::Scope s) { result = s.getEntryNode() } } diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.expected index 6562ff9f7b2..34e050b0f8a 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.expected +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.expected @@ -1,7 +1,7 @@ | test_boolean.py:9:10:9:43 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:9:59:9:59 | IntegerLiteral | timestamp 2 | test_boolean.py:9:19:9:19 | IntegerLiteral | timestamp 0 | | test_boolean.py:15:10:15:43 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:15:50:15:50 | IntegerLiteral | timestamp 1 | test_boolean.py:15:20:15:20 | IntegerLiteral | timestamp 0 | | test_boolean.py:21:10:21:42 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:21:49:21:49 | IntegerLiteral | timestamp 1 | test_boolean.py:21:19:21:19 | IntegerLiteral | timestamp 0 | -| test_boolean.py:27:10:27:34 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:27:50:27:50 | IntegerLiteral | timestamp 2 | test_boolean.py:27:20:27:20 | IntegerLiteral | timestamp 0 | +| test_boolean.py:27:10:27:43 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:27:59:27:59 | IntegerLiteral | timestamp 2 | test_boolean.py:27:20:27:20 | IntegerLiteral | timestamp 0 | | test_boolean.py:40:10:40:61 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:40:86:40:86 | IntegerLiteral | timestamp 3 | test_boolean.py:40:16:40:16 | IntegerLiteral | timestamp 0 | | test_boolean.py:46:10:46:61 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:46:86:46:86 | IntegerLiteral | timestamp 3 | test_boolean.py:46:16:46:16 | IntegerLiteral | timestamp 0 | | test_boolean.py:52:10:52:95 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:52:120:52:120 | IntegerLiteral | timestamp 4 | test_boolean.py:52:63:52:63 | IntegerLiteral | timestamp 2 | diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.ql index 79b383a4acf..9e64770bab4 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.ql @@ -4,6 +4,8 @@ * NOT dominate A (forward edge), requires max(A) < min(B). */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/test_boolean.py b/python/ql/test/library-tests/ControlFlow/evaluation-order/test_boolean.py index a12975634f4..a3b2268a831 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/test_boolean.py +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/test_boolean.py @@ -24,7 +24,7 @@ def test_or_short_circuit(t): @test def test_or_both_sides(t): # False or X — both operands evaluated, result is X - x = (False @ t[0] or 42 @ t[1]) @ t[dead(1), 2] + x = (False @ t[0] or 42 @ t[1, dead(2)]) @ t[dead(1), 2] @test diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/test_if.py b/python/ql/test/library-tests/ControlFlow/evaluation-order/test_if.py index 8880aaaef34..a6eb6c7d5ca 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/test_if.py +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/test_if.py @@ -85,7 +85,7 @@ def test_nested_if_else(t): else: z = 2 @ t[dead(4)] else: - z = 3 @ t[dead(4)] + z = 3 @ t[dead(3), dead(4)] w = 0 @ t[5]