From 89180671c8b2ffe0adc7836db963df1361d91d7e Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 12:35:47 +0000 Subject: [PATCH] Python: wire import-statement bindings into the shared CFG (green) Adds `ImportStmt` and `ImportStarStmt` wrappers in `AstNodeImpl.qll`. For each `Alias` in an import statement, both the value (module/member expression) and the bound `asname` Name become children of the CFG node for the import statement, in evaluation order. Without this, every `Name` introduced by `import` / `from .. import ..` lacked a CFG node, even though `Name.defines(v)` returns true for it on the AST side. This was the highest-volume gap: 20,332 missing import aliases across CPython. Removes the corresponding MISSING: annotations from imports.py. Verified: all 24 ControlFlow/evaluation-order tests still pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../controlflow/internal/AstNodeImpl.qll | 48 +++++++++++++++++++ .../ControlFlow/bindings/imports.py | 20 ++++---- 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll b/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll index cc014440291..9797f727e47 100644 --- a/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll +++ b/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll @@ -512,6 +512,54 @@ module Ast implements AstSig { } } + /** + * An `import` statement (`import a, b` or `from m import a, b`). + * + * Each alias contributes two children in evaluation order: first the + * value expression (which performs the import side-effect), then the + * bound `asname` Name (the in-scope binding). This makes both reachable + * from the CFG and allows `Name.defines(v)` for `asname` Names to have + * corresponding CFG nodes — which is essential for SSA to see import + * bindings. + */ + additional class ImportStmt extends Stmt { + private Py::Import imp; + + ImportStmt() { this = TPyStmt(imp) } + + /** Gets the value (module/member expression) of the `n`th alias. */ + Expr getValue(int n) { result.asExpr() = imp.getName(n).getValue() } + + /** Gets the bound `asname` of the `n`th alias. */ + Expr getAsname(int n) { result.asExpr() = imp.getName(n).getAsname() } + + /** Gets the number of aliases in this import statement. */ + int getNumberOfAliases() { result = count(int i | exists(imp.getName(i))) } + + override AstNode getChild(int index) { + exists(int i | + index = 2 * i and result = this.getValue(i) + or + index = 2 * i + 1 and result = this.getAsname(i) + ) + } + } + + /** + * A `from m import *` statement. Evaluates the module expression but + * binds no name (the bindings happen by side-effect at runtime, which + * is not modelled at the CFG level). + */ + additional class ImportStarStmt extends Stmt { + private Py::ImportStar imp; + + ImportStarStmt() { this = TPyStmt(imp) } + + Expr getModule() { result.asExpr() = imp.getModule() } + + override AstNode getChild(int index) { index = 0 and result = this.getModule() } + } + /** A `with` statement. */ additional class WithStmt extends Stmt { private Py::With withStmt; diff --git a/python/ql/test/library-tests/ControlFlow/bindings/imports.py b/python/ql/test/library-tests/ControlFlow/bindings/imports.py index 1b657c7db6c..c8834b5332a 100644 --- a/python/ql/test/library-tests/ControlFlow/bindings/imports.py +++ b/python/ql/test/library-tests/ControlFlow/bindings/imports.py @@ -1,12 +1,14 @@ -# Import aliases. All bound names below currently lack a CFG node. +# Import aliases — all bound names below are now reachable via the new +# CFG's `ImportStmt` wrapper. -import os # $ MISSING: cfgdefines=os -import os.path # $ MISSING: cfgdefines=os -import os as o # $ MISSING: cfgdefines=o -from os import path # $ MISSING: cfgdefines=path -from os import path as p # $ MISSING: cfgdefines=p -from os import sep, linesep # $ MISSING: cfgdefines=sep MISSING: cfgdefines=linesep +import os # $ cfgdefines=os +import os.path # $ cfgdefines=os +import os as o # $ cfgdefines=o +from os import path # $ cfgdefines=path +from os import path as p # $ cfgdefines=p +from os import sep, linesep # $ cfgdefines=sep cfgdefines=linesep from os import ( - getcwd, # $ MISSING: cfgdefines=getcwd - getcwdb, # $ MISSING: cfgdefines=getcwdb + getcwd, # $ cfgdefines=getcwd + getcwdb, # $ cfgdefines=getcwdb ) +