Python: Port cyclic import queries

The new CyclicImports.qll is a fairly straight port of Cyclic.qll, with
the main changes being:

- We now use Module instead of ModuleValue everywhere
- We use getModuleReference instead of pointsTo
- is_import_time was replaced with a use of `ImportTimeScope`

The predicate that changed the most is `stmt_imports`, which in the
original just did `s.getASubExpression().pointsTo(result)`. The new
version has three branches, one for each kind of import, and with
special handling of imports from within a submodule (which is not
something that should be flagged).

No test changes.
This commit is contained in:
Taus
2026-02-26 14:40:12 +00:00
parent 970349bc1f
commit 6b64443c49
3 changed files with 146 additions and 10 deletions

View File

@@ -12,16 +12,16 @@
*/
import python
import Cyclic
private import LegacyPointsTo
import CyclicImports
private import semmle.python.dataflow.new.internal.ImportResolution
from ModuleValue m1, ModuleValue m2, Stmt imp
from Module m1, Module m2, Stmt imp
where
imp.getEnclosingModule() = m1.getScope() and
imp.getEnclosingModule() = m1 and
stmt_imports(imp) = m2 and
circular_import(m1, m2) and
m1 != m2 and
// this query finds all cyclic imports that are *not* flagged by ModuleLevelCyclicImport
not failing_import_due_to_cycle(m2, m1, _, _, _, _) and
not exists(If i | i.isNameEqMain() and i.contains(imp))
select imp, "Import of module $@ begins an import cycle.", m2, m2.getName()
select imp, "Import of module $@ begins an import cycle.", m2, ImportResolution::moduleName(m2)

View File

@@ -0,0 +1,135 @@
import python
private import semmle.python.dataflow.new.internal.ImportResolution
private import semmle.python.dataflow.new.internal.DataFlowDispatch
private import semmle.python.types.ImportTime
Module module_imported_by(Module m) {
exists(ImportingStmt imp |
result = stmt_imports(imp) and
imp.getEnclosingModule() = m and
// Import must reach exit to be part of a cycle
imp.getAnEntryNode().getBasicBlock().reachesExit()
)
}
/** Is there a circular import of 'm1' beginning with 'm2'? */
predicate circular_import(Module m1, Module m2) {
m1 != m2 and
m2 = module_imported_by(m1) and
m1 = module_imported_by+(m2)
}
Module stmt_imports(ImportingStmt s) {
(
// `import m` — the alias target refers to the imported module
exists(Alias a | a = s.(Import).getAName() |
ImportResolution::getImmediateModuleReference(result).asExpr() = a.getAsname()
)
or
// `from m import x` — the source module `m` is also imported,
// but only if the imported member `x` is not a submodule of `m`
exists(ImportMember im | im = s.(Import).getAName().getValue() |
ImportResolution::getImmediateModuleReference(result).asExpr() = im.getModule() and
not ImportResolution::getImmediateModuleReference(_).asExpr() = im
)
or
// `from m import *`
ImportResolution::getImmediateModuleReference(result).asExpr() =
s.(ImportStar).getModule().(ImportExpr)
) and
not result.isPackage() and
not result.isPackageInit() and
Reachability::likelyReachable(s.getAnEntryNode().getBasicBlock())
}
predicate import_time_imported_module(Module m1, Module m2, Stmt imp) {
imp.(ImportingStmt).getEnclosingModule() = m1 and
imp.getScope() instanceof ImportTimeScope and
m2 = stmt_imports(imp)
}
/** Is there a cyclic import of 'm1' beginning with an import 'm2' at 'imp' where all the imports are top-level? */
predicate import_time_circular_import(Module m1, Module m2, Stmt imp) {
m1 != m2 and
import_time_imported_module(m1, m2, imp) and
import_time_transitive_import(m2, _, m1)
}
predicate import_time_transitive_import(Module base, Stmt imp, Module last) {
last != base and
(
import_time_imported_module(base, last, imp)
or
exists(Module mid |
import_time_transitive_import(base, imp, mid) and
import_time_imported_module(mid, last, _)
)
) and
// Import must reach exit to be part of a cycle
imp.getAnEntryNode().getBasicBlock().reachesExit()
}
/**
* Returns import-time usages of module 'm' in module 'enclosing'
*/
predicate import_time_module_use(Module m, Module enclosing, Expr use, string attr) {
exists(Expr mod |
use.getEnclosingModule() = enclosing and
use.getScope() instanceof ImportTimeScope and
ImportResolution::getModuleReference(m).asExpr() = mod and
not is_annotation_with_from_future_import_annotations(use)
|
// either 'M.foo'
use.(Attribute).getObject() = mod and use.(Attribute).getName() = attr
or
// or 'from M import foo'
use.(ImportMember).getModule() = mod and use.(ImportMember).getName() = attr
)
}
/**
* Holds if `use` appears inside an annotation.
*/
predicate is_used_in_annotation(Expr use) {
exists(FunctionExpr f |
f.getReturns().getASubExpression*() = use or
f.getArgs().getAnAnnotation().getASubExpression*() = use
)
or
exists(AnnAssign a | a.getAnnotation().getASubExpression*() = use)
}
/**
* Holds if `use` appears as a subexpression of an annotation, _and_ if the
* postponed evaluation of annotations presented in PEP 563 is in effect.
* See https://www.python.org/dev/peps/pep-0563/
*/
predicate is_annotation_with_from_future_import_annotations(Expr use) {
exists(ImportMember i | i.getScope() = use.getEnclosingModule() |
i.getModule().(ImportExpr).getImportedModuleName() = "__future__" and
i.getName() = "annotations"
) and
is_used_in_annotation(use)
}
/**
* Whether importing module 'first' before importing module 'other' will fail at runtime, due to an
* AttributeError at 'use' (in module 'other') caused by 'first.attr' not being defined as its definition can
* occur after the import 'other' in 'first'.
*/
predicate failing_import_due_to_cycle(
Module first, Module other, Stmt imp, ControlFlowNode defn, Expr use, string attr
) {
import_time_imported_module(other, first, _) and
import_time_transitive_import(first, imp, other) and
import_time_module_use(first, other, use, attr) and
exists(ImportTimeScope n, SsaVariable v |
defn = v.getDefinition() and
n = first and
v.getVariable().getScope() = n and
v.getId() = attr
|
not defn.strictlyDominates(imp.getAnEntryNode())
) and
not exists(If i | i.isNameEqMain() and i.contains(use))
}

View File

@@ -13,8 +13,8 @@
*/
import python
import Cyclic
private import LegacyPointsTo
import CyclicImports
private import semmle.python.dataflow.new.internal.ImportResolution
// This is a potentially crashing bug if
// 1. the imports in the whole cycle are lexically outside a def (and so executed at import time)
@@ -22,10 +22,11 @@ private import LegacyPointsTo
// 3. 'foo' is defined in M after the import in M which completes the cycle.
// then if we import the 'used' module, we will reach the cyclic import, start importing the 'using'
// module, hit the 'use', and then crash due to the imported symbol not having been defined yet
from ModuleValue m1, Stmt imp, ModuleValue m2, string attr, Expr use, ControlFlowNode defn
from Module m1, Stmt imp, Module m2, string attr, Expr use, ControlFlowNode defn
where failing_import_due_to_cycle(m1, m2, imp, defn, use, attr)
select use,
"'" + attr + "' may not be defined if module $@ is imported before module $@, as the $@ of " +
attr + " occurs after the cyclic $@ of " + m2.getName() + ".",
attr + " occurs after the cyclic $@ of " + ImportResolution::moduleName(m2) + ".",
// Arguments for the placeholders in the above message:
m1, m1.getName(), m2, m2.getName(), defn, "definition", imp, "import"
m1, ImportResolution::moduleName(m1), m2, ImportResolution::moduleName(m2), defn, "definition",
imp, "import"