Compare commits

..

7 Commits

Author SHA1 Message Date
Taus
28eec77cd8 Python: Port UnusedImport.ql
Changes the "has points-to value" check into an "is reachable" check
instead. No test changes.
2026-03-09 17:22:07 +00:00
Taus
09fc9f0bf2 Python: Port UnintentionalImport.ql
No test changes.
2026-03-09 17:22:07 +00:00
Taus
330dba6ed7 Python: Port FromImportOfMutableAttribute.ql
A fairly straightforward port. No test changes.
2026-03-09 17:22:07 +00:00
Taus
6b64443c49 Python: Port cyclic import queries
The new CyclicImports.qll is a fairly straight port of Cyclic.qll, with
the main changes being:

- We now use Module instead of ModuleValue everywhere
- We use getModuleReference instead of pointsTo
- is_import_time was replaced with a use of `ImportTimeScope`

The predicate that changed the most is `stmt_imports`, which in the
original just did `s.getASubExpression().pointsTo(result)`. The new
version has three branches, one for each kind of import, and with
special handling of imports from within a submodule (which is not
something that should be flagged).

No test changes.
2026-03-09 17:22:07 +00:00
Taus
970349bc1f Python: Extend reachability analysis with common guards
Adds `if False: ...` and `if typing.TYPE_CHECKING: ...` to the set of
nodes that are unlikely to be reachable.
2026-03-09 17:22:07 +00:00
Taus
47421a63a4 Python: Port import metrics queries
2026-03-09 17:22:06 +00:00
Taus
603d37cd60 Python: Port ModuleImportsItself.ql
Uses the existing machinery in ImportResolution.qll, after adding a few
convenience predicates.

The new modelling actually manages to find a result that the old
points-to analysis did not. Apart from that there are no test changes.
2026-03-09 17:22:06 +00:00
12 changed files with 236 additions and 57 deletions

View File

@@ -2417,6 +2417,24 @@ module Reachability {
// Exception edge from a node that is unlikely to raise
unlikelyToRaise(node) and
succ = node.getAnExceptionalSuccessor()
or
// True branch of `if False:` or `if TYPE_CHECKING:`
isAlwaysFalseGuard(node) and
succ = node.getATrueSuccessor()
}
/**
 * Holds if `node` is a condition that is always `False` at runtime:
 * either the literal `False` or a reference to `typing.TYPE_CHECKING`.
 */
private predicate isAlwaysFalseGuard(ControlFlowNode node) {
  // A use of `typing.TYPE_CHECKING`, located via the API graph.
  exists(API::Node typeChecking |
    typeChecking = API::moduleImport("typing").getMember("TYPE_CHECKING") and
    node = typeChecking.getAValueReachableFromSource().asCfgNode()
  )
  or
  // The literal `False`.
  node.getNode() instanceof False
}
private predicate startBbLikelyReachable(BasicBlock b) {

View File

@@ -377,4 +377,30 @@ module ImportResolution {
}
/** Gets the module `result` such that `node` is one of the nodes given by `getModuleReference(result)`. */
Module getModule(DataFlow::CfgNode node) { getModuleReference(result) = node }
/**
 * Holds if module `importer` directly imports module `imported`, that is,
 * `importer` is the scope of an immediate reference to `imported`.
 */
predicate imports(Module importer, Module imported) {
  importer = getImmediateModuleReference(imported).getScope()
}
/**
 * Holds if the import statement `i` causes module `imported` to be imported.
 * A statement `from pkg import submodule` is treated as importing both `pkg`
 * and `pkg.submodule`.
 */
predicate importedBy(ImportingStmt i, Module imported) {
  exists(Expr moduleRef | moduleRef = getImmediateModuleReference(imported).asExpr() |
    // The name bound by one of the aliases of an `import` statement.
    moduleRef = i.(Import).getAName().getAsname()
    or
    // The source module of an imported member (`from <module> import ...`).
    moduleRef = i.(Import).getAName().getValue().(ImportMember).getModule()
    or
    // The source module of a star import (`from <module> import *`).
    moduleRef = i.(ImportStar).getModule().(ImportExpr)
  )
}
/**
 * Gets a user-friendly name for module `m`: the package name when `m` is a
 * package `__init__` module, and the plain module name otherwise.
 */
string moduleName(Module m) {
  m.isPackageInit() and result = m.getPackageName()
  or
  not m.isPackageInit() and result = m.getName()
}
}

View File

@@ -12,16 +12,16 @@
*/
import python
import Cyclic
private import LegacyPointsTo
import CyclicImports
private import semmle.python.dataflow.new.internal.ImportResolution
from ModuleValue m1, ModuleValue m2, Stmt imp
from Module m1, Module m2, Stmt imp
where
imp.getEnclosingModule() = m1.getScope() and
imp.getEnclosingModule() = m1 and
stmt_imports(imp) = m2 and
circular_import(m1, m2) and
m1 != m2 and
// this query finds all cyclic imports that are *not* flagged by ModuleLevelCyclicImport
not failing_import_due_to_cycle(m2, m1, _, _, _, _) and
not exists(If i | i.isNameEqMain() and i.contains(imp))
select imp, "Import of module $@ begins an import cycle.", m2, m2.getName()
select imp, "Import of module $@ begins an import cycle.", m2, ImportResolution::moduleName(m2)

View File

@@ -0,0 +1,135 @@
import python
private import semmle.python.dataflow.new.internal.ImportResolution
private import semmle.python.dataflow.new.internal.DataFlowDispatch
private import semmle.python.types.ImportTime
/**
 * Gets a module imported (according to `stmt_imports`) by an import statement
 * in module `m`, where the basic block of an entry node of that statement
 * reaches the exit.
 */
Module module_imported_by(Module m) {
  exists(ImportingStmt importStmt |
    importStmt.getEnclosingModule() = m and
    // Import must reach exit to be part of a cycle
    importStmt.getAnEntryNode().getBasicBlock().reachesExit() and
    result = stmt_imports(importStmt)
  )
}
/**
 * Holds if `m1` imports the distinct module `m2`, and `m2` in turn imports
 * `m1` through one or more `module_imported_by` steps.
 */
predicate circular_import(Module m1, Module m2) {
  module_imported_by(m1) = m2 and
  module_imported_by+(m2) = m1 and
  m1 != m2
}
/**
 * Gets a module that the import statement `s` imports.
 *
 * Results that are packages or package `__init__` modules are excluded, and
 * the basic block of an entry node of `s` must satisfy
 * `Reachability::likelyReachable`.
 */
Module stmt_imports(ImportingStmt s) {
(
// `import m` — the alias target refers to the imported module
exists(Alias a | a = s.(Import).getAName() |
ImportResolution::getImmediateModuleReference(result).asExpr() = a.getAsname()
)
or
// `from m import x` — the source module `m` is also imported,
// but only if the imported member `x` is not a submodule of `m`
exists(ImportMember im | im = s.(Import).getAName().getValue() |
ImportResolution::getImmediateModuleReference(result).asExpr() = im.getModule() and
// the member expression itself must not be an immediate reference to any module
not ImportResolution::getImmediateModuleReference(_).asExpr() = im
)
or
// `from m import *`
ImportResolution::getImmediateModuleReference(result).asExpr() =
s.(ImportStar).getModule().(ImportExpr)
) and
// never report a package or a package `__init__` module as the import target
not result.isPackage() and
not result.isPackageInit() and
// ignore import statements located in code that is unlikely to be reachable
Reachability::likelyReachable(s.getAnEntryNode().getBasicBlock())
}
/**
 * Holds if `imp` is an import statement whose enclosing module is `m1`, whose
 * scope is an `ImportTimeScope`, and which imports `m2` according to
 * `stmt_imports`.
 */
predicate import_time_imported_module(Module m1, Module m2, Stmt imp) {
  stmt_imports(imp) = m2 and
  imp.getScope() instanceof ImportTimeScope and
  imp.(ImportingStmt).getEnclosingModule() = m1
}
/**
 * Holds if the import-time import `imp` of `m2` in `m1` begins a cycle that
 * leads back to `m1` purely through import-time imports.
 */
predicate import_time_circular_import(Module m1, Module m2, Stmt imp) {
  import_time_imported_module(m1, m2, imp) and
  import_time_transitive_import(m2, _, m1) and
  m1 != m2
}
/**
 * Holds if module `last` (distinct from `base`) is reached from module `base`
 * through a chain of one or more import-time imports, where `imp` is the
 * first import of that chain, i.e. the import statement located in `base`.
 */
predicate import_time_transitive_import(Module base, Stmt imp, Module last) {
last != base and
(
// base case: `imp` in `base` imports `last` directly
import_time_imported_module(base, last, imp)
or
// recursive case: extend a chain that ends in `mid` by one more import-time import
exists(Module mid |
import_time_transitive_import(base, imp, mid) and
import_time_imported_module(mid, last, _)
)
) and
// Import must reach exit to be part of a cycle
imp.getAnEntryNode().getBasicBlock().reachesExit()
}
/**
 * Holds if `use` is an import-time use of attribute `attr` of module `m`,
 * located in module `enclosing`. Uses inside annotations are ignored when
 * postponed evaluation of annotations is in effect.
 */
predicate import_time_module_use(Module m, Module enclosing, Expr use, string attr) {
  enclosing = use.getEnclosingModule() and
  use.getScope() instanceof ImportTimeScope and
  not is_annotation_with_from_future_import_annotations(use) and
  exists(Expr moduleRef | moduleRef = ImportResolution::getModuleReference(m).asExpr() |
    // either 'M.foo'
    exists(Attribute access | access = use |
      access.getObject() = moduleRef and access.getName() = attr
    )
    or
    // or 'from M import foo'
    exists(ImportMember member | member = use |
      member.getModule() = moduleRef and member.getName() = attr
    )
  )
}
/**
 * Holds if `use` is (a subexpression of) a function return annotation, a
 * function argument annotation, or the annotation of an annotated assignment.
 */
predicate is_used_in_annotation(Expr use) {
  exists(Expr annotation |
    annotation = any(FunctionExpr f).getReturns()
    or
    annotation = any(FunctionExpr f).getArgs().getAnAnnotation()
    or
    annotation = any(AnnAssign a).getAnnotation()
  |
    use = annotation.getASubExpression*()
  )
}
/**
 * Holds if `use` occurs within an annotation, and the enclosing module of
 * `use` contains `from __future__ import annotations`, so that the postponed
 * evaluation of annotations described in PEP 563 applies.
 * See https://www.python.org/dev/peps/pep-0563/
 */
predicate is_annotation_with_from_future_import_annotations(Expr use) {
  is_used_in_annotation(use) and
  exists(ImportMember futureImport |
    futureImport.getName() = "annotations" and
    futureImport.getModule().(ImportExpr).getImportedModuleName() = "__future__" and
    futureImport.getScope() = use.getEnclosingModule()
  )
}
/**
 * Whether importing module 'first' before importing module 'other' will fail at runtime, due to an
 * AttributeError at 'use' (in module 'other') caused by 'first.attr' not being defined as its definition can
 * occur after the import 'other' in 'first'.
 *
 * Here 'imp' is the import statement in 'first' that starts the import-time chain leading to
 * 'other', and 'defn' is a definition of 'attr' in 'first' that does not strictly dominate 'imp'.
 */
predicate failing_import_due_to_cycle(
Module first, Module other, Stmt imp, ControlFlowNode defn, Expr use, string attr
) {
// 'other' imports 'first' at import time, closing the cycle
import_time_imported_module(other, first, _) and
// 'first' reaches 'other' through import-time imports, beginning with 'imp'
import_time_transitive_import(first, imp, other) and
// 'other' uses 'first.attr' (or 'from first import attr') at import time
import_time_module_use(first, other, use, attr) and
exists(ImportTimeScope n, SsaVariable v |
// 'defn' defines an SSA variable named 'attr' whose scope is the module 'first'
defn = v.getDefinition() and
n = first and
v.getVariable().getScope() = n and
v.getId() = attr
|
// the definition is not guaranteed to have run before the cyclic import is entered
not defn.strictlyDominates(imp.getAnEntryNode())
) and
// ignore uses contained in an `If` for which `isNameEqMain()` holds
not exists(If i | i.isNameEqMain() and i.contains(use))
}

View File

@@ -12,24 +12,26 @@
*/
import python
private import LegacyPointsTo
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.ImportResolution
import semmle.python.filters.Tests
from ImportMember im, ModuleValue m, AttrNode store_attr, string name
from ImportMember im, Module m, DataFlow::AttrWrite store_attr, string name
where
m.importedAs(im.getModule().(ImportExpr).getImportedModuleName()) and
ImportResolution::getImmediateModuleReference(m).asExpr() = im.getModule() and
im.getName() = name and
/* Modification must be in a function, so it can occur during lifetime of the import value */
store_attr.getScope() instanceof Function and
store_attr.getObject().getScope() instanceof Function and
/* variable resulting from import must have a long lifetime */
not im.getScope() instanceof Function and
store_attr.isStore() and
store_attr.getObject(name).(ControlFlowNodeWithPointsTo).pointsTo(m) and
store_attr.getAttributeName() = name and
ImportResolution::getModuleReference(m) = store_attr.getObject() and
/* Import not in same module as modification. */
not im.getEnclosingModule() = store_attr.getScope().getEnclosingModule() and
not im.getEnclosingModule() = store_attr.getObject().getScope().getEnclosingModule() and
/* Modification is not in a test */
not store_attr.getScope().getScope*() instanceof TestScope
not store_attr.getObject().getScope().getScope*() instanceof TestScope
select im,
"Importing the value of '" + name +
"' from $@ means that any change made to $@ will be not be observed locally.", m,
"module " + m.getName(), store_attr, m.getName() + "." + store_attr.getName()
"module " + ImportResolution::moduleName(m), store_attr,
ImportResolution::moduleName(m) + "." + store_attr.getAttributeName()

View File

@@ -12,19 +12,19 @@
*/
import python
private import LegacyPointsTo
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.ImportResolution
predicate modules_imports_itself(ImportingStmt i, ModuleValue m) {
i.getEnclosingModule() = m.getScope() and
m =
max(string s, ModuleValue m_ |
s = i.getAnImportedModuleName() and
m_.importedAs(s)
|
m_ order by s.length()
)
/**
 * Holds if the import statement `i`, whose enclosing module is `m`, imports
 * `m` itself according to `ImportResolution::importedBy`.
 */
predicate modules_imports_itself(ImportingStmt i, Module m) {
  i.getEnclosingModule() = m and
  ImportResolution::importedBy(i, m) and
  // Exclude `from m import submodule` where the imported member is a submodule of m
  not exists(ImportMember member, Module sub |
    member = i.(Import).getAName().getValue() and
    member.getModule() = ImportResolution::getImmediateModuleReference(m).asExpr() and
    sub != m and
    ImportResolution::importedBy(i, sub)
  )
}
from ImportingStmt i, ModuleValue m
from ImportingStmt i, Module m
where modules_imports_itself(i, m)
select i, "The module '" + m.getName() + "' imports itself."
select i, "The module '" + ImportResolution::moduleName(m) + "' imports itself."

View File

@@ -13,8 +13,8 @@
*/
import python
import Cyclic
private import LegacyPointsTo
import CyclicImports
private import semmle.python.dataflow.new.internal.ImportResolution
// This is a potentially crashing bug if
// 1. the imports in the whole cycle are lexically outside a def (and so executed at import time)
@@ -22,10 +22,11 @@ private import LegacyPointsTo
// 3. 'foo' is defined in M after the import in M which completes the cycle.
// then if we import the 'used' module, we will reach the cyclic import, start importing the 'using'
// module, hit the 'use', and then crash due to the imported symbol not having been defined yet
from ModuleValue m1, Stmt imp, ModuleValue m2, string attr, Expr use, ControlFlowNode defn
from Module m1, Stmt imp, Module m2, string attr, Expr use, ControlFlowNode defn
where failing_import_due_to_cycle(m1, m2, imp, defn, use, attr)
select use,
"'" + attr + "' may not be defined if module $@ is imported before module $@, as the $@ of " +
attr + " occurs after the cyclic $@ of " + m2.getName() + ".",
attr + " occurs after the cyclic $@ of " + ImportResolution::moduleName(m2) + ".",
// Arguments for the placeholders in the above message:
m1, m1.getName(), m2, m2.getName(), defn, "definition", imp, "import"
m1, ImportResolution::moduleName(m1), m2, ImportResolution::moduleName(m2), defn, "definition",
imp, "import"

View File

@@ -13,23 +13,19 @@
*/
import python
private import LegacyPointsTo
private import semmle.python.dataflow.new.internal.ImportResolution
private import semmle.python.types.ImportTime
predicate import_star(ImportStar imp, ModuleValue exporter) {
exporter.importedAs(imp.getImportedModuleName())
/**
 * Holds if `exporter`, or its init module, is an `ImportTimeScope` for which
 * `definesName("__all__")` holds.
 */
predicate all_defined(Module exporter) {
  exists(ImportTimeScope scope | scope.definesName("__all__") |
    scope = exporter
    or
    scope = exporter.getInitModule()
  )
}
predicate all_defined(ModuleValue exporter) {
exporter.isBuiltin()
or
exporter.getScope().(ImportTimeScope).definesName("__all__")
or
exporter.getScope().getInitModule().(ImportTimeScope).definesName("__all__")
}
from ImportStar imp, ModuleValue exporter
where import_star(imp, exporter) and not all_defined(exporter) and not exporter.isAbsent()
// Reports each `from ... import *` whose resolved source module does not define `__all__`.
from ImportStar imp, Module exporter
where
  exporter = ImportResolution::getModuleImportedByImportStar(imp) and
  not all_defined(exporter)
// `exporter` is now a `Module` rather than a `ModuleValue`, so use
// `ImportResolution::moduleName` for the link text, as the other ported import
// queries do: it uses the package name for `__init__` modules.
select imp,
  "Import pollutes the enclosing namespace, as the imported module $@ does not define '__all__'.",
  exporter, ImportResolution::moduleName(exporter)

View File

@@ -12,9 +12,10 @@
*/
import python
private import LegacyPointsTo
import Variables.Definition
import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.internal.ImportResolution
private import semmle.python.dataflow.new.internal.DataFlowDispatch
private predicate is_pytest_fixture(Import imp, Variable name) {
exists(Alias a, API::Node pytest_fixture, API::Node decorator |
@@ -95,7 +96,7 @@ private string typehint_annotation_in_module(Module module_scope) {
or
annotation = any(FunctionExpr f).getReturns().getASubExpression*()
|
annotation.(ExprWithPointsTo).pointsTo(Value::forString(result)) and
annotation.getText() = result and
annotation.getEnclosingModule() = module_scope
)
}
@@ -143,9 +144,8 @@ predicate unused_import(Import imp, Variable name) {
not imported_module_used_in_doctest(imp) and
not imported_alias_used_in_typehint(imp, name) and
not is_pytest_fixture(imp, name) and
// Only consider import statements that actually point-to something (possibly an unknown module).
// If this is not the case, it's likely that the import statement never gets executed.
imp.getAName().getValue().(ExprWithPointsTo).pointsTo(_)
// Only consider import statements in reachable code.
Reachability::likelyReachable(imp.getAName().getValue().getAFlowNode().getBasicBlock())
}
from Stmt s, Variable name

View File

@@ -11,8 +11,8 @@
*/
import python
private import LegacyPointsTo
private import semmle.python.dataflow.new.internal.ImportResolution
from ModuleValue m, int n
where n = count(ModuleValue imp | imp = m.getAnImportedModule())
select m.getScope(), n
// Metric: for each module `m`, the number of modules it imports directly.
from Module m, int n
where n = count(Module imported | ImportResolution::imports(m, imported))
select m, n

View File

@@ -11,8 +11,8 @@
*/
import python
private import LegacyPointsTo
private import semmle.python.dataflow.new.internal.ImportResolution
from ModuleValue m, int n
where n = count(ModuleValue imp | imp = m.getAnImportedModule+() and imp != m)
select m.getScope(), n
// Metric: for each module `m`, the number of other modules it imports directly or transitively.
from Module m, int n
where n = count(Module imported | imported != m and ImportResolution::imports+(m, imported))
select m, n

View File

@@ -1,5 +1,6 @@
| imports_test.py:8:1:8:19 | Import | The module 'imports_test' imports itself. |
| pkg_notok/__init__.py:4:1:4:16 | Import | The module 'pkg_notok' imports itself. |
| pkg_notok/__init__.py:10:1:10:20 | Import | The module 'pkg_notok' imports itself. |
| pkg_notok/__init__.py:12:1:12:25 | Import | The module 'pkg_notok' imports itself. |
| pkg_notok/__init__.py:13:1:13:37 | Import | The module 'pkg_notok' imports itself. |
| pkg_notok/__init__.py:14:1:14:23 | from pkg_notok import * | The module 'pkg_notok' imports itself. |