Files
codeql/python/ql/src/Variables/UndefinedGlobal.ql
yoff 3b3bec8825 Python: remove getAFlowNode() — bridge AST→CFG only via CFG-side getNode()
Option 2: eliminates the AST→CFG bridge from the AST layer. Previously
'AstNode.getAFlowNode()' returned a 'ControlFlowNode' from the legacy
'Flow.qll' CFG via 'py_flow_bb_node' — this hardcoded the AST to know
about the legacy CFG, preventing files from cleanly switching to the
new shared CFG.

Removes:
  * 'AstNode.getAFlowNode()' from 'AstExtended.qll'
  * Type-narrowing overrides on 'Attribute' / 'Subscript' / 'Call' /
    'IfExp' / 'Name' / 'NameConstant' / 'ImportMember' (in Exprs.qll
    and Import.qll)

Rewrites ~130 call sites across 'python/ql/lib/' and 'python/ql/src/'
to bridge from the CFG side instead:

  Before:  node = expr.getAFlowNode()
  After:   node.getNode() = expr

  Before:  expr.getAFlowNode().(DefinitionNode).getValue()
  After:   exists(DefinitionNode d | d.getNode() = expr | d.getValue())

  Before:  cn.operands(const.getAFlowNode(), op, x)
  After:   exists(ControlFlowNode c | c.getNode() = const | cn.operands(c, op, x))

This is semantically a no-op — both forms are duals of the same predicate.
Verified by passing all library tests:
  * 64 dataflow tests
  * 28 ControlFlow + dataflow-new-ssa tests
  * 1 essa SSA-compute test
  * 93 tests total in the focused suite

Once committed, files that want to switch from the legacy 'Flow' CFG
to the new 'Cfg' facade only need to change their imports — the
bridge sites are CFG-side and respect whichever ControlFlowNode is in
scope.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-26 16:32:44 +00:00

123 lines
4.2 KiB
Plaintext

/**
* @name Use of an undefined global variable
* @description Using a global variable before it is initialized causes an exception.
* @kind problem
* @tags reliability
* correctness
* @problem.severity error
* @sub-severity low
* @precision low
* @id py/undefined-global-variable
*/
import python
private import LegacyPointsTo
private import semmle.python.types.ImportTime
import Variables.MonkeyPatched
import Loop
/**
 * Holds if the use `u` is protected against a `NameError`.
 *
 * This is the case when `u` is nested in an item of the body of a `try`
 * statement that has a handler whose type expression is the name `NameError`,
 * or when a control-flow node for `u` lies in a basic block controlled by a
 * condition block whose last node is, or contains, a call to a function
 * named `globals`.
 */
predicate guarded_against_name_error(Name u) {
  // Case 1: `try: ... u ... except NameError: ...`
  exists(Try tryStmt |
    tryStmt.getBody().getAnItem().contains(u) and
    tryStmt.getAHandler().getType().(Name).getId() = "NameError"
  )
  or
  // Case 2: the use is controlled by a test that involves a call to `globals`.
  exists(
    ConditionBlock cond, BasicBlock guardedBlock, Call globalsCall, AstNode test,
    ControlFlowNode useNode
  |
    test = cond.getLastNode().getNode() and
    (test = globalsCall or test.contains(globalsCall)) and
    globalsCall.getFunc().(Name).getId() = "globals" and
    cond.controls(guardedBlock, _) and
    useNode.getNode() = u and
    guardedBlock.contains(useNode)
  )
}
/**
 * Holds if module `m` is the scope of an `import *` whose imported module
 * resolves to a `ModuleValue` that does not have complete export information.
 */
predicate contains_unknown_import_star(Module m) {
  exists(ImportStar star, ModuleValue starred |
    star.getScope() = m and
    starred.importedAs(star.getImportedModuleName()) and
    not starred.hasCompleteExportInfo()
  )
}
/**
 * Holds if `u` is a use of a global variable inside a function (possibly via
 * nested scopes) for which no definition can be found, and none of the
 * exclusions listed at the end of the body apply.
 */
predicate undefined_use_in_function(Name u) {
exists(Function f |
// `f` is the scope of `u`, or any scope that transitively encloses it.
u.getScope().getScope*() = f and
// Either function is a method or inner function or it is live at the end of the module scope
(
not f.getScope() = u.getEnclosingModule() or
u.getEnclosingModule().(ImportTimeScope).definesName(f.getName())
) and
// There is a use, but not a definition of this global variable in the function or enclosing scope
exists(GlobalVariable v | u.uses(v) |
// No assignment targets an access to `v` in `f` itself or in a non-module enclosing scope.
not exists(Assign a, Scope defnScope |
a.getATarget() = v.getAnAccess() and a.getScope() = defnScope
|
defnScope = f
or
// Exclude modules as that case is handled more precisely below.
defnScope = f.getScope().getScope*() and not defnScope instanceof Module
)
)
) and
// The enclosing module, viewed as an import-time scope, does not define the name.
not u.getEnclosingModule().(ImportTimeScope).definesName(u.getId()) and
// No module value for the enclosing module has an attribute with this name.
not exists(ModuleValue m | m.getScope() = u.getEnclosingModule() | m.hasAttribute(u.getId())) and
// NOTE(review): `globallyDefinedName` is declared outside this file; presumably it covers
// builtins and similar always-available names - confirm against its definition.
not globallyDefinedName(u.getId()) and
// No SSA variable used at `u` is known to be defined (i.e. is not `maybeUndefined`).
not exists(SsaVariableWithPointsTo var | var.getAUse().getNode() = u and not var.maybeUndefined()) and
not guarded_against_name_error(u) and
// Uses of `__path__` inside a package `__init__` module are never reported.
not (u.getEnclosingModule().isPackageInit() and u.getId() = "__path__")
}
/**
 * Holds if `u` is a use of a global variable that is not nested (at any depth)
 * inside a function, where an SSA variable used at `u` may be undefined and
 * none of the exclusions below apply.
 */
predicate undefined_use_in_class_or_module(Name u) {
  u.uses(any(GlobalVariable gv)) and
  // None of the scopes enclosing `u` (including its own) is a function.
  not exists(Function enclosing | enclosing = u.getScope().getScope*()) and
  any(SsaVariableWithPointsTo ssa | ssa.getAUse().getNode() = u).maybeUndefined() and
  // Exclusions.
  not globallyDefinedName(u.getId()) and
  not exists(ModuleValue mod |
    mod.getScope() = u.getEnclosingModule() and
    mod.hasAttribute(u.getId())
  ) and
  not (u.getId() = "__path__" and u.getEnclosingModule().isPackageInit()) and
  not guarded_against_name_error(u)
}
/**
 * Holds if module `m` is the scope of an `exec` statement, or of a call to
 * the function value named `exec` or `execfile`.
 */
predicate use_of_exec(Module m) {
  m = any(Exec stmt).getScope()
  or
  exists(FunctionValue callee, CallNode site |
    callee = Value::named(["exec", "execfile"]) and
    site = callee.getACall() and
    site.getScope() = m
  )
}
/**
 * Holds if `u` is an undefined use according to
 * `undefined_use_in_function` or `undefined_use_in_class_or_module`,
 * and none of the following exclusions apply: the name is a monkey-patched
 * builtin, the enclosing module uses `exec` or contains an unknown
 * `import *`, the variable of `u` has a store, points-to has a result for
 * `u`, or `u` is probably defined in a loop.
 */
predicate undefined_use(Name u) {
  not monkey_patched_builtin(u.getId()) and
  not probably_defined_in_loop(u) and
  not u.(ExprWithPointsTo).pointsTo(_) and
  not exists(u.getVariable().getAStore()) and
  // Module-level features that make name resolution unreliable.
  not exists(Module enclosing | enclosing = u.getEnclosingModule() |
    use_of_exec(enclosing) or
    contains_unknown_import_star(enclosing)
  ) and
  (
    undefined_use_in_function(u)
    or
    undefined_use_in_class_or_module(u)
  )
}
/**
 * Holds if some control-flow node for `use` is the lowest-indexed node, in
 * some basic block, whose AST node is a load of some global variable.
 */
private predicate first_use_in_a_block(Name use) {
  exists(GlobalVariable gv, BasicBlock block, ControlFlowNode useNode |
    useNode.getNode() = use and
    useNode = block.getNode(min(int idx | block.getNode(idx).getNode() = gv.getALoad()))
  )
}
/**
 * Holds if `use` is an undefined use that is the first use in a block and,
 * for some global variable it loads, no load of that variable has a
 * control-flow node in a basic block that strictly dominates the block of a
 * control-flow node for `use`.
 */
predicate first_undefined_use(Name use) {
  first_use_in_a_block(use) and
  undefined_use(use) and
  exists(GlobalVariable gv, ControlFlowNode useNode |
    use = gv.getALoad() and
    useNode.getNode() = use and
    // No other load of the same variable sits in a strictly dominating block.
    not exists(ControlFlowNode earlier, BasicBlock earlierBlock |
      earlier.getNode() = gv.getALoad() and
      earlierBlock = earlier.getBasicBlock() and
      earlierBlock.strictlyDominates(useNode.getBasicBlock())
    )
  )
}
// Report each first undefined use of a global variable.
from Name undefinedUse
where first_undefined_use(undefinedUse)
select undefinedUse,
  "This use of global variable '" + undefinedUse.getId() + "' may be undefined."