mirror of
https://github.com/github/codeql.git
synced 2026-06-09 06:58:50 +02:00
Flips the Python dataflow trunk from the legacy CFG (semmle/python/Flow.qll) and legacy ESSA SSA (semmle/python/essa/*) to the new shared CFG facade (semmle.python.controlflow.internal.Cfg) and the new SSA adapter (semmle.python.dataflow.new.internal.SsaImpl), both introduced additively in the preceding PRs in this stack. This is the trunk-flip equivalent of the original draft PR #21894 (kept around as documentation), rebased on top of the four preparatory PRs: P1: Remove AstNode.getAFlowNode() and rewrite callers (#21919). P2: Qualify Flow.qll's AST references with Py:: prefix (#21920). P3: Add new shared-CFG-backed control flow graph (#21921). P4: Add new shared-SSA-backed SSA adapter (#21923). The Python dataflow library (semmle/python/dataflow/new/) now imports the new CFG facade and SSA adapter. All CFG-typed predicates (ControlFlowNode, CallNode, BasicBlock, NameNode, AttrNode, ...) are qualified with the Cfg:: prefix; SSA references switch from EssaVariable/EssaDefinition to SsaImpl::Definition/SourceVariable. GuardNode is redesigned to use the new CFG's outcome-node model (isAfterTrue / isAfterFalse) instead of the legacy ConditionBlock + flipped indirection. Only BarrierGuard<...> is preserved as public API. Framework files (Bottle, FastApi, Django, Tornado, Pyramid, Stdlib, ...) are updated to take CFG nodes from the new facade. A handful of dataflow consistency tweaks for the new CFG: - Augmented-assignment targets are treated as both load and store. - 'from X import *' produces uncertain SSA writes for unknown names. - CFG nodes are canonicalised so dataflow does not see equivalent pre/post-order pairs as distinct nodes. Two AST tweaks for the new CFG: - AstNodeImpl: omit PEP 695 type-parameter names from FunctionDefExpr / ClassDefExpr children. - ImportResolution: drop the legacy essa import. 
Test churn (~175 files): reblessed library- and query-test .expected files reflect slightly different CFG granularity, different toString output, and a handful of true alert deltas in security queries. Verification: all 367 lib + src + consistency-queries compile clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
122 lines
2.9 KiB
Python
122 lines
2.9 KiB
Python
### Tests of global flow
|
|
|
|
# Simple assignment

g = [5] # $ writes=g

# Multiple assignment

g1, g2 = [6], [7] # $ writes=g1 writes=g2

# Assignment that's only referenced in this scope (at module level, by the `print` below).

unreferenced_g = [8] # $ writes=unreferenced_g
print(unreferenced_g)
|
|
|
|
# Testing modifications of globals

# Modification by reassignment

g_mod = [] # $ SPURIOUS: writes=g_mod
# The assignment above should not produce any flow, since `g_mod` is immediately reassigned; the write is currently still reported, hence the SPURIOUS marker.

# The following assignment should not be a `ModuleVariableNode`,
# but currently our analysis thinks `g_mod` might be used in the `print` call.
g_mod = [10] # $ SPURIOUS: writes=g_mod
print("foo")
g_mod = [100] # $ writes=g_mod
|
|
|
|
# Modification by mutation (`append` changes the list in place; the name `g_ins` is bound only once)

g_ins = [50] # $ writes=g_ins
print(g_ins)
g_ins.append(75)
|
|
|
|
# A global with multiple potential definitions (one per branch of the `if` below)

import unknown_module # $ writes=unknown_module
if unknown_module.attr:
    g_mult = [200] # $ writes=g_mult
else:
    g_mult = [300] # $ writes=g_mult
|
|
|
|
# A global variable that may be redefined depending on some unknown value (`unknown_module.attr`)

g_redef = [400] # $ writes=g_redef
if unknown_module.attr:
    g_redef = [500] # $ writes=g_redef
|
|
|
|
# Reads several of the module-level globals defined above from inside a function; `l` is local to the function.
def global_access(): # $ writes=global_access
    l = 5
    print(g) # $ reads=g
    print(g1) # $ reads=g1
    print(g2) # $ reads=g2
    print(g_mod) # $ reads=g_mod
    print(g_ins) # $ reads=g_ins
    print(g_mult) # $ reads=g_mult
    print(g_redef) # $ reads=g_redef
|
|
|
|
# Helper that reads the global `g_mod`; it is called from `global_mod`.
def print_g_mod(): # $ writes=print_g_mod
    print(g_mod) # $ reads=g_mod
|
|
|
|
# `g_mod` is declared `global`, so the augmented assignment both reads and rebinds the module-level variable.
def global_mod(): # $ writes=global_mod
    global g_mod
    g_mod += [150] # $ reads,writes=g_mod
    print_g_mod() # $ reads=print_g_mod
|
|
|
|
# The read of `g` sits in a nested function; `local_function` itself is a local name, so only the outer `def` carries a write annotation.
def global_inside_local_function(): # $ writes=global_inside_local_function
    def local_function():
        print(g) # $ reads=g
    local_function()
|
|
|
|
## Imports
|
|
|
|
|
|
# Direct imports
|
|
|
|
import foo_module # $ writes=foo_module
|
|
|
|
# Reads the directly imported module `foo_module` from function scope.
def use_foo(): # $ writes=use_foo
    print(foo_module.attr) # $ reads=foo_module
|
|
|
|
# Partial imports
|
|
|
|
from bar import baz_attr, quux_attr # $ writes=baz_attr writes=quux_attr
|
|
|
|
# Reads both names bound by the `from bar import ...` statement.
def use_partial_import(): # $ writes=use_partial_import
    print(baz_attr, quux_attr) # $ reads=baz_attr reads=quux_attr
|
|
|
|
# Aliased imports
|
|
|
|
from spam_module import ham_attr as eggs_attr # $ writes=eggs_attr
|
|
|
|
# Reads the alias `eggs_attr`; the original name `ham_attr` is never bound in this module.
def use_aliased_import(): # $ writes=use_aliased_import
    print(eggs_attr) # $ reads=eggs_attr
|
|
|
|
# Import star (unlikely to work unless we happen to extract/model the referenced module)
|
|
|
|
# Unknown modules
|
|
|
|
from unknown import *
|
|
|
|
# `unknown_attr` is not bound explicitly anywhere in this file; presumably it is meant to come from `from unknown import *` (confirm against the test setup).
def secretly_use_unknown(): # $ writes=secretly_use_unknown
    print(unknown_attr) # $ reads=unknown_attr
|
|
|
|
# Known modules
|
|
|
|
from known import *
|
|
|
|
# `known_attr` is not bound explicitly anywhere in this file; presumably it is meant to come from `from known import *` (confirm against the test setup).
def secretly_use_known(): # $ writes=secretly_use_known
    print(known_attr) # $ reads=known_attr
|
|
|
|
# Local import in function (`mod1` is bound as a local of the function, so the import carries no module-level write annotation)

def imports_locally(): # $ writes=imports_locally
    import mod1
|
|
|
|
# Global import hidden in function (`global mod2` makes the import bind the module-level name)

def imports_stuff(): # $ writes=imports_stuff
    global mod2
    import mod2 # $ writes=mod2
|