diff --git a/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll b/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll index 5dba3d96ea8..fc289744b7f 100644 --- a/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll +++ b/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll @@ -1553,6 +1553,89 @@ private module Input implements InputSig1, InputSig2 { private string assertThrowTag() { result = "[assert-throw]" } + /** + * Holds if the AST node `n` may raise an exception at runtime as part of + * its normal evaluation (not via an explicit `raise`/`assert`, which are + * modelled separately). + * + * The set mirrors what the legacy CFG used to flag implicitly: function + * calls (anything can raise), attribute access (`AttributeError`), + * subscript access (`IndexError`/`KeyError`/`TypeError`), arithmetic and + * comparison operators (`TypeError`/`ZeroDivisionError`), imports + * (`ImportError`/`ModuleNotFoundError`), and generator/coroutine + * suspension points (`await`/`yield`/`yield from`). + * + * Bare `Name` reads are intentionally excluded — modelling every name + * read as `mayThrow` would explode CFG edge count for negligible + * analysis value. `BoolExpr`/`IfExp` containers are also excluded; the + * operands they evaluate contribute their own exception edges. + */ + private predicate exprMayThrow(Py::Expr e) { + e instanceof Py::Call + or + e instanceof Py::Attribute + or + e instanceof Py::Subscript + or + e instanceof Py::BinaryExpr + or + e instanceof Py::UnaryExpr + or + e instanceof Py::Compare + or + e instanceof Py::ImportExpr + or + e instanceof Py::ImportMember + or + e instanceof Py::Await + or + e instanceof Py::Yield + or + e instanceof Py::YieldFrom + } + + /** + * Holds if the statement `s` may raise an exception at runtime as part + * of its normal evaluation. Currently restricted to `from m import *` + * (which performs the import as a statement-level side effect). + */ + private predicate stmtMayThrow(Py::Stmt s) { s instanceof Py::ImportStar } + + /** + * Holds if `n` is syntactically inside the body, handlers, `else`, or + * `finally` of a `try` statement (or the body of a `with` statement, + * which compiles to an implicit try/finally for `__exit__`) in the + * same scope. + * + * This mirrors Java's `ControlFlowGraph::mayThrow`, which only emits + * exception edges when there is local exception handling that would + * observe them. Outside such contexts, exception edges would add CFG + * complexity (weakening BarrierGuard precision and breaking SSA + * continuity around augmented assignments and subscript stores) + * without any analysis benefit, since exceptions just propagate to + * the function exit anyway. + */ + private predicate inExceptionContext(Py::AstNode py) { + exists(Py::Try t | t.containsInScope(py)) + or + exists(Py::With w | w.containsInScope(py)) + } + + /** + * Holds if `n` may raise an exception during normal evaluation. See + * `exprMayThrow` and `stmtMayThrow` for the included AST classes. + * + * Restricted to nodes inside a `try`/`with` statement: matches Java's + * approach of only modelling exception flow where it can be observed + * by local handling. + */ + private predicate mayThrow(Ast::AstNode n) { + exists(Py::AstNode py | py = n.asExpr() or py = n.asStmt() | + (exprMayThrow(py) or stmtMayThrow(py)) and + inExceptionContext(py) + ) + } + predicate additionalNode(Ast::AstNode n, string tag, NormalSuccessor t) { n instanceof Ast::AssertStmt and tag = assertThrowTag() and t instanceof DirectSuccessor } @@ -1564,6 +1647,11 @@ private module Input implements InputSig1, InputSig2 { n.isAdditional(ast, assertThrowTag()) and c.asSimpleAbruptCompletion() instanceof ExceptionSuccessor and always = true + or + mayThrow(ast) and + n.isIn(ast) and + c.asSimpleAbruptCompletion() instanceof ExceptionSuccessor and + always = false } predicate endAbruptCompletion(Ast::AstNode ast, PreControlFlowNode n, AbruptCompletion c) { diff --git a/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py b/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py index dbfb857b536..9058f2b7116 100644 --- a/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py +++ b/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py @@ -1,15 +1,15 @@ -# Dead bindings under the "no expressions raise" CFG abstraction. +# Reachability of code following a try whose body always returns. # -# The new CFG does not currently model raise edges from arbitrary -# expressions. As a consequence, code that is only reachable through -# exception flow is (correctly) classified as dead and has no CFG node. -# Variable bindings in dead code do not need CFG nodes - SSA / dataflow -# over dead code is moot. +# The new CFG models exception edges for raise-prone expressions when +# they appear inside a `try` (or `with`) statement, mirroring Java's +# `mayThrow`. This means the body of a `try` has both a normal +# completion edge and an exception edge to its handlers, so code +# following the try-statement is reachable via the except-handler path +# even when the try-body would otherwise always return. # -# These tests act as a regression guard: the bindings below intentionally -# have no `cfgdefines=` annotations. If raise modelling is later added, -# the BindingsTest infrastructure will surface the new CFG nodes as -# unexpected results, and this file will need to be revisited. +# Code that is not reachable under either normal or exception flow +# (for example, the `else` clause of a try whose body unconditionally +# raises) remains correctly classified as dead. def f(obj): # $ cfgdefines=f cfgdefines=obj @@ -18,12 +18,12 @@ def f(obj): # $ cfgdefines=f cfgdefines=obj except TypeError: pass - # The first try's body always returns; its except handler does not - # raise or otherwise transfer control, so under "no expressions - # raise" the only paths out of the try-statement are dead. Everything - # below is unreachable. + # The try-body always returns, but `len(obj)` can raise (it is + # inside the try, so we model its exception edge). The + # `except TypeError: pass` handler falls through to here, making + # the code below reachable. try: - hint = type(obj).__length_hint__ + hint = type(obj).__length_hint__ # $ cfgdefines=hint except AttributeError: return None return hint @@ -35,7 +35,8 @@ def g(): # $ cfgdefines=g except: raise Exception("outer") else: - # Unreachable: the inner try body always raises, so the `else:` + # Unreachable: the inner try body always raises (via an explicit + # `raise`, which is modelled unconditionally), so the `else:` # clause never runs. hit_inner_else = True @@ -46,7 +47,7 @@ def h(cache, key): # $ cfgdefines=h cfgdefines=cache cfgdefines=key except KeyError: pass - # Same pattern as `f`: dead under "no expressions raise". - value = compute(key) + # Same pattern as `f`: reachable via the except-handler fall-through. + value = compute(key) # $ cfgdefines=value cache[key] = value return value