Compare commits

..

7 Commits

Author SHA1 Message Date
Taus
ca59ca0c2f Python: Add globallyDefinedName and extend monkeyPatchedBuiltin 2026-04-08 15:58:39 +00:00
Taus
205466d7ab Python: Model undefinedness
Adds `maybeUndefined` to the reachability module, modelling which
names/variables may be undefined at runtime. The approach is very close
to the one used in points-to, though it of course relies on our new
modelling of exceptions/reachability instead.
2026-04-08 15:58:39 +00:00
Taus
3e7986a14a Python: Extend reachability analysis with common guards
Adds `if False: ...` and `if typing.TYPE_CHECKING: ...` to the set of
nodes that are unlikely to be reachable.
2026-04-08 15:58:38 +00:00
Taus
ec9e72ee09 Python: Add getClassName for immutable literals
Used for queries where we mention the class of a literal in the alert
message.
2026-04-08 15:58:38 +00:00
Taus
6efedb7d00 Python: Extend ExceptionTypes API
Adds support for finding instances, and adds things like a
`BaseException` convenience class.
2026-04-08 15:58:38 +00:00
Taus
993311e436 Python: Add Reachability module
The implementation is essentially the same as the one from
`BasicBlockWithPointsTo`, with the main difference being that this one
uses the exception machinery we just added (and some extensions added in
this commit).
2026-04-08 15:54:48 +00:00
Taus
e14d493bcc Python: Move exception modelling to DataFlowDispatch.qll 2026-04-08 12:18:56 +00:00
6 changed files with 458 additions and 134 deletions

View File

@@ -1988,6 +1988,38 @@ OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall
module DuckTyping {
private import semmle.python.ApiGraphs
/**
 * Holds if `name` may be referenced without an explicit definition in the
 * source: it is known to `API::builtin`, it is `WindowsError`, it is one of
 * `__file__`, `__builtins__` or `__name__`, or it is `_` while some module
 * named `gettext` exists.
 */
predicate globallyDefinedName(string name) {
  name = ["WindowsError", "__file__", "__builtins__", "__name__"]
  or
  exists(API::builtin(name))
  or
  // `_` only counts when a module called `gettext` is present.
  exists(Module gettext | gettext.getName() = "gettext") and
  name = "_"
}
/**
 * Holds if some write installs `name` on the `builtins` module, either as an
 * attribute write on a reference to the module or as a subscript store into
 * its `__dict__` with a string-literal key.
 */
predicate monkeyPatchedBuiltin(string name) {
  // An attribute write whose object is a reference to `builtins`
  exists(DataFlow::AttrWrite write, DataFlow::Node builtinsRef |
    builtinsRef = API::moduleImport("builtins").getAValueReachableFromSource() and
    write.writes(builtinsRef, name, _)
  )
  or
  // builtins.__dict__["name"] = value
  exists(SubscriptNode store, DataFlow::Node dictRef |
    dictRef =
      API::moduleImport("builtins").getMember("__dict__").getAValueReachableFromSource() and
    store.isStore() and
    store.getObject() = dictRef.asCfgNode() and
    name = store.getIndex().getNode().(StringLiteral).getText()
  )
}
/**
* Holds if `cls` or any of its resolved superclasses declares a method with the given `name`.
*/
@@ -2158,4 +2190,415 @@ module DuckTyping {
or
f.getADecorator().(Name).getId() = "property"
}
/**
 * Gets the name of the builtin class of the immutable literal `lit`.
 *
 * String literals are split into text (`Unicode`) and bytes (`Bytes`)
 * literals so that a Python 3 `b"..."` literal is reported as `bytes` rather
 * than `str`. For Python 2 databases every string literal is still reported
 * as `str`, as before.
 */
string getClassName(ImmutableLiteral lit) {
  lit instanceof IntegerLiteral and result = "int"
  or
  lit instanceof NegativeIntegerLiteral and result = "int"
  or
  lit instanceof FloatLiteral and result = "float"
  or
  lit instanceof ImaginaryLiteral and result = "complex"
  or
  lit instanceof Unicode and result = "str"
  or
  // NOTE(review): assumes `Bytes` is exactly the non-unicode `StringLiteral`s
  // and that `major_version()` is in scope via `import python` - confirm.
  lit instanceof Bytes and
  (if major_version() = 2 then result = "str" else result = "bytes")
  or
  lit instanceof BooleanLiteral and result = "bool"
  or
  lit instanceof None and result = "NoneType"
}
}
/**
* Provides a class hierarchy for exception types, covering both builtin
* exceptions (from typeshed models) and user-defined exception classes.
*/
module ExceptionTypes {
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.data.internal.ApiGraphModels
/**
* Holds if `name` is a builtin exception class name.
*
* A name qualifies when `typeModel` relates `"builtins." + name` to
* `builtins.BaseException~Subclass` with an empty third argument.
*/
predicate builtinException(string name) {
typeModel("builtins.BaseException~Subclass", "builtins." + name, "")
}
/**
* Holds if builtin exception `sub` is a direct subclass of builtin exception `base`.
*
* Both names are unqualified; the `builtins.` prefix is added before the
* `typeModel` lookup.
* NOTE(review): "direct" is the original author's claim; whether `typeModel`
* rows are restricted to direct subclasses cannot be seen from here.
*/
private predicate builtinExceptionSubclass(string base, string sub) {
typeModel("builtins." + base + "~Subclass", "builtins." + sub, "")
}
/**
* An exception type, either a builtin exception or a user-defined exception class.
*
* The user branch has a value for every `Class`; the builtin branch only has
* values for names accepted by `builtinException`.
*/
newtype TExceptType =
/** A user-defined exception class. */
TUserExceptType(Class c) or
/** A builtin exception class, identified by name. */
TBuiltinExceptType(string name) { builtinException(name) }
/**
 * An exception type: either a user-defined class or a builtin exception class.
 *
 * The predicates describing a concrete type (`getName`, `getAUse`,
 * `getAnInstance`, `getADirectSuperclass`, `hasLocationInfo`) have no results
 * at this level; the subclasses `UserExceptType` and `BuiltinExceptType`
 * override them.
 */
class ExceptType extends TExceptType {
  /** Gets a string representation of this exception type, namely its name. */
  string toString() { result = this.getName() }

  /** Gets the name of this exception type. */
  string getName() { none() }

  /** Gets a data-flow node that refers to this exception type. */
  DataFlow::Node getAUse() { none() }

  /** Gets a data-flow node that refers to an instance of this exception type. */
  DataFlow::Node getAnInstance() { none() }

  /** Gets a direct superclass of this exception type. */
  ExceptType getADirectSuperclass() { none() }

  /**
   * Holds if this is a legal exception type, that is, `BaseException` is this
   * type itself or is reachable through zero or more `getADirectSuperclass` steps.
   */
  predicate isLegalExceptionType() {
    exists(BaseException root | root = this.getADirectSuperclass*())
  }

  /**
   * Holds if the `raise` statement `r` raises this exception type, either by
   * naming the class (`raise ValueError`), by instantiating it
   * (`raise ValueError("msg")`), or by raising a value tracked as an instance.
   */
  predicate isRaisedBy(Raise r) {
    exists(Expr exn | exn = r.getRaised() |
      // `raise C`
      this.getAUse().asExpr() = exn
      or
      // `raise C(...)`
      this.getAUse().asExpr() = exn.(Call).getFunc()
      or
      // `raise e`, where `e` is an instance of this type
      this.getAnInstance().asExpr() = exn
    )
  }

  /**
   * Holds if this exception type may be raised at control flow node `r`:
   * either `r` is a `raise` of this type, or `r` resolves to a call of a
   * function in which this type may be raised.
   */
  predicate isRaisedAt(ControlFlowNode r) {
    exists(Function callee | resolveCall(r, callee, _) | this.isRaisedIn(callee))
    or
    this.isRaisedBy(r.getNode())
  }

  /**
   * Holds if this exception type may be raised at some control flow node whose
   * scope is `f`; through `isRaisedAt` this includes functions called from `f`.
   */
  predicate isRaisedIn(Function f) {
    exists(ControlFlowNode inner | inner.getScope() = f | this.isRaisedAt(inner))
  }

  /**
   * Holds if the `except` clause at `handler` names this exception type, either
   * as its sole type expression or as an element of a tuple of types. A bare
   * `except:` is modelled as handling `BaseException`.
   */
  predicate isHandledAt(ExceptFlowNode handler) {
    exists(ExceptStmt stmt | stmt = handler.getNode() |
      this.getAUse().asExpr() = [stmt.getType(), stmt.getType().(Tuple).getAnElt()]
    )
    or
    // No type expression at all: a bare `except:`
    this instanceof BaseException and
    not exists(handler.getNode().(ExceptStmt).getType())
  }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startColumn` of line `startLine` to
   * column `endColumn` of line `endLine` in file `filePath`.
   * For more information, see
   * [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filePath, int startLine, int startColumn, int endLine, int endColumn
  ) {
    none()
  }
}
/** An exception type backed by a `Class` from the analysed source. */
class UserExceptType extends ExceptType, TUserExceptType {
  Class cls;

  UserExceptType() { this = TUserExceptType(cls) }

  /** Gets the `Class` this exception type wraps. */
  Class asClass() { result = cls }

  override string getName() { result = this.asClass().getName() }

  override DataFlow::Node getAUse() { result = classTracker(this.asClass()) }

  override DataFlow::Node getAnInstance() { result = classInstanceTracker(this.asClass()) }

  override ExceptType getADirectSuperclass() {
    // A builtin exception referenced directly by one of the base expressions
    exists(BuiltinExceptType builtin | builtin.getAUse().asExpr() = cls.getABase() |
      result = builtin
    )
    or
    // Another user-defined class resolved as a direct superclass
    exists(UserExceptType user | user.asClass() = getADirectSuperclass(cls) | result = user)
  }

  override predicate hasLocationInfo(
    string filePath, int startLine, int startColumn, int endLine, int endColumn
  ) {
    exists(Location loc | loc = cls.getLocation() |
      loc.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn)
    )
  }
}
/** An exception class from the builtins, represented by its name. */
class BuiltinExceptType extends ExceptType, TBuiltinExceptType {
  string name;

  BuiltinExceptType() { this = TBuiltinExceptType(name) }

  /** Gets the name of this builtin exception class. */
  string asBuiltinName() { result = name }

  override string getName() { result = this.asBuiltinName() }

  override DataFlow::Node getAUse() {
    exists(API::Node cls | cls = API::builtin(name) |
      result = cls.getAValueReachableFromSource()
    )
  }

  override DataFlow::Node getAnInstance() {
    exists(API::Node inst | inst = API::builtin(name).getAnInstance() |
      result = inst.getAValueReachableFromSource()
    )
  }

  override ExceptType getADirectSuperclass() {
    exists(BuiltinExceptType parent |
      // A type is never reported as its own superclass.
      parent != this and
      builtinExceptionSubclass(parent.asBuiltinName(), name)
    |
      result = parent
    )
  }

  override predicate hasLocationInfo(
    string filePath, int startLine, int startColumn, int endLine, int endColumn
  ) {
    // Builtins are reported at the empty location: no file, all positions zero.
    startLine = 0 and
    startColumn = 0 and
    endLine = 0 and
    endColumn = 0 and
    filePath = ""
  }
}
/** The builtin exception type named `BaseException`. */
class BaseException extends BuiltinExceptType {
  BaseException() { this.asBuiltinName() = "BaseException" }
}
/** The builtin exception type named `NameError`. */
class NameError extends BuiltinExceptType {
  NameError() { this.asBuiltinName() = "NameError" }
}
/**
 * Holds if `handler` is an exceptional successor of `r`, at least one
 * exception type is known to be raised at `r`, and none of the raised types
 * (nor any of their transitive superclasses) is handled by `handler`.
 */
predicate unlikelyExceptionEdge(ControlFlowNode r, ExceptFlowNode handler) {
  r.getAnExceptionalSuccessor() = handler and
  // Something is known about what `r` raises ...
  any(ExceptType t).isRaisedAt(r) and
  // ... and no raised type reaches a handled type via zero or more superclass steps.
  not exists(ExceptType raised |
    raised.isRaisedAt(r) and
    raised.getADirectSuperclass*().isHandledAt(handler)
  )
}
}
/**
* Provides predicates for reasoning about the reachability of control flow nodes
* and basic blocks.
*/
module Reachability {
private import semmle.python.ApiGraphs
import ExceptionTypes
/**
 * Holds if `call` is a call to a function that is known to never return
 * normally: the builtins `exit` and `quit`, `sys.exit`, `os._exit`,
 * `os.abort`, or a resolved callee for which `neverReturns` holds.
 */
predicate isCallToNeverReturningFunction(CallNode call) {
  // Known never-returning builtins and stdlib functions, found via API graphs
  exists(API::Node exiter |
    exiter = API::builtin(["exit", "quit"])
    or
    exiter = API::moduleImport("sys").getMember("exit")
    or
    exiter = API::moduleImport("os").getMember(["_exit", "abort"])
  |
    call = exiter.getACall().asCfgNode()
  )
  or
  // Functions from the source whose every normal exit is cut off
  exists(Function target | neverReturns(target) | resolveCall(call, target, _))
}
/**
 * Holds if function `f` never returns normally: it has a normal exit, and the
 * basic block of every normal exit is dominated by a block that ends in a
 * `raise` or in a call to a never-returning function.
 */
predicate neverReturns(Function f) {
  exists(f.getANormalExit()) and
  forall(BasicBlock exitBlock | exitBlock = f.getANormalExit().getBasicBlock() |
    exists(BasicBlock cutoff, ControlFlowNode last |
      cutoff.dominates(exitBlock) and
      last = cutoff.getLastNode()
    |
      last.getNode() instanceof Raise
      or
      isCallToNeverReturningFunction(last)
    )
  )
}
/**
 * Holds if `node` has an exceptional successor but is unlikely to actually
 * raise: it is either the entry node of a scope or a plain name.
 */
private predicate unlikelyToRaise(ControlFlowNode node) {
  (
    node = any(Scope s).getEntryNode()
    or
    node.getNode() instanceof Name
  ) and
  exists(node.getAnExceptionalSuccessor())
}
/**
 * Holds if control is highly unlikely to flow from `node` to `succ`.
 */
predicate unlikelySuccessor(ControlFlowNode node, ControlFlowNode succ) {
  // True branch of a guard that is always false (`if False:`, `if TYPE_CHECKING:`)
  succ = node.getATrueSuccessor() and
  isAlwaysFalseGuard(node)
  or
  // Exceptional edge out of a node that is unlikely to raise
  succ = node.getAnExceptionalSuccessor() and
  unlikelyToRaise(node)
  or
  // Exceptional edge into a handler that matches none of the raised types
  unlikelyExceptionEdge(node, succ)
  or
  // Non-exceptional successor of a call that never returns; `yield` successors are kept
  isCallToNeverReturningFunction(node) and
  succ = node.getASuccessor() and
  not node.getAnExceptionalSuccessor() = succ and
  not succ.getNode() instanceof Yield
}
/**
 * Holds if `node` is a condition that is always `False` at runtime.
 * This covers `if False:` and `if typing.TYPE_CHECKING:`, as well as the
 * `typing_extensions.TYPE_CHECKING` re-export of the same constant.
 */
private predicate isAlwaysFalseGuard(ControlFlowNode node) {
  node.getNode() instanceof False
  or
  node =
    API::moduleImport(["typing", "typing_extensions"])
        .getMember("TYPE_CHECKING")
        .getAValueReachableFromSource()
        .asCfgNode()
}
/**
 * Holds if the start of basic block `b` is likely reachable: `b` contains the
 * entry node of some scope, or `b` has a predecessor whose end is likely
 * reachable and whose edge into `b` is not an unlikely successor edge.
 */
private predicate startBbLikelyReachable(BasicBlock b) {
  b.getNode(_) = any(Scope s).getEntryNode()
  or
  exists(BasicBlock before |
    before = b.getAPredecessor() and
    endBbLikelyReachable(before)
  |
    not unlikelySuccessor(before.getLastNode(), b)
  )
}
/**
 * Holds if the end of basic block `b` is likely reachable: its start is
 * likely reachable, and every unlikely successor edge between two nodes of
 * `b` starts at the last node of `b`.
 */
private predicate endBbLikelyReachable(BasicBlock b) {
  startBbLikelyReachable(b) and
  forall(ControlFlowNode from, ControlFlowNode to |
    unlikelySuccessor(from, to) and
    from = b.getNode(_) and
    to = b.getNode(_)
  |
    from = b.getLastNode()
  )
}
/**
* Holds if basic block `b` is likely to be reachable from the entry of its
* enclosing scope.
*
* This is the public name for `startBbLikelyReachable`: it concerns the start
* of `b`, not whether control gets past every node inside it.
*/
predicate likelyReachable(BasicBlock b) { startBbLikelyReachable(b) }
/**
 * Holds if it is unlikely that `node` can be reached during execution: either
 * the start of its basic block is not likely reachable, or the block is cut
 * short and `node` comes after a node that has an unlikely successor edge.
 */
predicate unlikelyReachable(ControlFlowNode node) {
  exists(BasicBlock b, int i, int j |
    startBbLikelyReachable(b) and
    not endBbLikelyReachable(b) and
    unlikelySuccessor(b.getNode(i), _) and
    node = b.getNode(j) and
    i < j
  )
  or
  not startBbLikelyReachable(node.getBasicBlock())
}
/**
 * Holds if `var` is an SSA variable with neither a definition nor a phi node,
 * whose underlying global variable is a globally defined name or is
 * `__path__` in a package init module.
 */
private predicate implicitlyDefined(SsaVariable var) {
  not py_ssa_phi(var, _) and
  not exists(var.getDefinition()) and
  exists(GlobalVariable gv, string id |
    gv = var.getVariable() and
    id = gv.getId()
  |
    DuckTyping::globallyDefinedName(id)
    or
    id = "__path__" and gv.getScope().(Module).isPackageInit()
  )
}
/**
* Gets a phi input of `var`, pruned of unlikely edges.
*
* An input is kept when at least one predecessor block associated with it
* reaches the first node of `var`'s defining block by an edge that
* `unlikelySuccessor` does not flag.
*/
private SsaVariable getAPrunedPhiInput(SsaVariable var) {
result = var.getAPhiInput() and
exists(BasicBlock incoming | incoming = var.getPredecessorBlockForPhiArgument(result) |
not unlikelySuccessor(incoming.getLastNode(), var.getDefinition().getBasicBlock().firstNode())
)
}
/**
* Gets a predecessor block for a phi node, pruned of unlikely edges.
*
* A predecessor is dropped when the edge from its last node to the first node
* of `var`'s defining block is flagged by `unlikelySuccessor`.
*/
private BasicBlock getAPrunedPredecessorBlockForPhi(SsaVariable var) {
result = var.getAPredecessorBlockForPhi() and
not unlikelySuccessor(result.getLastNode(), var.getDefinition().getBasicBlock().firstNode())
}
/**
 * Holds if the SSA variable `var` may be undefined at some use.
 */
private predicate ssaMaybeUndefined(SsaVariable var) {
  // Nothing defines it: no definition, no phi node, and not implicitly defined
  not implicitlyDefined(var) and
  not py_ssa_phi(var, _) and
  not exists(var.getDefinition())
  or
  // Its definition is a deletion
  var.getDefinition().isDelete()
  or
  // One of its (pruned) phi inputs may itself be undefined
  ssaMaybeUndefined(getAPrunedPhiInput(var))
  or
  // Some likely-reachable (pruned) phi predecessor is not dominated by the
  // defining block of any phi input
  exists(BasicBlock pred |
    pred = getAPrunedPredecessorBlockForPhi(var) and
    likelyReachable(pred) and
    not var.getAPhiInput().getDefinition().getBasicBlock().dominates(pred)
  )
}
/**
 * Holds if `u` is a use of some SSA variable that may be undefined.
 */
predicate maybeUndefined(Name u) {
  ssaMaybeUndefined(any(SsaVariable var | var.getAUse().getNode() = u))
}
}

View File

@@ -15,74 +15,7 @@
import python
import semmle.python.dataflow.new.internal.DataFlowDispatch
import semmle.python.ApiGraphs
import semmle.python.frameworks.data.internal.ApiGraphModels
/**
* Holds if `name` is the unqualified name of a builtin exception class, i.e.
* `typeModel` relates `"builtins." + name` to `builtins.BaseException~Subclass`.
*/
predicate builtinException(string name) {
typeModel("builtins.BaseException~Subclass", "builtins." + name, "")
}
/**
* Holds if `typeModel` lists builtin `sub` as a subclass of builtin `base`.
* Both names are unqualified; the `builtins.` prefix is added for the lookup.
*/
predicate builtinExceptionSubclass(string base, string sub) {
typeModel("builtins." + base + "~Subclass", "builtins." + sub, "")
}
/** The underlying representation of an exception type. */
newtype TExceptType =
/** A class from the source; there is one value per `Class`. */
TClass(Class c) or
/** A builtin exception, identified by a name accepted by `builtinException`. */
TBuiltin(string name) { builtinException(name) }
/** An exception type: either a class from the source or a builtin exception. */
class ExceptType extends TExceptType {
  /** Gets the source class behind this type, if it is not a builtin. */
  Class asClass() { this = TClass(result) }

  /** Gets the builtin name behind this type, if it is a builtin. */
  string asBuiltinName() { this = TBuiltin(result) }

  /** Holds if this type is a builtin exception. */
  predicate isBuiltin() { exists(this.asBuiltinName()) }

  /** Gets the name of this exception type. */
  string getName() { result = [this.asClass().getName(), this.asBuiltinName()] }

  /** Gets a string representation of this exception type, namely its name. */
  string toString() { result = this.getName() }

  /** Gets a data-flow node that refers to this exception type. */
  DataFlow::Node getAUse() {
    exists(API::Node builtin | builtin = API::builtin(this.asBuiltinName()) |
      builtin.asSource().flowsTo(result)
    )
    or
    result = classTracker(this.asClass())
  }

  /** Gets a direct superclass of this exception type. */
  ExceptType getADirectSuperclass() {
    // builtin -> builtin, from the type model; never the type itself
    builtinExceptionSubclass(result.asBuiltinName(), this.asBuiltinName()) and
    this != result
    or
    // source class -> builtin referenced in its base list
    result.isBuiltin() and
    result.getAUse().asExpr() = this.asClass().getABase()
    or
    // source class -> source class
    result.asClass() = getADirectSuperclass(this.asClass())
  }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startColumn` of line `startLine` to
   * column `endColumn` of line `endLine` in file `filePath`.
   * For more information, see
   * [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filePath, int startLine, int startColumn, int endLine, int endColumn
  ) {
    exists(Location loc | loc = this.asClass().getLocation() |
      loc.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn)
    )
    or
    // Builtins are reported at the empty location.
    this.isBuiltin() and
    startLine = 0 and
    startColumn = 0 and
    endLine = 0 and
    endColumn = 0 and
    filePath = ""
  }
}
private import ExceptionTypes
predicate incorrectExceptOrder(ExceptStmt ex1, ExceptType cls1, ExceptStmt ex2, ExceptType cls2) {
exists(int i, int j, Try t |

View File

@@ -12,48 +12,16 @@
*/
import python
private import semmle.python.dataflow.new.internal.DataFlowDispatch
private import semmle.python.ApiGraphs
private import LegacyPointsTo
/**
* Holds if `cls_arg` references a known iterable builtin type, either directly
* (e.g. `list`) or as an element of a tuple (e.g. `(list, tuple)`).
*/
private predicate isIterableTypeArg(DataFlow::Node cls_arg) {
cls_arg =
API::builtin([
"list", "tuple", "set", "frozenset", "dict", "str", "bytes", "bytearray", "range",
"memoryview"
]).getAValueReachableFromSource()
or
isIterableTypeArg(DataFlow::exprNode(cls_arg.asExpr().(Tuple).getAnElt()))
}
/**
 * Holds if `iter` lies in a basic block controlled by the true outcome of an
 * `isinstance` call whose first argument shares a local source with `iter`
 * and whose second argument is an iterable type (e.g. `list`, `tuple`,
 * `set`, `dict`).
 */
predicate guardedByIsinstanceIterable(DataFlow::Node iter) {
  exists(
    DataFlow::CallCfgNode check, DataFlow::LocalSourceNode origin, DataFlow::GuardNode guard
  |
    check = API::builtin("isinstance").getACall() and
    guard = check.asCfgNode() and
    isIterableTypeArg(check.getArg(1)) and
    // The checked value and the iterated value come from the same local source.
    origin.flowsTo(check.getArg(0)) and
    origin.flowsTo(iter) and
    guard.controlsBlock(iter.asCfgNode().getBasicBlock(), true)
  )
}
from For loop, DataFlow::Node iter, Class cls
from For loop, ControlFlowNodeWithPointsTo iter, Value v, ClassValue t, ControlFlowNode origin
where
iter.asExpr() = loop.getIter() and
iter = classInstanceTracker(cls) and
not DuckTyping::isIterable(cls) and
not DuckTyping::isDescriptor(cls) and
not (loop.isAsync() and DuckTyping::hasMethod(cls, "__aiter__")) and
not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) and
not guardedByIsinstanceIterable(iter)
select loop, "This for-loop may attempt to iterate over a $@ of class $@.", iter.asExpr(),
"non-iterable instance", cls, cls.getName()
loop.getIter().getAFlowNode() = iter and
iter.pointsTo(_, v, origin) and
v.getClass() = t and
not t.isIterable() and
not t.failedInference(_) and
not v = Value::named("None") and
not t.isDescriptorType()
select loop, "This for-loop may attempt to iterate over a $@ of class $@.", origin,
"non-iterable instance", t, t.getName()

View File

@@ -1 +1,2 @@
| async_iterator.py:26:11:26:34 | For | This for-loop may attempt to iterate over a $@ of class $@. | async_iterator.py:26:20:26:33 | MissingAiter() | non-iterable instance | async_iterator.py:13:1:13:19 | Class MissingAiter | MissingAiter |
| async_iterator.py:26:11:26:34 | For | This for-loop may attempt to iterate over a $@ of class $@. | async_iterator.py:26:20:26:33 | ControlFlowNode for MissingAiter() | non-iterable instance | async_iterator.py:13:1:13:19 | class MissingAiter | MissingAiter |
| statements_test.py:34:5:34:19 | For | This for-loop may attempt to iterate over a $@ of class $@. | statements_test.py:34:18:34:18 | ControlFlowNode for IntegerLiteral | non-iterable instance | file://:0:0:0:0 | builtin-class int | int |

View File

@@ -1 +1 @@
| test.py:50:1:50:23 | For | This for-loop may attempt to iterate over a $@ of class $@. | test.py:50:10:50:22 | NonIterator() | non-iterable instance | test.py:45:1:45:26 | Class NonIterator | NonIterator |
| test.py:50:1:50:23 | For | This for-loop may attempt to iterate over a $@ of class $@. | test.py:50:10:50:22 | ControlFlowNode for NonIterator() | non-iterable instance | test.py:45:1:45:26 | class NonIterator | NonIterator |

View File

@@ -174,24 +174,3 @@ def assert_ok(seq):
# False positive. ODASA-8042. Fixed in PR #2401.
# The class body binds `e` to a generator expression over an empty list.
class false_positive:
e = (x for x in [])
# isinstance guard should suppress non-iterable warning
# Tuple-of-types form: the loop over `ni` sits inside an
# `isinstance(ni, (list, tuple))` check. The parameter `x` is unused.
def guarded_iteration(x):
ni = NonIterator()
if isinstance(ni, (list, tuple)):
for item in ni:
pass
# Single-type form: the loop over `ni` sits inside an
# `isinstance(ni, list)` check. The parameter `x` is unused.
def guarded_iteration_single(x):
ni = NonIterator()
if isinstance(ni, list):
for item in ni:
pass
# Negated isinstance guard: early return when NOT iterable
# The loop is only reached when `isinstance(ni, list)` was true, because the
# negated check returns first. The parameter `x` is unused.
def guarded_iteration_negated(x):
ni = NonIterator()
if not isinstance(ni, list):
return
for item in ni: # OK: guarded by negated isinstance + early return
pass