From f4841e1f391e29200e30c8db3f0b29c27ab97e84 Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 19 Mar 2026 15:33:04 +0000 Subject: [PATCH] Python: Use API graphs instead of points-to for simple built-ins Also extends the list of known built-ins slightly, to add some that were missing. --- .../python/dataflow/new/internal/Builtins.qll | 8 +++--- python/ql/src/Expressions/UseofApply.ql | 9 ++++--- python/ql/src/Imports/DeprecatedModule.ql | 4 +-- .../ql/src/Statements/SideEffectInAssert.ql | 12 ++++----- python/ql/src/Statements/UnnecessaryDelete.ql | 8 +++--- .../SuspiciousUnusedLoopIterationVariable.ql | 27 +++++++------------ 6 files changed, 32 insertions(+), 36 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll b/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll index 6a66d241083..764af5d9dc5 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll @@ -32,7 +32,9 @@ module Builtins { "UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError", "UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError", // Added for compatibility - "exec" + "exec", + // Added by the `site` module (available by default unless `-S` is used) + "copyright", "credits", "exit", "quit" ] or // Built-in constants shared between Python 2 and 3 @@ -51,8 +53,8 @@ module Builtins { or // Python 2 only result in [ - "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", "unichr", - "unicode", "xrange" + "apply", "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", + "unichr", "unicode", "xrange" ] } diff --git a/python/ql/src/Expressions/UseofApply.ql b/python/ql/src/Expressions/UseofApply.ql index 2012f2d9361..f1068eca837 100644 --- a/python/ql/src/Expressions/UseofApply.ql +++ b/python/ql/src/Expressions/UseofApply.ql @@ -10,9 +10,10 @@ */ import python -private import LegacyPointsTo -private import semmle.python.types.Builtins +private import semmle.python.ApiGraphs -from CallNode call, ControlFlowNodeWithPointsTo func -where major_version() = 2 and call.getFunction() = func and func.pointsTo(Value::named("apply")) +from CallNode call +where + major_version() = 2 and + call = API::builtin("apply").getACall().asCfgNode() select call, "Call to the obsolete builtin function 'apply'." diff --git a/python/ql/src/Imports/DeprecatedModule.ql b/python/ql/src/Imports/DeprecatedModule.ql index 3d529943fb8..62d77231334 100644 --- a/python/ql/src/Imports/DeprecatedModule.ql +++ b/python/ql/src/Imports/DeprecatedModule.ql @@ -11,7 +11,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs /** * Holds if the module `name` was deprecated in Python version `major`.`minor`, @@ -80,7 +80,7 @@ where name = imp.getName() and deprecated_module(name, instead, _, _) and not exists(Try try, ExceptStmt except | except = try.getAHandler() | - except.getType().(ExprWithPointsTo).pointsTo(ClassValue::importError()) and + except.getType() = API::builtin("ImportError").getAValueReachableFromSource().asExpr() and except.containsInScope(imp) ) select imp, deprecation_message(name) + replacement_message(name) diff --git a/python/ql/src/Statements/SideEffectInAssert.ql b/python/ql/src/Statements/SideEffectInAssert.ql index 902b6c4c9c1..7ac96030c04 100644 --- a/python/ql/src/Statements/SideEffectInAssert.ql +++ b/python/ql/src/Statements/SideEffectInAssert.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs predicate func_with_side_effects(Expr e) { exists(string name | name = e.(Attribute).getName() or name = e.(Name).getId() | @@ -24,11 +24,11 @@ predicate func_with_side_effects(Expr e) { } predicate call_with_side_effect(Call e) { - e.getAFlowNode() = Value::named("subprocess.call").getACall() - or - e.getAFlowNode() = Value::named("subprocess.check_call").getACall() - or - e.getAFlowNode() = Value::named("subprocess.check_output").getACall() + e.getAFlowNode() = + API::moduleImport("subprocess") + .getMember(["call", "check_call", "check_output"]) + .getACall() + .asCfgNode() } predicate probable_side_effect(Expr e) { diff --git a/python/ql/src/Statements/UnnecessaryDelete.ql b/python/ql/src/Statements/UnnecessaryDelete.ql index c7b80ecc66a..da239f81405 100644 --- a/python/ql/src/Statements/UnnecessaryDelete.ql +++ b/python/ql/src/Statements/UnnecessaryDelete.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs predicate isInsideLoop(AstNode node) { node.getParentNode() instanceof While @@ -33,9 +33,9 @@ where not isInsideLoop(del) and // False positive: calling `sys.exc_info` within a function results in a // reference cycle, and an explicit call to `del` helps break this cycle. - not exists(FunctionValue ex | - ex = Value::named("sys.exc_info") and - ex.getACall().getScope() = f + not exists(API::CallNode call | + call = API::moduleImport("sys").getMember("exc_info").getACall() and + call.getScope() = f ) select del, "Unnecessary deletion of local variable $@ in function $@.", e, e.toString(), f, f.getName() diff --git a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql index 87900c48fc5..18c83240667 100644 --- a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql +++ b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql @@ -12,7 +12,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs import Definition predicate is_increment(Stmt s) { @@ -41,23 +41,16 @@ predicate one_item_only(For f) { ) } -predicate points_to_call_to_range(ControlFlowNode f) { - /* (x)range is a function in Py2 and a class in Py3, so we must treat it as a plain object */ - exists(Value range | - range = Value::named("range") or - range = Value::named("xrange") - | - f = range.getACall() - ) +/** Holds if `node` is a call to `range`, `xrange`, or `list(range(...))`. */ +predicate call_to_range(DataFlow::Node node) { + node = API::builtin(["range", "xrange"]).getACall() or - /* In case points-to fails due to 'from six.moves import range' or similar. */ - exists(string range | f.getNode().(Call).getFunc().(Name).getId() = range | - range = "range" or range = "xrange" - ) + /* Handle 'from six.moves import range' or similar. */ + node = API::moduleImport("six").getMember("moves").getMember(["range", "xrange"]).getACall() or /* Handle list(range(...)) and list(list(range(...))) */ - f.(CallNode).(ControlFlowNodeWithPointsTo).pointsTo().getClass() = ClassValue::list() and - points_to_call_to_range(f.(CallNode).getArg(0)) + node = API::builtin("list").getACall() and + call_to_range(node.(DataFlow::CallCfgNode).getArg(0)) } /** Whether n is a use of a variable that is a not effectively a constant. */ @@ -102,8 +95,8 @@ from For f, Variable v, string msg where f.getTarget() = v.getAnAccess() and not f.getAStmt().contains(v.getAnAccess()) and - not points_to_call_to_range(f.getIter().getAFlowNode()) and - not points_to_call_to_range(get_comp_iterable(f)) and + not call_to_range(DataFlow::exprNode(f.getIter())) and + not call_to_range(DataFlow::exprNode(get_comp_iterable(f).getNode())) and not name_acceptable_for_unused_variable(v) and not f.getScope().getName() = "genexpr" and not empty_loop(f) and