Python: Use API graphs instead of points-to for simple built-ins

Also extends the list of known built-ins slightly, to add some that were
missing.
This commit is contained in:
Taus
2026-03-19 15:33:04 +00:00
parent 7a33e2f539
commit f4841e1f39
6 changed files with 32 additions and 36 deletions

View File

@@ -32,7 +32,9 @@ module Builtins {
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
// Added for compatibility
"exec"
"exec",
// Added by the `site` module (available by default unless `-S` is used)
"copyright", "credits", "exit", "quit"
]
or
// Built-in constants shared between Python 2 and 3
@@ -51,8 +53,8 @@ module Builtins {
or
// Python 2 only
result in [
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", "unichr",
"unicode", "xrange"
"apply", "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
"unichr", "unicode", "xrange"
]
}

View File

@@ -10,9 +10,10 @@
*/
import python
private import LegacyPointsTo
private import semmle.python.types.Builtins
private import semmle.python.ApiGraphs
from CallNode call, ControlFlowNodeWithPointsTo func
where major_version() = 2 and call.getFunction() = func and func.pointsTo(Value::named("apply"))
from CallNode call
where
major_version() = 2 and
call = API::builtin("apply").getACall().asCfgNode()
select call, "Call to the obsolete builtin function 'apply'."

View File

@@ -11,7 +11,7 @@
*/
import python
private import LegacyPointsTo
private import semmle.python.ApiGraphs
/**
* Holds if the module `name` was deprecated in Python version `major`.`minor`,
@@ -80,7 +80,7 @@ where
name = imp.getName() and
deprecated_module(name, instead, _, _) and
not exists(Try try, ExceptStmt except | except = try.getAHandler() |
except.getType().(ExprWithPointsTo).pointsTo(ClassValue::importError()) and
except.getType() = API::builtin("ImportError").getAValueReachableFromSource().asExpr() and
except.containsInScope(imp)
)
select imp, deprecation_message(name) + replacement_message(name)

View File

@@ -13,7 +13,7 @@
*/
import python
private import LegacyPointsTo
private import semmle.python.ApiGraphs
predicate func_with_side_effects(Expr e) {
exists(string name | name = e.(Attribute).getName() or name = e.(Name).getId() |
@@ -24,11 +24,11 @@ predicate func_with_side_effects(Expr e) {
}
predicate call_with_side_effect(Call e) {
e.getAFlowNode() = Value::named("subprocess.call").getACall()
or
e.getAFlowNode() = Value::named("subprocess.check_call").getACall()
or
e.getAFlowNode() = Value::named("subprocess.check_output").getACall()
e.getAFlowNode() =
API::moduleImport("subprocess")
.getMember(["call", "check_call", "check_output"])
.getACall()
.asCfgNode()
}
predicate probable_side_effect(Expr e) {

View File

@@ -13,7 +13,7 @@
*/
import python
private import LegacyPointsTo
private import semmle.python.ApiGraphs
predicate isInsideLoop(AstNode node) {
node.getParentNode() instanceof While
@@ -33,9 +33,9 @@ where
not isInsideLoop(del) and
// False positive: calling `sys.exc_info` within a function results in a
// reference cycle, and an explicit call to `del` helps break this cycle.
not exists(FunctionValue ex |
ex = Value::named("sys.exc_info") and
ex.getACall().getScope() = f
not exists(API::CallNode call |
call = API::moduleImport("sys").getMember("exc_info").getACall() and
call.getScope() = f
)
select del, "Unnecessary deletion of local variable $@ in function $@.", e, e.toString(), f,
f.getName()

View File

@@ -12,7 +12,7 @@
*/
import python
private import LegacyPointsTo
private import semmle.python.ApiGraphs
import Definition
predicate is_increment(Stmt s) {
@@ -41,23 +41,16 @@ predicate one_item_only(For f) {
)
}
predicate points_to_call_to_range(ControlFlowNode f) {
/* (x)range is a function in Py2 and a class in Py3, so we must treat it as a plain object */
exists(Value range |
range = Value::named("range") or
range = Value::named("xrange")
|
f = range.getACall()
)
/** Holds if `node` is a call to `range`, `xrange`, or `list(range(...))`. */
predicate call_to_range(DataFlow::Node node) {
node = API::builtin(["range", "xrange"]).getACall()
or
/* In case points-to fails due to 'from six.moves import range' or similar. */
exists(string range | f.getNode().(Call).getFunc().(Name).getId() = range |
range = "range" or range = "xrange"
)
/* Handle 'from six.moves import range' or similar. */
node = API::moduleImport("six").getMember("moves").getMember(["range", "xrange"]).getACall()
or
/* Handle list(range(...)) and list(list(range(...))) */
f.(CallNode).(ControlFlowNodeWithPointsTo).pointsTo().getClass() = ClassValue::list() and
points_to_call_to_range(f.(CallNode).getArg(0))
node = API::builtin("list").getACall() and
call_to_range(node.(DataFlow::CallCfgNode).getArg(0))
}
/** Whether n is a use of a variable that is not effectively a constant. */
@@ -102,8 +95,8 @@ from For f, Variable v, string msg
where
f.getTarget() = v.getAnAccess() and
not f.getAStmt().contains(v.getAnAccess()) and
not points_to_call_to_range(f.getIter().getAFlowNode()) and
not points_to_call_to_range(get_comp_iterable(f)) and
not call_to_range(DataFlow::exprNode(f.getIter())) and
not call_to_range(DataFlow::exprNode(get_comp_iterable(f).getNode())) and
not name_acceptable_for_unused_variable(v) and
not f.getScope().getName() = "genexpr" and
not empty_loop(f) and