mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Merge pull request #19165 from joefarebrother/python-qual-loop-var-capture
Python: Modernize the Loop Variable Capture query
This commit is contained in:
@@ -1,18 +0,0 @@
|
||||
|
||||
#Make a list of functions to increment their arguments by 0 to 9.
|
||||
def make_incrementers():
|
||||
result = []
|
||||
for i in range(10):
|
||||
def incrementer(x):
|
||||
return x + i
|
||||
result.append(incrementer)
|
||||
return result
|
||||
|
||||
#This will fail
|
||||
def test():
|
||||
incs = make_incrementers()
|
||||
for x in range(10):
|
||||
for y in range(10):
|
||||
assert incs[x](y) == x+y
|
||||
|
||||
test()
|
||||
@@ -1,60 +0,0 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
Nested functions are a useful feature of Python as it allows a function to access the variables of its enclosing function.
|
||||
However, the programmer needs to be aware that when an inner function accesses a variable in an outer scope,
|
||||
it is the variable that is captured, not the value of that variable.
|
||||
</p>
|
||||
<p>
|
||||
Therefore, care must be taken when the captured variable is a loop variable, since it is the loop <em>variable</em> and
|
||||
<em>not</em> the <em>value</em> of that variable that is captured.
|
||||
This will mean that by the time that the inner function executes,
|
||||
the loop variable will have its final value, not the value when the inner function was created.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
<p>
|
||||
The simplest way to fix this problem is to add a local variable of the same name as the outer variable and initialize that
|
||||
using the outer variable as a default.
|
||||
<code>
|
||||
for var in seq:
|
||||
...
|
||||
def inner_func(arg):
|
||||
...
|
||||
use(var)
|
||||
</code>
|
||||
becomes
|
||||
<code>
|
||||
for var in seq:
|
||||
...
|
||||
def inner_func(arg, var=var):
|
||||
...
|
||||
use(var)
|
||||
</code>
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
<example>
|
||||
<p>
|
||||
In this example, a list of functions is created which should each increment its argument by its index in the list.
|
||||
However, since <code>i</code> will be 9 when the functions execute, they will each increment their argument by 9.
|
||||
</p>
|
||||
<sample src="LoopVariableCapture.py" />
|
||||
<p>
|
||||
This can be fixed by adding the default value as shown below. The default value is computed when the function is created, so the desired effect is achieved.
|
||||
</p>
|
||||
|
||||
<sample src="LoopVariableCapture2.py" />
|
||||
|
||||
</example>
|
||||
<references>
|
||||
<li>The Hitchhiker’s Guide to Python: <a href="http://docs.python-guide.org/en/latest/writing/gotchas/#late-binding-closures">Late Binding Closures</a></li>
|
||||
<li>Python Language Reference: <a href="https://docs.python.org/reference/executionmodel.html#naming-and-binding">Naming and binding</a></li>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -1,47 +0,0 @@
|
||||
/**
|
||||
* @name Loop variable capture
|
||||
* @description Capture of a loop variable is not the same as capturing the value of a loop variable, and may be erroneous.
|
||||
* @kind problem
|
||||
* @tags correctness
|
||||
* @problem.severity error
|
||||
* @sub-severity low
|
||||
* @precision high
|
||||
* @id py/loop-variable-capture
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
// Gets the scope of the iteration variable of the looping scope
|
||||
Scope iteration_variable_scope(AstNode loop) {
|
||||
result = loop.(For).getScope()
|
||||
or
|
||||
result = loop.(Comp).getFunction()
|
||||
}
|
||||
|
||||
predicate capturing_looping_construct(CallableExpr capturing, AstNode loop, Variable var) {
|
||||
var.getScope() = iteration_variable_scope(loop) and
|
||||
var.getAnAccess().getScope() = capturing.getInnerScope() and
|
||||
capturing.getParentNode+() = loop and
|
||||
(
|
||||
loop.(For).getTarget() = var.getAnAccess()
|
||||
or
|
||||
var = loop.(Comp).getAnIterationVariable()
|
||||
)
|
||||
}
|
||||
|
||||
predicate escaping_capturing_looping_construct(CallableExpr capturing, AstNode loop, Variable var) {
|
||||
capturing_looping_construct(capturing, loop, var) and
|
||||
// Escapes if used out side of for loop or is a lambda in a comprehension
|
||||
(
|
||||
loop instanceof For and
|
||||
exists(Expr e | e.pointsTo(_, _, capturing) | not loop.contains(e))
|
||||
or
|
||||
loop.(Comp).getElt() = capturing
|
||||
or
|
||||
loop.(Comp).getElt().(Tuple).getAnElt() = capturing
|
||||
)
|
||||
}
|
||||
|
||||
from CallableExpr capturing, AstNode loop, Variable var
|
||||
where escaping_capturing_looping_construct(capturing, loop, var)
|
||||
select capturing, "Capture of loop variable $@.", loop, var.getId()
|
||||
@@ -0,0 +1,47 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
In Python, a nested function or lambda expression that captures a variable from its surrounding scope is a <em>late-binding</em> closure,
|
||||
meaning that the value of the variable is determined when the closure is called, not when it is created.
|
||||
</p>
|
||||
<p>
|
||||
Care must be taken when the captured variable is a loop variable. If the closure is called after the loop ends, it will use the value of the variable on the last iteration of the loop, rather than the value at the iteration at which it was created.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
<p>
|
||||
Ensure that closures that capture loop variables aren't used outside of a single iteration of the loop.
|
||||
To capture the value of a loop variable at the time the closure is created, use a default parameter, or <code>functools.partial</code>.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
<example>
|
||||
<p>
|
||||
In the following (BAD) example, a <code>tasks</code> list is created, but each task captures the loop variable <code>i</code>, and reads the same value when run.
|
||||
</p>
|
||||
<sample src="examples/bad.py" />
|
||||
<p>
|
||||
In the following (GOOD) example, each closure has an <code>i</code> default parameter, shadowing the outer <code>i</code> variable, the default value of which is determined as the value of the loop variable <code>i</code> at the time the closure is created.
|
||||
</p>
|
||||
<sample src="examples/good.py" />
|
||||
<p>
|
||||
In the following (GOOD) example, <code>functools.partial</code> is used to partially evaluate the lambda expression with the value of <code>i</code>.
|
||||
</p>
|
||||
<sample src="examples/good2.py" />
|
||||
|
||||
|
||||
|
||||
</example>
|
||||
<references>
|
||||
<li>The Hitchhiker's Guide to Python: <a href="http://docs.python-guide.org/en/latest/writing/gotchas/#late-binding-closures">Late Binding Closures</a>.</li>
|
||||
<li>Python Language Reference: <a href="https://docs.python.org/reference/executionmodel.html#naming-and-binding">Naming and binding</a>.</li>
|
||||
<li>Stack Overflow: <a href="https://stackoverflow.com/questions/3431676/creating-functions-or-lambdas-in-a-loop-or-comprehension">Creating functions (or lambdas) in a loop (or comprehension)</a>.</li>
|
||||
<li>Python Language Reference: <a href="https://docs.python.org/3/library/functools.html#functools.partial">functools.partial</a>.</li>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* @name Loop variable capture
|
||||
* @description Capturing a loop variable is not the same as capturing its value, and can lead to unexpected behavior or bugs.
|
||||
* @kind path-problem
|
||||
* @tags correctness
|
||||
* quality
|
||||
* @problem.severity error
|
||||
* @sub-severity low
|
||||
* @precision high
|
||||
* @id py/loop-variable-capture
|
||||
*/
|
||||
|
||||
import python
|
||||
import LoopVariableCaptureQuery
|
||||
import EscapingCaptureFlow::PathGraph
|
||||
|
||||
from
|
||||
CallableExpr capturing, AstNode loop, Variable var, string descr,
|
||||
EscapingCaptureFlow::PathNode source, EscapingCaptureFlow::PathNode sink
|
||||
where
|
||||
escapingCapture(capturing, loop, var, source, sink) and
|
||||
if capturing instanceof Lambda then descr = "lambda" else descr = "function"
|
||||
select capturing, source, sink,
|
||||
"This " + descr + " captures the loop variable $@, and may escape the loop by being stored at $@.",
|
||||
loop, var.getId(), sink, "this location"
|
||||
@@ -0,0 +1,81 @@
|
||||
/** Definitions for reasoning about loop variable capture issues. */
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
|
||||
/** A looping construct. */
|
||||
abstract class Loop extends AstNode {
|
||||
/**
|
||||
* Gets a loop variable of this loop.
|
||||
* For example, `x` and `y` in `for x,y in pairs: print(x+y)`
|
||||
*/
|
||||
abstract Variable getALoopVariable();
|
||||
}
|
||||
|
||||
/** A `for` loop. */
|
||||
private class ForLoop extends Loop, For {
|
||||
override Variable getALoopVariable() {
|
||||
this.getTarget() = result.getAnAccess().getParentNode*() and
|
||||
result.getScope() = this.getScope()
|
||||
}
|
||||
}
|
||||
|
||||
/** Holds if the callable `capturing` captures the variable `var` from the loop `loop`. */
|
||||
predicate capturesLoopVariable(CallableExpr capturing, Loop loop, Variable var) {
|
||||
var.getAnAccess().getScope() = capturing.getInnerScope() and
|
||||
capturing.getParentNode+() = loop and
|
||||
var = loop.getALoopVariable()
|
||||
}
|
||||
|
||||
/** Dataflow configuration for reasoning about callables that capture a loop variable and then may escape from the loop. */
|
||||
module EscapingCaptureFlowConfig implements DataFlow::ConfigSig {
|
||||
predicate isSource(DataFlow::Node node) { capturesLoopVariable(node.asExpr(), _, _) }
|
||||
|
||||
predicate isSink(DataFlow::Node node) {
|
||||
// Stored in a dict/list.
|
||||
exists(Assign assign, Subscript sub |
|
||||
sub = assign.getATarget() and node.asExpr() = assign.getValue()
|
||||
)
|
||||
or
|
||||
// Stored in a list.
|
||||
exists(DataFlow::MethodCallNode mc | mc.calls(_, "append") and node = mc.getArg(0))
|
||||
or
|
||||
// Used in a yield statement, likely included in a collection.
|
||||
// The element of comprehension expressions desugar to involve a yield statement internally.
|
||||
exists(Yield y | node.asExpr() = y.getValue())
|
||||
// Checks for storing in a field leads to false positives, so are omitted.
|
||||
}
|
||||
|
||||
predicate isBarrierOut(DataFlow::Node node) { isSink(node) }
|
||||
|
||||
predicate isBarrier(DataFlow::Node node) {
|
||||
// Incorrect virtual dispatch to __call__ methods is a source of FPs.
|
||||
exists(Function call |
|
||||
call.getName() = "__call__" and
|
||||
call.getArg(0) = node.(DataFlow::ParameterNode).getParameter()
|
||||
)
|
||||
}
|
||||
|
||||
predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet cs) {
|
||||
isSink(node) and
|
||||
(
|
||||
cs instanceof DataFlow::TupleElementContent or
|
||||
cs instanceof DataFlow::ListElementContent or
|
||||
cs instanceof DataFlow::SetElementContent or
|
||||
cs instanceof DataFlow::DictionaryElementAnyContent
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** Dataflow for reasoning about callables that capture a loop variable and then escape from the loop. */
|
||||
module EscapingCaptureFlow = DataFlow::Global<EscapingCaptureFlowConfig>;
|
||||
|
||||
/** Holds if `capturing` is a callable that captures the variable `var` of the loop `loop`, and then may escape the loop via a flow path from `source` to `sink`. */
|
||||
predicate escapingCapture(
|
||||
CallableExpr capturing, Loop loop, Variable var, EscapingCaptureFlow::PathNode source,
|
||||
EscapingCaptureFlow::PathNode sink
|
||||
) {
|
||||
capturesLoopVariable(capturing, loop, var) and
|
||||
capturing = source.getNode().asExpr() and
|
||||
EscapingCaptureFlow::flowPath(source, sink)
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
# BAD: The loop variable `i` is captured.
|
||||
tasks = []
|
||||
for i in range(5):
|
||||
tasks.append(lambda: print(i))
|
||||
|
||||
# This will print `4,4,4,4,4`, rather than `0,1,2,3,4` as likely intended.
|
||||
for t in tasks:
|
||||
t()
|
||||
@@ -0,0 +1,8 @@
|
||||
# GOOD: A default parameter is used, so the variable `i` is not being captured.
|
||||
tasks = []
|
||||
for i in range(5):
|
||||
tasks.append(lambda i=i: print(i))
|
||||
|
||||
# This will print `0,1,2,3,4``.
|
||||
for t in tasks:
|
||||
t()
|
||||
@@ -0,0 +1,9 @@
|
||||
import functools
|
||||
# GOOD: `functools.partial` takes care of capturing the _value_ of `i`.
|
||||
tasks = []
|
||||
for i in range(5):
|
||||
tasks.append(functools.partial(lambda i: print(i), i))
|
||||
|
||||
# This will print `0,1,2,3,4``.
|
||||
for t in tasks:
|
||||
t()
|
||||
@@ -1,8 +0,0 @@
|
||||
| test.py:5:9:5:20 | FunctionExpr | Capture of loop variable $@. | test.py:4:5:4:23 | For | x |
|
||||
| test.py:10:6:10:14 | Lambda | Capture of loop variable $@. | test.py:10:5:10:36 | ListComp | i |
|
||||
| test.py:42:6:42:14 | Lambda | Capture of loop variable $@. | test.py:42:5:42:56 | ListComp | i |
|
||||
| test.py:43:6:43:14 | Lambda | Capture of loop variable $@. | test.py:43:5:43:56 | ListComp | j |
|
||||
| test.py:45:6:45:14 | Lambda | Capture of loop variable $@. | test.py:45:5:45:36 | SetComp | i |
|
||||
| test.py:49:8:49:16 | Lambda | Capture of loop variable $@. | test.py:49:5:49:38 | DictComp | i |
|
||||
| test.py:57:6:57:14 | Lambda | Capture of loop variable $@. | test.py:57:6:57:35 | GeneratorExp | i |
|
||||
| test.py:62:10:62:18 | Lambda | Capture of loop variable $@. | test.py:62:10:62:39 | GeneratorExp | i |
|
||||
@@ -1 +0,0 @@
|
||||
Variables/LoopVariableCapture.ql
|
||||
@@ -0,0 +1,19 @@
|
||||
import python
|
||||
import Variables.LoopVariableCapture.LoopVariableCaptureQuery
|
||||
import utils.test.InlineExpectationsTest
|
||||
|
||||
module MethodArgTest implements TestSig {
|
||||
string getARelevantTag() { result = "capturedVar" }
|
||||
|
||||
predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(CallableExpr capturing, Variable var |
|
||||
escapingCapture(capturing, _, var, _, _) and
|
||||
element = capturing.toString() and
|
||||
location = capturing.getLocation() and
|
||||
tag = "capturedVar" and
|
||||
value = var.getId()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
import MakeTest<MethodArgTest>
|
||||
@@ -2,12 +2,12 @@
|
||||
def bad1():
|
||||
results = []
|
||||
for x in range(10):
|
||||
def inner():
|
||||
def inner(): # $capturedVar=x
|
||||
return x
|
||||
results.append(inner)
|
||||
results.append(inner)
|
||||
return results
|
||||
|
||||
a = [lambda: i for i in range(1, 4)]
|
||||
a = [lambda: i for i in range(1, 4)] # $capturedVar=i
|
||||
for f in a:
|
||||
print(f())
|
||||
|
||||
@@ -18,7 +18,14 @@ def good1():
|
||||
for y in range(10):
|
||||
def inner(y=y):
|
||||
return y
|
||||
results.append(inner)
|
||||
results.append(inner)
|
||||
return results
|
||||
|
||||
# OK: Using default argument.
|
||||
def good2():
|
||||
results = []
|
||||
for y in range(10):
|
||||
results.append(lambda y=y: y)
|
||||
return results
|
||||
|
||||
#Factory function
|
||||
@@ -39,14 +46,14 @@ def ok1():
|
||||
result += inner()
|
||||
return result
|
||||
|
||||
b = [lambda: i for i in range(1, 4) for j in range(1,5)]
|
||||
c = [lambda: j for i in range(1, 4) for j in range(1,5)]
|
||||
b = [lambda: i for i in range(1, 4) for j in range(1,5)] # $capturedVar=i
|
||||
c = [lambda: j for i in range(1, 4) for j in range(1,5)] # $capturedVar=j
|
||||
|
||||
s = {lambda: i for i in range(1, 4)}
|
||||
s = {lambda: i for i in range(1, 4)} # $capturedVar=i
|
||||
for f in s:
|
||||
print(f())
|
||||
|
||||
d = {i:lambda: i for i in range(1, 4)}
|
||||
d = {i:lambda: i for i in range(1, 4)} # $capturedVar=i
|
||||
for k, f in d.items():
|
||||
print(k, f())
|
||||
|
||||
@@ -54,14 +61,15 @@ for k, f in d.items():
|
||||
#When the captured variable is used.
|
||||
#So technically this is a false positive, but it is extremely fragile
|
||||
#code, so I (Mark) think it is fine to report it as a violation.
|
||||
g = (lambda: i for i in range(1, 4))
|
||||
g = (lambda: i for i in range(1, 4)) # $capturedVar=i
|
||||
for f in g:
|
||||
print(f())
|
||||
|
||||
#But not if evaluated eagerly
|
||||
l = list(lambda: i for i in range(1, 4))
|
||||
l = list(lambda: i for i in range(1, 4)) # $capturedVar=i
|
||||
for f in l:
|
||||
print(f())
|
||||
|
||||
# This result is MISSING since the lambda is not detected to escape the loop
|
||||
def odasa4860(asset_ids):
|
||||
return dict((asset_id, filter(lambda c : c.asset_id == asset_id, xxx)) for asset_id in asset_ids)
|
||||
return dict((asset_id, filter(lambda c : c.asset_id == asset_id, xxx)) for asset_id in asset_ids) # $MISSING: capturedVar=asset_id
|
||||
|
||||
Reference in New Issue
Block a user