Skip to content

Commit 7f7fca9

Browse files
Merge pull request #19165 from joefarebrother/python-qual-loop-var-capture
Python: Modernize the Loop Variable Capture query
2 parents d2a4f1e + 6802037 commit 7f7fca9

15 files changed

+216
-145
lines changed

python/ql/src/Variables/LoopVariableCapture.py

Lines changed: 0 additions & 18 deletions
This file was deleted.

python/ql/src/Variables/LoopVariableCapture.qhelp

Lines changed: 0 additions & 60 deletions
This file was deleted.

python/ql/src/Variables/LoopVariableCapture.ql

Lines changed: 0 additions & 47 deletions
This file was deleted.
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
In Python, a nested function or lambda expression that captures a variable from its surrounding scope is a <em>late-binding</em> closure,
9+
meaning that the value of the variable is determined when the closure is called, not when it is created.
10+
</p>
11+
<p>
12+
Care must be taken when the captured variable is a loop variable. If the closure is called after the loop ends, it will use the value of the variable on the last iteration of the loop, rather than the value at the iteration at which it was created.
13+
</p>
14+
15+
</overview>
16+
<recommendation>
17+
<p>
18+
Ensure that closures that capture loop variables aren't used outside of a single iteration of the loop.
19+
To capture the value of a loop variable at the time the closure is created, use a default parameter, or <code>functools.partial</code>.
20+
</p>
21+
22+
</recommendation>
23+
<example>
24+
<p>
25+
In the following (BAD) example, a <code>tasks</code> list is created, but each task captures the loop variable <code>i</code>, and reads the same value when run.
26+
</p>
27+
<sample src="examples/bad.py" />
28+
<p>
29+
In the following (GOOD) example, each closure has an <code>i</code> default parameter, shadowing the outer <code>i</code> variable, the default value of which is determined as the value of the loop variable <code>i</code> at the time the closure is created.
30+
</p>
31+
<sample src="examples/good.py" />
32+
<p>
33+
In the following (GOOD) example, <code>functools.partial</code> is used to partially evaluate the lambda expression with the value of <code>i</code>.
34+
</p>
35+
<sample src="examples/good2.py" />
36+
37+
38+
39+
</example>
40+
<references>
41+
<li>The Hitchhiker's Guide to Python: <a href="http://docs.python-guide.org/en/latest/writing/gotchas/#late-binding-closures">Late Binding Closures</a>.</li>
42+
<li>Python Language Reference: <a href="https://docs.python.org/reference/executionmodel.html#naming-and-binding">Naming and binding</a>.</li>
43+
<li>Stack Overflow: <a href="https://stackoverflow.com/questions/3431676/creating-functions-or-lambdas-in-a-loop-or-comprehension">Creating functions (or lambdas) in a loop (or comprehension)</a>.</li>
44+
<li>Python Language Reference: <a href="https://docs.python.org/3/library/functools.html#functools.partial">functools.partial</a>.</li>
45+
46+
</references>
47+
</qhelp>
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/**
2+
* @name Loop variable capture
3+
* @description Capturing a loop variable is not the same as capturing its value, and can lead to unexpected behavior or bugs.
4+
* @kind path-problem
5+
* @tags correctness
6+
* quality
7+
* @problem.severity error
8+
* @sub-severity low
9+
* @precision high
10+
* @id py/loop-variable-capture
11+
*/
12+
13+
import python
14+
import LoopVariableCaptureQuery
15+
import EscapingCaptureFlow::PathGraph
16+
17+
from
18+
CallableExpr capturing, AstNode loop, Variable var, string descr,
19+
EscapingCaptureFlow::PathNode source, EscapingCaptureFlow::PathNode sink
20+
where
21+
escapingCapture(capturing, loop, var, source, sink) and
22+
if capturing instanceof Lambda then descr = "lambda" else descr = "function"
23+
select capturing, source, sink,
24+
"This " + descr + " captures the loop variable $@, and may escape the loop by being stored at $@.",
25+
loop, var.getId(), sink, "this location"
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/** Definitions for reasoning about loop variable capture issues. */
2+
3+
import python
4+
import semmle.python.dataflow.new.DataFlow
5+
6+
/** A looping construct. */
7+
abstract class Loop extends AstNode {
8+
/**
9+
* Gets a loop variable of this loop.
10+
* For example, `x` and `y` in `for x,y in pairs: print(x+y)`
11+
*/
12+
abstract Variable getALoopVariable();
13+
}
14+
15+
/** A `for` loop. */
16+
private class ForLoop extends Loop, For {
17+
override Variable getALoopVariable() {
18+
this.getTarget() = result.getAnAccess().getParentNode*() and
19+
result.getScope() = this.getScope()
20+
}
21+
}
22+
23+
/** Holds if the callable `capturing` captures the variable `var` from the loop `loop`. */
24+
predicate capturesLoopVariable(CallableExpr capturing, Loop loop, Variable var) {
25+
var.getAnAccess().getScope() = capturing.getInnerScope() and
26+
capturing.getParentNode+() = loop and
27+
var = loop.getALoopVariable()
28+
}
29+
30+
/** Dataflow configuration for reasoning about callables that capture a loop variable and then may escape from the loop. */
31+
module EscapingCaptureFlowConfig implements DataFlow::ConfigSig {
32+
predicate isSource(DataFlow::Node node) { capturesLoopVariable(node.asExpr(), _, _) }
33+
34+
predicate isSink(DataFlow::Node node) {
35+
// Stored in a dict/list.
36+
exists(Assign assign, Subscript sub |
37+
sub = assign.getATarget() and node.asExpr() = assign.getValue()
38+
)
39+
or
40+
// Stored in a list.
41+
exists(DataFlow::MethodCallNode mc | mc.calls(_, "append") and node = mc.getArg(0))
42+
or
43+
// Used in a yield statement, likely included in a collection.
44+
// The element of comprehension expressions desugar to involve a yield statement internally.
45+
exists(Yield y | node.asExpr() = y.getValue())
46+
// Checks for storing in a field leads to false positives, so are omitted.
47+
}
48+
49+
predicate isBarrierOut(DataFlow::Node node) { isSink(node) }
50+
51+
predicate isBarrier(DataFlow::Node node) {
52+
// Incorrect virtual dispatch to __call__ methods is a source of FPs.
53+
exists(Function call |
54+
call.getName() = "__call__" and
55+
call.getArg(0) = node.(DataFlow::ParameterNode).getParameter()
56+
)
57+
}
58+
59+
predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet cs) {
60+
isSink(node) and
61+
(
62+
cs instanceof DataFlow::TupleElementContent or
63+
cs instanceof DataFlow::ListElementContent or
64+
cs instanceof DataFlow::SetElementContent or
65+
cs instanceof DataFlow::DictionaryElementAnyContent
66+
)
67+
}
68+
}
69+
70+
/** Dataflow for reasoning about callables that capture a loop variable and then escape from the loop. */
71+
module EscapingCaptureFlow = DataFlow::Global<EscapingCaptureFlowConfig>;
72+
73+
/** Holds if `capturing` is a callable that captures the variable `var` of the loop `loop`, and then may escape the loop via a flow path from `source` to `sink`. */
74+
predicate escapingCapture(
75+
CallableExpr capturing, Loop loop, Variable var, EscapingCaptureFlow::PathNode source,
76+
EscapingCaptureFlow::PathNode sink
77+
) {
78+
capturesLoopVariable(capturing, loop, var) and
79+
capturing = source.getNode().asExpr() and
80+
EscapingCaptureFlow::flowPath(source, sink)
81+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# BAD: The loop variable `i` is captured.
2+
tasks = []
3+
for i in range(5):
4+
tasks.append(lambda: print(i))
5+
6+
# This will print `4,4,4,4,4`, rather than `0,1,2,3,4` as likely intended.
7+
for t in tasks:
8+
t()
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# GOOD: A default parameter is used, so the variable `i` is not being captured.
2+
tasks = []
3+
for i in range(5):
4+
tasks.append(lambda i=i: print(i))
5+
6+
# This will print `0,1,2,3,4``.
7+
for t in tasks:
8+
t()
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import functools
2+
# GOOD: `functools.partial` takes care of capturing the _value_ of `i`.
3+
tasks = []
4+
for i in range(5):
5+
tasks.append(functools.partial(lambda i: print(i), i))
6+
7+
# This will print `0,1,2,3,4``.
8+
for t in tasks:
9+
t()

0 commit comments

Comments
 (0)