Merge pull request #19554 from joefarebrother/python-qual-iter-not-return-self

Python: Modernize iter not returning self query
This commit is contained in:
Joe Farebrother
2025-06-13 13:13:31 +01:00
committed by GitHub
9 changed files with 144 additions and 32 deletions

View File

@@ -1,3 +1,4 @@
ql/python/ql/src/Functions/IterReturnsNonSelf.ql
ql/python/ql/src/Functions/NonCls.ql
ql/python/ql/src/Functions/NonSelf.ql
ql/python/ql/src/Functions/ReturnConsistentTupleSizes.ql

View File

@@ -3,34 +3,27 @@
"qhelp.dtd">
<qhelp>
<overview>
<p>The <code>__iter__</code> method of an iterator should return self.
This is important so that iterators can be used as sequences in any context
that expect a sequence. To do so requires that <code>__iter__</code> is
idempotent on iterators.</p>
<p>
Note that sequences and mapping should return a new iterator, it is just the returned
iterator that must obey this constraint.
<p>Iterator classes (classes defining a <code>__next__</code> method) should have an <code>__iter__</code> method that returns the iterator itself.
This ensures that the object is also an iterable; and behaves as expected when used anywhere an iterator or iterable is expected, such as in <code>for</code> loops.
</p>
</overview>
<recommendation>
<p>Make the <code>__iter__</code> return self unless the class should not be an iterator,
in which case rename the <code>next</code> (Python 2) or <code>__next__</code> (Python 3)
to something else.</p>
<p>Ensure that the <code>__iter__</code> method returns <code>self</code>, or is otherwise equivalent as an iterator to <code>self</code>.</p>
</recommendation>
<example>
<p>In this example the <code>Counter</code> class's <code>__iter__</code> method does not
return self (or even an iterator). This will cause the program to fail when anyone attempts
to use the iterator in a <code>for</code> loop or <code>in</code> statement.</p>
<sample src="IterReturnsNonSelf.py" />
<p>In the following example, the <code>MyRange</code> class's <code>__iter__</code> method does not return <code>self</code>.
This would lead to unexpected results when used with a <code>for</code> loop or <code>in</code> statement.</p>
<sample src="examples/IterReturnsNonSelf.py" />
</example>
<references>
<li>Python Language Reference: <a href="http://docs.python.org/2.7/reference/datamodel.html#object.__iter__">object.__iter__</a>.</li>
<li>Python Standard Library: <a href="http://docs.python.org/2/library/stdtypes.html#typeiter">Iterators</a>.</li>
<li>Python Language Reference: <a href="http://docs.python.org/3/reference/datamodel.html#object.__iter__">object.__iter__</a>.</li>
<li>Python Standard Library: <a href="http://docs.python.org/3/library/stdtypes.html#typeiter">Iterators</a>.</li>
</references>

View File

@@ -4,6 +4,7 @@
* @kind problem
* @tags reliability
* correctness
* quality
* @problem.severity error
* @sub-severity low
* @precision high
@@ -11,20 +12,79 @@
*/
import python
import semmle.python.ApiGraphs
Function iter_method(ClassValue t) { result = t.lookup("__iter__").(FunctionValue).getScope() }
/** Gets the __iter__ method of `c`. */
Function iterMethod(Class c) { result = c.getAMethod() and result.getName() = "__iter__" }
predicate is_self(Name value, Function f) { value.getVariable() = f.getArg(0).(Name).getVariable() }
/** Gets the `__next__` method of `c`. */
Function nextMethod(Class c) { result = c.getAMethod() and result.getName() = "__next__" }
predicate returns_non_self(Function f) {
exists(f.getFallthroughNode())
/** Holds if `var` is a variable referring to the `self` parameter of `f`. */
predicate isSelfVar(Function f, Name var) { var.getVariable() = f.getArg(0).(Name).getVariable() }
/** Holds if `e` is an expression that an iter function `f` should return. */
predicate isGoodReturn(Function f, Expr e) {
isSelfVar(f, e)
or
exists(Return r | r.getScope() = f and not is_self(r.getValue(), f))
or
exists(Return r | r.getScope() = f and not exists(r.getValue()))
exists(DataFlow::CallCfgNode call, DataFlow::AttrRead read, DataFlow::Node selfNode |
e = call.asExpr()
|
call = API::builtin("iter").getACall() and
call.getArg(0) = read and
read.accesses(selfNode, "__next__") and
isSelfVar(f, selfNode.asExpr()) and
call.getArg(1).asExpr() instanceof None
)
}
from ClassValue t, Function iter
where t.isIterator() and iter = iter_method(t) and returns_non_self(iter)
select t, "Class " + t.getName() + " is an iterator but its $@ method does not return 'self'.",
iter, iter.getName()
/** Holds if the iter method `f` does not return `self` or an equivalent. */
predicate returnsNonSelf(Function f) {
exists(f.getFallthroughNode())
or
exists(Return r | r.getScope() = f and not isGoodReturn(f, r.getValue()))
}
/** Holds if `iter` and `next` methods are wrappers around some field. */
predicate iterWrapperMethods(Function iter, Function next) {
exists(string field |
exists(Return r, DataFlow::Node self, DataFlow::AttrRead read |
r.getScope() = iter and
r.getValue() = [iterCall(read).asExpr(), read.asExpr()] and
read.accesses(self, field) and
isSelfVar(iter, self.asExpr())
) and
exists(Return r, DataFlow::Node self, DataFlow::AttrRead read |
r.getScope() = next and
r.getValue() = nextCall(read).asExpr() and
read.accesses(self, field) and
isSelfVar(next, self.asExpr())
)
)
}
/** Gets a call to `iter(arg)` or `arg.__iter__()`. */
private DataFlow::CallCfgNode iterCall(DataFlow::Node arg) {
result.(DataFlow::MethodCallNode).calls(arg, "__iter__")
or
result = API::builtin("iter").getACall() and
arg = result.getArg(0) and
not exists(result.getArg(1))
}
/** Gets a call to `next(arg)` or `arg.__next__()`. */
private DataFlow::CallCfgNode nextCall(DataFlow::Node arg) {
result.(DataFlow::MethodCallNode).calls(arg, "__next__")
or
result = API::builtin("next").getACall() and
arg = result.getArg(0)
}
from Class c, Function iter, Function next
where
next = nextMethod(c) and
iter = iterMethod(c) and
returnsNonSelf(iter) and
not iterWrapperMethods(iter, next)
select iter, "Iter method of iterator $@ does not return `" + iter.getArg(0).getName() + "`.", c,
c.getName()

View File

@@ -4,10 +4,10 @@ class MyRange(object):
self.high = high
def __iter__(self):
return self.current
return (self.current, self.high) # BAD: does not return `self`.
def next(self):
def __next__(self):
if self.current > self.high:
raise StopIteration
return None
self.current += 1
return self.current - 1

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The `py/iter-returns-non-self` query has been modernized, and no longer alerts for certain cases where an equivalent iterator is returned.

View File

@@ -1 +0,0 @@
| protocols.py:54:1:54:29 | class AlmostIterator | Class AlmostIterator is an iterator but its $@ method does not return 'self'. | protocols.py:62:5:62:23 | Function __iter__ | __iter__ |

View File

@@ -0,0 +1,2 @@
| test.py:5:5:5:23 | Function __iter__ | Iter method of iterator $@ does not return `self`. | test.py:1:1:1:11 | Class Bad1 | Bad1 |
| test.py:51:5:51:23 | Function __iter__ | Iter method of iterator $@ does not return `self`. | test.py:42:1:42:21 | Class FalsePositive1 | FalsePositive1 |

View File

@@ -0,0 +1,53 @@
class Bad1:
def __next__(self):
return 0
def __iter__(self): # BAD: Iter does not return self
yield 0
class Good1:
def __next__(self):
return 0
def __iter__(self): # GOOD: iter returns self
return self
class Good2:
def __init__(self):
self._it = iter([0,0,0])
def __next__(self):
return next(self._it)
def __iter__(self): # GOOD: iter and next are wrappers around a field
return self._it.__iter__()
class Good3:
def __init__(self):
self._it = iter([0,0,0])
def __next__(self):
return self._it.__next__()
def __iter__(self): # GOOD: iter and next are wrappers around a field
return self._it
class Good4:
def __next__(self):
return 0
def __iter__(self): # GOOD: this is an equivalent iterator to `self`.
return iter(self.__next__, None)
class FalsePositive1:
def __init__(self):
self._it = None
def __next__(self):
if self._it is None:
self._it = iter(self)
return next(self._it)
def __iter__(self): # SPURIOUS, GOOD: implementation of next ensures the iterator is equivalent to the one returned by iter, but this is not detected.
yield 0
yield 0