diff --git a/python/ql/integration-tests/query-suite/python-code-quality.qls.expected b/python/ql/integration-tests/query-suite/python-code-quality.qls.expected index b81d300d024..c2168cab937 100644 --- a/python/ql/integration-tests/query-suite/python-code-quality.qls.expected +++ b/python/ql/integration-tests/query-suite/python-code-quality.qls.expected @@ -1,3 +1,4 @@ +ql/python/ql/src/Functions/IterReturnsNonSelf.ql ql/python/ql/src/Functions/NonCls.ql ql/python/ql/src/Functions/NonSelf.ql ql/python/ql/src/Functions/ReturnConsistentTupleSizes.ql diff --git a/python/ql/src/Functions/IterReturnsNonSelf.qhelp b/python/ql/src/Functions/IterReturnsNonSelf.qhelp index f614d912ff0..0ad5a05fdf4 100644 --- a/python/ql/src/Functions/IterReturnsNonSelf.qhelp +++ b/python/ql/src/Functions/IterReturnsNonSelf.qhelp @@ -3,34 +3,27 @@ "qhelp.dtd"> -

The __iter__ method of an iterator should return self. -This is important so that iterators can be used as sequences in any context -that expect a sequence. To do so requires that __iter__ is -idempotent on iterators.

- -

-Note that sequences and mapping should return a new iterator, it is just the returned -iterator that must obey this constraint. +

Iterator classes (classes defining a __next__ method) should have an __iter__ method that returns the iterator itself. +This ensures that the object is also an iterable; and behaves as expected when used anywhere an iterator or iterable is expected, such as in for loops.

+ +
-

Make the __iter__ return self unless the class should not be an iterator, -in which case rename the next (Python 2) or __next__ (Python 3) -to something else.

+

Ensure that the __iter__ method returns self, or is otherwise equivalent as an iterator to self.

-

In this example the Counter class's __iter__ method does not -return self (or even an iterator). This will cause the program to fail when anyone attempts -to use the iterator in a for loop or in statement.

- +

In the following example, the MyRange class's __iter__ method does not return self. +This would lead to unexpected results when used with a for loop or in statement.

+
-
  • Python Language Reference: object.__iter__.
  • -
  • Python Standard Library: Iterators.
  • +
  • Python Language Reference: object.__iter__.
  • +
  • Python Standard Library: Iterators.
  • diff --git a/python/ql/src/Functions/IterReturnsNonSelf.ql b/python/ql/src/Functions/IterReturnsNonSelf.ql index 385677a5763..d6501a803a3 100644 --- a/python/ql/src/Functions/IterReturnsNonSelf.ql +++ b/python/ql/src/Functions/IterReturnsNonSelf.ql @@ -4,6 +4,7 @@ * @kind problem * @tags reliability * correctness + * quality * @problem.severity error * @sub-severity low * @precision high @@ -11,20 +12,79 @@ */ import python +import semmle.python.ApiGraphs -Function iter_method(ClassValue t) { result = t.lookup("__iter__").(FunctionValue).getScope() } +/** Gets the __iter__ method of `c`. */ +Function iterMethod(Class c) { result = c.getAMethod() and result.getName() = "__iter__" } -predicate is_self(Name value, Function f) { value.getVariable() = f.getArg(0).(Name).getVariable() } +/** Gets the `__next__` method of `c`. */ +Function nextMethod(Class c) { result = c.getAMethod() and result.getName() = "__next__" } -predicate returns_non_self(Function f) { - exists(f.getFallthroughNode()) +/** Holds if `var` is a variable referring to the `self` parameter of `f`. */ +predicate isSelfVar(Function f, Name var) { var.getVariable() = f.getArg(0).(Name).getVariable() } + +/** Holds if `e` is an expression that an iter function `f` should return. */ +predicate isGoodReturn(Function f, Expr e) { + isSelfVar(f, e) or - exists(Return r | r.getScope() = f and not is_self(r.getValue(), f)) - or - exists(Return r | r.getScope() = f and not exists(r.getValue())) + exists(DataFlow::CallCfgNode call, DataFlow::AttrRead read, DataFlow::Node selfNode | + e = call.asExpr() + | + call = API::builtin("iter").getACall() and + call.getArg(0) = read and + read.accesses(selfNode, "__next__") and + isSelfVar(f, selfNode.asExpr()) and + call.getArg(1).asExpr() instanceof None + ) } -from ClassValue t, Function iter -where t.isIterator() and iter = iter_method(t) and returns_non_self(iter) -select t, "Class " + t.getName() + " is an iterator but its $@ method does not return 'self'.", - iter, iter.getName() +/** Holds if the iter method `f` does not return `self` or an equivalent. */ +predicate returnsNonSelf(Function f) { + exists(f.getFallthroughNode()) + or + exists(Return r | r.getScope() = f and not isGoodReturn(f, r.getValue())) +} + +/** Holds if `iter` and `next` methods are wrappers around some field. */ +predicate iterWrapperMethods(Function iter, Function next) { + exists(string field | + exists(Return r, DataFlow::Node self, DataFlow::AttrRead read | + r.getScope() = iter and + r.getValue() = [iterCall(read).asExpr(), read.asExpr()] and + read.accesses(self, field) and + isSelfVar(iter, self.asExpr()) + ) and + exists(Return r, DataFlow::Node self, DataFlow::AttrRead read | + r.getScope() = next and + r.getValue() = nextCall(read).asExpr() and + read.accesses(self, field) and + isSelfVar(next, self.asExpr()) + ) + ) +} + +/** Gets a call to `iter(arg)` or `arg.__iter__()`. */ +private DataFlow::CallCfgNode iterCall(DataFlow::Node arg) { + result.(DataFlow::MethodCallNode).calls(arg, "__iter__") + or + result = API::builtin("iter").getACall() and + arg = result.getArg(0) and + not exists(result.getArg(1)) +} + +/** Gets a call to `next(arg)` or `arg.__next__()`. */ +private DataFlow::CallCfgNode nextCall(DataFlow::Node arg) { + result.(DataFlow::MethodCallNode).calls(arg, "__next__") + or + result = API::builtin("next").getACall() and + arg = result.getArg(0) +} + +from Class c, Function iter, Function next +where + next = nextMethod(c) and + iter = iterMethod(c) and + returnsNonSelf(iter) and + not iterWrapperMethods(iter, next) +select iter, "Iter method of iterator $@ does not return `" + iter.getArg(0).getName() + "`.", c, + c.getName() diff --git a/python/ql/src/Functions/IterReturnsNonSelf.py b/python/ql/src/Functions/examples/IterReturnsNonSelf.py similarity index 56% rename from python/ql/src/Functions/IterReturnsNonSelf.py rename to python/ql/src/Functions/examples/IterReturnsNonSelf.py index 6251b87aba7..20ba5eb1821 100644 --- a/python/ql/src/Functions/IterReturnsNonSelf.py +++ b/python/ql/src/Functions/examples/IterReturnsNonSelf.py @@ -4,10 +4,10 @@ class MyRange(object): self.high = high def __iter__(self): - return self.current + return (self.current, self.high) # BAD: does not return `self`. - def next(self): + def __next__(self): if self.current > self.high: - raise StopIteration + return None self.current += 1 return self.current - 1 \ No newline at end of file diff --git a/python/ql/src/change-notes/2025-05-23-iter-not-return-self.md b/python/ql/src/change-notes/2025-05-23-iter-not-return-self.md new file mode 100644 index 00000000000..80b8313a72b --- /dev/null +++ b/python/ql/src/change-notes/2025-05-23-iter-not-return-self.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The `py/iter-returns-non-self` query has been modernized, and no longer alerts for certain cases where an equivalent iterator is returned. \ No newline at end of file diff --git a/python/ql/test/query-tests/Functions/general/IterReturnsNonSelf.expected b/python/ql/test/query-tests/Functions/general/IterReturnsNonSelf.expected deleted file mode 100644 index 9fd22c1df61..00000000000 --- a/python/ql/test/query-tests/Functions/general/IterReturnsNonSelf.expected +++ /dev/null @@ -1 +0,0 @@ -| protocols.py:54:1:54:29 | class AlmostIterator | Class AlmostIterator is an iterator but its $@ method does not return 'self'. | protocols.py:62:5:62:23 | Function __iter__ | __iter__ | diff --git a/python/ql/test/query-tests/Functions/iterators/IterReturnsNonSelf.expected b/python/ql/test/query-tests/Functions/iterators/IterReturnsNonSelf.expected new file mode 100644 index 00000000000..a21f8de68a5 --- /dev/null +++ b/python/ql/test/query-tests/Functions/iterators/IterReturnsNonSelf.expected @@ -0,0 +1,2 @@ +| test.py:5:5:5:23 | Function __iter__ | Iter method of iterator $@ does not return `self`. | test.py:1:1:1:11 | Class Bad1 | Bad1 | +| test.py:51:5:51:23 | Function __iter__ | Iter method of iterator $@ does not return `self`. | test.py:42:1:42:21 | Class FalsePositive1 | FalsePositive1 | diff --git a/python/ql/test/query-tests/Functions/general/IterReturnsNonSelf.qlref b/python/ql/test/query-tests/Functions/iterators/IterReturnsNonSelf.qlref similarity index 100% rename from python/ql/test/query-tests/Functions/general/IterReturnsNonSelf.qlref rename to python/ql/test/query-tests/Functions/iterators/IterReturnsNonSelf.qlref diff --git a/python/ql/test/query-tests/Functions/iterators/test.py b/python/ql/test/query-tests/Functions/iterators/test.py new file mode 100644 index 00000000000..ced389967e4 --- /dev/null +++ b/python/ql/test/query-tests/Functions/iterators/test.py @@ -0,0 +1,53 @@ +class Bad1: + def __next__(self): + return 0 + + def __iter__(self): # BAD: Iter does not return self + yield 0 + +class Good1: + def __next__(self): + return 0 + + def __iter__(self): # GOOD: iter returns self + return self + +class Good2: + def __init__(self): + self._it = iter([0,0,0]) + + def __next__(self): + return next(self._it) + + def __iter__(self): # GOOD: iter and next are wrappers around a field + return self._it.__iter__() + +class Good3: + def __init__(self): + self._it = iter([0,0,0]) + + def __next__(self): + return self._it.__next__() + + def __iter__(self): # GOOD: iter and next are wrappers around a field + return self._it + +class Good4: + def __next__(self): + return 0 + + def __iter__(self): # GOOD: this is an equivalent iterator to `self`. + return iter(self.__next__, None) + +class FalsePositive1: + def __init__(self): + self._it = None + + def __next__(self): + if self._it is None: + self._it = iter(self) + return next(self._it) + + def __iter__(self): # SPURIOUS, GOOD: implementation of next ensures the iterator is equivalent to the one returned by iter, but this is not detected. + yield 0 + yield 0 \ No newline at end of file