Merge pull request #19709 from joefarebrother/python-qual-init-call-subclass

Python: Modernize the init-calls-subclass query
This commit is contained in:
Joe Farebrother
2025-06-18 14:21:25 +01:00
committed by GitHub
12 changed files with 226 additions and 138 deletions

View File

@@ -1,3 +1,4 @@
ql/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql
ql/python/ql/src/Functions/IterReturnsNonSelf.ql
ql/python/ql/src/Functions/NonCls.ql
ql/python/ql/src/Functions/NonSelf.ql

View File

@@ -4,7 +4,7 @@ ql/python/ql/src/Classes/EqualsOrHash.ql
ql/python/ql/src/Classes/EqualsOrNotEquals.ql
ql/python/ql/src/Classes/IncompleteOrdering.ql
ql/python/ql/src/Classes/InconsistentMRO.ql
ql/python/ql/src/Classes/InitCallsSubclassMethod.ql
ql/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql
ql/python/ql/src/Classes/MissingCallToDel.ql
ql/python/ql/src/Classes/MissingCallToInit.ql
ql/python/ql/src/Classes/MutatingDescriptor.ql

View File

@@ -0,0 +1,48 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
When initializing an instance of the class in the class' <code>__init__</code> method, calls tha are made using the instance may receive an instance of the class that is not
yet fully initialized. When a method called in an initializer is overridden in a subclass, the subclass method receives the instance
in a potentially unexpected state. Fields that would be initialized after the call, including potentially in the subclass' <code>__init__</code> method,
will not be initialized. This may lead to runtime errors, as well as make the code more difficult to maintain, as future changes may not
be aware of which fields would not be initialized.
</p>
</overview>
<recommendation>
<p>If possible, refactor the initializer method such that initialization is complete before calling any overridden methods.
For helper methods used as part of initialization, avoid overriding them, and instead call any additional logic required
in the subclass' <code>__init__</code> method.
</p>
<p>
If the overridden method does not depend on the instance <code>self</code>, and only on its class, consider making it a <code>@classmethod</code> or <code>@staticmethod</code> instead.
</p>
<p>
If calling an overridden method is absolutely required, consider marking it as an internal method (by using an <code>_</code> prefix) to
discourage external users of the library from overriding it and observing partially initialized state, and ensure that the fact it is called during initialization
is mentioned in the documentation.
</p>
</recommendation>
<example>
<p>In the following case, the <code>__init__</code> method of <code>Super</code> calls the <code>set_up</code> method that is overridden by <code>Sub</code>.
This results in <code>Sub.set_up</code> being called with a partially initialized instance of <code>Super</code> which may be unexpected. </p>
<sample src="examples/InitCallsSubclassMethodBad.py" />
<p>In the following case, the initialization methods are separate between the superclass and the subclass.</p>
<sample src="examples/InitCallsSubclassMethodGood.py" />
</example>
<references>
<li>CERT Secure Coding: <a href="https://www.securecoding.cert.org/confluence/display/java/MET05-J.+Ensure+that+constructors+do+not+call+overridable+methods">
Rule MET05-J</a>. Reference discusses Java but is applicable to object oriented programming in many languages.</li>
<li>StackOverflow: <a href="https://stackoverflow.com/questions/3404301/whats-wrong-with-overridable-method-calls-in-constructors">Overridable method calls in constructors</a>.</li>
<li>Python documentation: <a href="https://docs.python.org/3/library/functions.html#classmethod">@classmethod</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,58 @@
/**
* @name `__init__` method calls overridden method
* @description Calling a method from `__init__` that is overridden by a subclass may result in a partially
* initialized instance being observed.
* @kind problem
* @tags reliability
* correctness
* quality
* @problem.severity warning
* @sub-severity low
* @precision high
* @id py/init-calls-subclass
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.internal.DataFlowDispatch
predicate initSelfCallOverridden(
Function init, DataFlow::Node self, DataFlow::MethodCallNode call, Function target,
Function override
) {
init.isInitMethod() and
call.getScope() = init and
exists(Class superclass, Class subclass, DataFlow::ParameterNode selfArg |
superclass = init.getScope() and
subclass = override.getScope() and
subclass = getADirectSubclass+(superclass) and
selfArg.getParameter() = init.getArg(0) and
DataFlow::localFlow(selfArg, self) and
call.calls(self, override.getName()) and
target = superclass.getAMethod() and
target.getName() = override.getName()
)
}
predicate readsFromSelf(Function method) {
exists(DataFlow::ParameterNode self, DataFlow::Node sink |
self.getParameter() = method.getArg(0) and
DataFlow::localFlow(self, sink)
|
sink instanceof DataFlow::ArgumentNode
or
sink = any(DataFlow::AttrRead a).getObject()
)
}
from
Function init, DataFlow::Node self, DataFlow::MethodCallNode call, Function target,
Function override
where
initSelfCallOverridden(init, self, call, target, override) and
readsFromSelf(override) and
not isClassmethod(override) and
not isStaticmethod(override) and
not target.getName().matches("\\_%")
select call, "This call to $@ in an initialization method is overridden by $@.", target,
target.getQualifiedName(), override, override.getQualifiedName()

View File

@@ -0,0 +1,23 @@
class Super(object):
def __init__(self, arg):
self._state = "Not OK"
self.set_up(arg) # BAD: This method is overridden, so `Sub.set_up` receives a partially initialized instance.
self._state = "OK"
def set_up(self, arg):
"Do some setup"
self.a = 2
class Sub(Super):
def __init__(self, arg):
super().__init__(arg)
self.important_state = "OK"
def set_up(self, arg):
super().set_up(arg)
"Do some more setup"
# BAD: at this point `self._state` is set to `"Not OK"`, and `self.important_state` is not initialized.
if self._state == "OK":
self.b = self.a + 2

View File

@@ -0,0 +1,24 @@
class Super(object):
def __init__(self, arg):
self._state = "Not OK"
self.super_set_up(arg) # GOOD: This isn't overriden. Instead, additional setup the subclass needs is called by the subclass' `__init__ method.`
self._state = "OK"
def super_set_up(self, arg):
"Do some setup"
self.a = 2
class Sub(Super):
def __init__(self, arg):
super().__init__(arg)
self.sub_set_up(self, arg)
self.important_state = "OK"
def sub_set_up(self, arg):
"Do some more setup"
if self._state == "OK":
self.b = self.a + 2

View File

@@ -1,48 +0,0 @@
#Superclass __init__ calls subclass method
class Super(object):
def __init__(self, arg):
self._state = "Not OK"
self.set_up(arg)
self._state = "OK"
def set_up(self, arg):
"Do some set up"
class Sub(Super):
def __init__(self, arg):
Super.__init__(self, arg)
self.important_state = "OK"
def set_up(self, arg):
Super.set_up(self, arg)
"Do some more set up" # Dangerous as self._state is "Not OK"
#Improved version with inheritance:
class Super(object):
def __init__(self, arg):
self._state = "Not OK"
self.super_set_up(arg)
self._state = "OK"
def super_set_up(self, arg):
"Do some set up"
class Sub(Super):
def __init__(self, arg):
Super.__init__(self, arg)
self.sub_set_up(self, arg)
self.important_state = "OK"
def sub_set_up(self, arg):
"Do some more set up"

View File

@@ -1,42 +0,0 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
When an instance of a class is initialized, the super-class state should be
fully initialized before it becomes visible to the subclass.
Calling methods of the subclass in the superclass' <code>__init__</code>
method violates this important invariant.
</p>
</overview>
<recommendation>
<p>Do not use methods that are subclassed in the construction of an object.
For simpler cases move the initialization into the superclass' <code>__init__</code> method,
preventing it being overridden. Additional initialization of subclass should
be done in the <code>__init__</code> method of the subclass.
For more complex cases, it is advisable to use a static method or function to manage
object creation.
</p>
<p>Alternatively, avoid inheritance altogether using composition instead.</p>
</recommendation>
<example>
<sample src="InitCallsSubclassMethod.py" />
</example>
<references>
<li>CERT Secure Coding: <a href="https://www.securecoding.cert.org/confluence/display/java/MET05-J.+Ensure+that+constructors+do+not+call+overridable+methods">
Rule MET05-J</a>. Although this is a Java rule it applies to most object-oriented languages.</li>
<li>Python Standard Library: <a href="http://docs.python.org/library/functions.html#staticmethod">Static methods</a>.</li>
<li>Wikipedia: <a href="http://en.wikipedia.org/wiki/Composition_over_inheritance">Composition over inheritance</a>.</li>
</references>
</qhelp>

View File

@@ -1,30 +0,0 @@
/**
* @name `__init__` method calls overridden method
* @description Calling a method from `__init__` that is overridden by a subclass may result in a partially
* initialized instance being observed.
* @kind problem
* @tags reliability
* correctness
* @problem.severity warning
* @sub-severity low
* @precision high
* @id py/init-calls-subclass
*/
import python
from
ClassObject supercls, string method, Call call, FunctionObject overriding,
FunctionObject overridden
where
exists(FunctionObject init, SelfAttribute sa |
supercls.declaredAttribute("__init__") = init and
call.getScope() = init.getFunction() and
call.getFunc() = sa
|
sa.getName() = method and
overridden = supercls.declaredAttribute(method) and
overriding.overrides(overridden)
)
select call, "Call to self.$@ in __init__ method, which is overridden by $@.", overridden, method,
overriding, overriding.descriptiveString()

View File

@@ -1 +1,2 @@
| init_calls_subclass.py:7:9:7:24 | Attribute() | Call to self.$@ in __init__ method, which is overridden by $@. | init_calls_subclass.py:10:5:10:26 | Function set_up | set_up | init_calls_subclass.py:19:5:19:26 | Function set_up | method Sub.set_up |
| init_calls_subclass.py:8:13:8:28 | ControlFlowNode for Attribute() | This call to $@ in an initialization method is overridden by $@. | init_calls_subclass.py:11:9:11:30 | Function set_up | bad1.Super.set_up | init_calls_subclass.py:20:9:20:30 | Function set_up | bad1.Sub.set_up |
| init_calls_subclass.py:32:13:32:27 | ControlFlowNode for Attribute() | This call to $@ in an initialization method is overridden by $@. | init_calls_subclass.py:34:9:34:27 | Function postproc | bad2.Super.postproc | init_calls_subclass.py:43:9:43:27 | Function postproc | bad2.Sub.postproc |

View File

@@ -1 +1 @@
Classes/InitCallsSubclassMethod.ql
Classes/InitCallsSubclass/InitCallsSubclassMethod.ql

View File

@@ -1,22 +1,75 @@
#Superclass __init__ calls subclass method
class Super(object):
def bad1():
class Super:
def __init__(self, arg):
self._state = "Not OK"
self.set_up(arg)
self._state = "OK"
def __init__(self, arg):
self._state = "Not OK"
self.set_up(arg) # BAD: set_up is overriden.
self._state = "OK"
def set_up(self, arg):
"Do some set up"
def set_up(self, arg):
"Do some set up"
class Sub(Super):
class Sub(Super):
def __init__(self, arg):
Super.__init__(self, arg)
self.important_state = "OK"
def __init__(self, arg):
super().__init__(arg)
self.important_state = "OK"
def set_up(self, arg):
Super.set_up(self, arg)
"Do some more set up" # Dangerous as self._state is "Not OK" and
# self.important_state is uninitialized
def set_up(self, arg):
super().set_up(arg)
"Do some more set up" # `self` is partially initialized
if self.important_state == "OK":
pass
def bad2():
class Super:
def __init__(self, arg):
self.a = arg
# BAD: postproc is called after initialization. This is still an issue
# since it may still occur before all initialization on a subclass is complete.
self.postproc()
def postproc(self):
if self.a == 1:
pass
class Sub(Super):
def __init__(self, arg):
super().__init__(arg)
self.b = 3
def postproc(self):
if self.a == 2 and self.b == 3:
pass
def good3():
class Super:
def __init__(self, arg):
self.a = arg
self.set_b() # OK: Here `set_b` is used for initialization, but does not read the partially initialized state of `self`.
self.c = 1
def set_b(self):
self.b = 3
class Sub(Super):
def set_b(self):
self.b = 4
def good4():
class Super:
def __init__(self, arg):
self.a = arg
# OK: Here `_set_b` is likely an internal method (as indicated by the _ prefix).
# We assume thus that regular consumers of the library will not override it, and classes that do are internal and account for `self`'s partially initialized state.
self._set_b()
self.c = 1
def _set_b(self):
self.b = 3
class Sub(Super):
def _set_b(self):
self.b = self.a+1