From 1f9899d7dbd77fa65583d9c67bd9258c35e77daa Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 17 Jun 2026 15:04:53 +0100 Subject: [PATCH] Extend added type tracking step to related types --- ...1-fix-type-tracking-instance-attributes.md | 2 +- .../new/internal/TypeTrackingImpl.qll | 66 ++++++++++++++++--- .../dataflow/typetracking/attribute_tests.py | 47 +++++++++++++ 3 files changed, 105 insertions(+), 10 deletions(-) diff --git a/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md b/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md index 25bc0e0f31f..da7b752ad67 100644 --- a/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md +++ b/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md @@ -1,4 +1,4 @@ --- category: minorAnalysis --- -* Python type tracking now follows values stored in instance attributes such as `self.attr` across instance methods on the same class. As a result, analysis is more likely to recognize user-defined objects that are stored on `self` and used later in other methods, which may produce additional results. +* Python type tracking now follows values stored in instance attributes such as `self.attr` across instance methods, including across a class hierarchy (for example, a value stored on `self.attr` in a base class and read in a subclass, or vice versa). As a result, analysis is more likely to recognize user-defined objects that are stored on `self` and used later in other methods, which may produce additional results. diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index b76f8aeb9a5..4d9e95b4c7a 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -169,6 +169,8 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */ predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { instanceFieldStep(nodeFrom, nodeTo) + or + inheritedFieldStep(nodeFrom, nodeTo) } /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */ @@ -366,6 +368,11 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { * the written value to the read reference, for any pair of methods on the class (not * just from `__init__`). * + * Flow across the class hierarchy (a write in one class observed in a method inherited + * from, or contributed by, a related class) is handled separately by + * `inheritedFieldStep`, because resolving superclasses depends on the call graph and so + * cannot appear in this call-graph-independent step. + * * This is an over-approximation: it is instance-insensitive (it does not distinguish * between different instances of the same class) and order-insensitive (it does not * require the write to happen before the read), matching the precision of @@ -380,25 +387,66 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { ) } + /** + * Holds if `nodeFrom` is written to attribute `self.attr` in an instance method of one + * class, and `nodeTo` reads attribute `self.attr` in an instance method of a different + * class that is related to it by inheritance (one is a transitive superclass of the + * other). + * + * This is the cross-hierarchy counterpart of `localFieldStep`: at runtime the receiver + * of both methods may be an instance of the more-derived class, whose behaviour is made + * up of the methods it declares together with those inherited from all of its ancestors. + * It therefore models the common pattern of a base class storing `self.attr` that a + * subclass reads, and vice versa. Resolving the superclass relationship depends on the + * call graph (via `getADirectSuperclass`), so this step is reported as `levelStepCall` + * rather than `levelStepNoCall`. + * + * Like `localFieldStep`, this is an over-approximation: it is both instance-insensitive + * and order-insensitive. + */ + private predicate inheritedFieldStep(Node nodeFrom, LocalSourceNode nodeTo) { + exists( + Class writeCls, Class readCls, string attr, DataFlowPublic::AttrWrite write, + DataFlowPublic::AttrRead read + | + selfAttrRef(writeCls, attr, write) and + nodeFrom = write.getValue() and + selfAttrRef(readCls, attr, read) and + nodeTo = read and + writeCls != readCls and + ( + writeCls = DataFlowDispatch::getADirectSuperclass*(readCls) + or + readCls = DataFlowDispatch::getADirectSuperclass*(writeCls) + ) + ) + } + /** * Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a - * class, and `nodeTo` reads attribute `attr` from an instance of the same class outside - * its methods (e.g. `instance.attr`). + * class, and `nodeTo` reads attribute `attr` from an instance of that class (or a + * subclass of it) outside its methods (e.g. `instance.attr`). * * This is the cross-instance counterpart of `localFieldStep`: it relates a write of - * `self.attr` inside the class to a read of `attr` on a reference to an instance of the - * class. Identifying instances relies on the call graph (via `classInstanceTracker`), so - * this step is reported as `levelStepCall` rather than `levelStepNoCall`. + * `self.attr` inside a class to a read of `attr` on a reference to an instance of that + * class or one of its subclasses. Identifying instances relies on the call graph (via + * `classInstanceTracker`), so this step is reported as `levelStepCall` rather than + * `levelStepNoCall`. The write may occur in the instance's own class or in any of its + * superclasses, since those methods are inherited. * * Like `localFieldStep`, this is an over-approximation: it is both instance-insensitive * and order-insensitive. */ private predicate instanceFieldStep(Node nodeFrom, LocalSourceNode nodeTo) { - exists(Class cls, string attr, DataFlowPublic::AttrWrite write, DataFlowPublic::AttrRead read | - selfAttrRef(cls, attr, write) and + exists( + Class writeCls, Class instanceCls, string attr, DataFlowPublic::AttrWrite write, + DataFlowPublic::AttrRead read + | + selfAttrRef(writeCls, attr, write) and nodeFrom = write.getValue() and - instanceAttrRead(cls, attr, read) and - nodeTo = read + instanceAttrRead(instanceCls, attr, read) and + nodeTo = read and + writeCls = DataFlowDispatch::getADirectSuperclass*(instanceCls) ) } diff --git a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py index 55f2edcac2a..b6bca72507f 100644 --- a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py +++ b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py @@ -177,3 +177,50 @@ instance = MyClass3() # $ tracked=foo instance.print_self() # $ tracked=foo instance.foo = tracked # $ tracked=foo tracked instance.print_foo() # $ tracked=foo + + +# ------------------------------------------------------------------------------ +# Tracking of instance attribute across the class hierarchy +# ------------------------------------------------------------------------------ + +# attribute written in a base class method, read in a subclass method + +class Base1(object): + def __init__(self): # $ tracked=foo + self.foo = tracked # $ tracked=foo tracked + +class Sub1(Base1): + def read_foo(self): # $ MISSING: tracked=foo + print(self.foo) # $ tracked MISSING: tracked=foo + +sub1 = Sub1() +sub1.read_foo() +print(sub1.foo) # $ tracked MISSING: tracked=foo + + +# attribute written in a subclass method, read in an inherited base class method + +class Base2(object): + def read_bar(self): # $ MISSING: tracked=bar + print(self.bar) # $ tracked MISSING: tracked=bar + +class Sub2(Base2): + def __init__(self): # $ tracked=bar + self.bar = tracked # $ tracked=bar tracked + +sub2 = Sub2() +sub2.read_bar() +print(sub2.bar) # $ tracked MISSING: tracked=bar + + +# attribute written in a base class method, read on an instance of the subclass + +class Base3(object): + def __init__(self): # $ tracked=baz + self.baz = tracked # $ tracked=baz tracked + +class Sub3(Base3): + pass + +sub3 = Sub3() +print(sub3.baz) # $ tracked MISSING: tracked=baz