From f251a572e1fa9349468524c4542b41fc665b042d Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 30 Jun 2026 14:14:53 +0000 Subject: [PATCH] Python: hotfix - disable instanceFieldStep to avoid type-tracker blowup The `instanceFieldStep` disjunct of `TypeTrackingInput::levelStepCall` that was added in 7.2.0 uses `classInstanceTracker(cls)` -- which is itself a type-tracker -- inside `levelStepCall`. That creates a structural mutual recursion between the main type-tracker fixpoint and `classInstanceTracker`, causing the type-tracker delta to blow up to ~100M tuples per iteration on some OOP-heavy Python codebases. Verified on the python/mypy database: SSRF query wall time goes from ~12s before the offending commit to >40 minutes after it. This hotfix temporarily drops the `instanceFieldStep` disjunct and keeps only `inheritedFieldStep`, which does not pull on the call graph and is well-behaved (verified at ~12s on mypy). The `instanceFieldStep` helper predicate itself is kept in place, and the `levelStepCall` body still contains the call to it, conjoined with `none()` so that it never holds; removing the `and none()` re-enables the disjunct once the recursion issue is properly addressed. 
--- .../2026-06-30-disable-instance-field-step-hotfix.md | 5 +++++ .../python/dataflow/new/internal/TypeTrackingImpl.qll | 8 +++++++- .../dataflow/typetracking/attribute_tests.py | 8 ++++---- .../Security/CWE-089-SqlInjection/SqlInjection.expected | 6 ------ .../test/query-tests/Security/CWE-089-SqlInjection/app.py | 4 ++-- 5 files changed, 18 insertions(+), 13 deletions(-) create mode 100644 python/ql/lib/change-notes/2026-06-30-disable-instance-field-step-hotfix.md diff --git a/python/ql/lib/change-notes/2026-06-30-disable-instance-field-step-hotfix.md b/python/ql/lib/change-notes/2026-06-30-disable-instance-field-step-hotfix.md new file mode 100644 index 00000000000..71ca0943f5e --- /dev/null +++ b/python/ql/lib/change-notes/2026-06-30-disable-instance-field-step-hotfix.md @@ -0,0 +1,5 @@ +--- +category: minorAnalysis +--- + +- Temporarily disabled the `instanceFieldStep` disjunct of the internal `TypeTrackingInput::levelStepCall` predicate, which was introduced in 7.2.0 and caused catastrophic query slowdowns on some OOP-heavy Python codebases (e.g. `mypy` and `dask`). diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 13afd6a4276..02fae4611f4 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -170,7 +170,13 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */ predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { - instanceFieldStep(nodeFrom, nodeTo) + // HOTFIX: `instanceFieldStep` is temporarily disabled (via `and none()`). 
+ // It uses `classInstanceTracker(cls)` -- itself a type-tracker run -- + // from inside `levelStepCall`, creating a structural mutual recursion + // that causes catastrophic query slowdowns on some OOP-heavy Python + // codebases (e.g. mypy and dask). The `and none()` should be removed + // once that recursion is redesigned. + instanceFieldStep(nodeFrom, nodeTo) and none() or inheritedFieldStep(nodeFrom, nodeTo) } diff --git a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py index b6bca72507f..09fed01398e 100644 --- a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py +++ b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py @@ -157,7 +157,7 @@ class MyClass2(object): print(self.foo) # $ tracked MISSING: tracked=foo instance = MyClass2() -print(instance.foo) # $ tracked MISSING: tracked=foo +print(instance.foo) # $ MISSING: tracked=foo tracked instance.print_foo() # $ MISSING: tracked=foo @@ -195,7 +195,7 @@ class Sub1(Base1): sub1 = Sub1() sub1.read_foo() -print(sub1.foo) # $ tracked MISSING: tracked=foo +print(sub1.foo) # $ MISSING: tracked=foo tracked # attribute written in a subclass method, read in an inherited base class method @@ -210,7 +210,7 @@ class Sub2(Base2): sub2 = Sub2() sub2.read_bar() -print(sub2.bar) # $ tracked MISSING: tracked=bar +print(sub2.bar) # $ MISSING: tracked=bar tracked # attribute written in a base class method, read on an instance of the subclass @@ -223,4 +223,4 @@ class Sub3(Base3): pass sub3 = Sub3() -print(sub3.baz) # $ tracked MISSING: tracked=baz +print(sub3.baz) # $ MISSING: tracked=baz tracked diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected index 8f60394d8a2..4cbcb33440b 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected +++ 
b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected @@ -1,7 +1,6 @@ #select | app.py:23:20:23:24 | ControlFlowNode for query | app.py:20:18:20:21 | ControlFlowNode for name | app.py:23:20:23:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:20:18:20:21 | ControlFlowNode for name | user-provided value | | app.py:30:20:30:24 | ControlFlowNode for query | app.py:27:19:27:22 | ControlFlowNode for name | app.py:30:20:30:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:27:19:27:22 | ControlFlowNode for name | user-provided value | -| app.py:37:20:37:24 | ControlFlowNode for query | app.py:34:19:34:22 | ControlFlowNode for name | app.py:37:20:37:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:34:19:34:22 | ControlFlowNode for name | user-provided value | | app.py:44:20:44:24 | ControlFlowNode for query | app.py:41:19:41:22 | ControlFlowNode for name | app.py:44:20:44:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:41:19:41:22 | ControlFlowNode for name | user-provided value | | app.py:51:20:51:24 | ControlFlowNode for query | app.py:48:19:48:22 | ControlFlowNode for name | app.py:51:20:51:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:48:19:48:22 | ControlFlowNode for name | user-provided value | | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. 
| sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | @@ -25,8 +24,6 @@ edges | app.py:21:5:21:9 | ControlFlowNode for query | app.py:23:20:23:24 | ControlFlowNode for query | provenance | | | app.py:27:19:27:22 | ControlFlowNode for name | app.py:28:5:28:9 | ControlFlowNode for query | provenance | | | app.py:28:5:28:9 | ControlFlowNode for query | app.py:30:20:30:24 | ControlFlowNode for query | provenance | | -| app.py:34:19:34:22 | ControlFlowNode for name | app.py:35:5:35:9 | ControlFlowNode for query | provenance | | -| app.py:35:5:35:9 | ControlFlowNode for query | app.py:37:20:37:24 | ControlFlowNode for query | provenance | | | app.py:41:19:41:22 | ControlFlowNode for name | app.py:42:5:42:9 | ControlFlowNode for query | provenance | | | app.py:42:5:42:9 | ControlFlowNode for query | app.py:44:20:44:24 | ControlFlowNode for query | provenance | | | app.py:48:19:48:22 | ControlFlowNode for name | app.py:49:5:49:9 | ControlFlowNode for query | provenance | | @@ -54,9 +51,6 @@ nodes | app.py:27:19:27:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | | app.py:28:5:28:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | | app.py:30:20:30:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | -| app.py:34:19:34:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | -| app.py:35:5:35:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | -| app.py:37:20:37:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | | app.py:41:19:41:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | | app.py:42:5:42:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | | app.py:44:20:44:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py 
b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py index 4de61346d8f..8046f1ef52e 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py @@ -31,10 +31,10 @@ async def unsafe2(name: str): # $ Source cursor.close() @app.get("/unsafe3/") -async def unsafe3(name: str): # $ Source +async def unsafe3(name: str): # $ MISSING: Source query = "select * from users where name=" + name cursor = hdb_con3.cursor() - cursor.execute(query) # $ Alert + cursor.execute(query) # $ MISSING: Alert cursor.close() @app.get("/unsafe4/")