Merge pull request #7873 from RasmusWL/fix-attribute-taint

Python: Fix attribute taint
2026-07-21 03:08:25 +02:00 · 2022-02-25 15:02:24 +01:00
parent a8bfebaeb6 3e01816f0c
commit 8b926f6859
4 changed files with 67 additions and 3 deletions
--- a/python/ql/lib/change-notes/2022-02-08-fix-attribute-taint.md
+++ b/python/ql/lib/change-notes/2022-02-08-fix-attribute-taint.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* Fixed taint propagation for attribute assignment. In the assignment `x.foo = tainted`, we no longer treat the entire object `x` as tainted just because its attribute `foo` contains tainted data. This leads to slightly fewer false positives.
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll
@@ -167,8 +167,25 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
 */
 predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
  // construction by literal
-  // TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
-  DataFlowPrivate::storeStep(nodeFrom, _, nodeTo)
+  //
+  // TODO: once we have proper flow-summary modeling, we might not need this step any
+  // longer -- but there needs to be a matching read-step for the store-step, and we
+  // don't provide that right now.
+  DataFlowPrivate::listStoreStep(nodeFrom, _, nodeTo)
+  or
+  DataFlowPrivate::setStoreStep(nodeFrom, _, nodeTo)
+  or
+  DataFlowPrivate::tupleStoreStep(nodeFrom, _, nodeTo)
+  or
+  DataFlowPrivate::dictStoreStep(nodeFrom, _, nodeTo)
+  or
+  // comprehension, so there is taint-flow from `x` in `[x for x in xs]` to the
+  // resulting list of the list-comprehension.
+  //
+  // TODO: once we have proper flow-summary modeling, we might not need this step any
+  // longer -- but there needs to be a matching read-step for the store-step, and we
+  // don't provide that right now.
+  DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo)
  or
  // constructor call
  exists(DataFlow::CallCfgNode call | call = nodeTo |
--- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_attr.py
+++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_attr.py
@@ -0,0 +1,43 @@
+# Add taintlib to PATH so it can be imported during runtime without any hassle
+import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
+from taintlib import *
+
+# This has no runtime impact, but allows autocomplete to work
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from ..taintlib import *
+
+
+# Actual tests
+
+class Foo:
+    def __init__(self, arg):
+        self.arg = arg  # stores the constructor argument unchanged
+        self.other_arg = "other_arg"  # constant string literal, independent of `arg`
+
+
+def test_tainted_attr():
+    # The following demonstrates how tainting an attribute affects the taintedness of
+    # the object.
+    #
+    # Previously we would (wrongly) treat the object as tainted if we noticed a write of
+    # a tainted value to any of its attributes. This led to FPs, highlighted in
+    # https://github.com/github/codeql/issues/7786
+
+    f = Foo(TAINTED_STRING)
+    ensure_not_tainted(f)
+    ensure_tainted(f.arg) # $ tainted
+    ensure_not_tainted(f.other_arg)
+
+
+    x = Foo("x")
+    ensure_not_tainted(x, x.arg, x.other_arg)
+
+    x.arg = TAINTED_STRING
+    ensure_not_tainted(x)
+    ensure_tainted(x.arg) # $ tainted
+    ensure_not_tainted(x.other_arg)
+
+
+    b = Foo("bar")
+    ensure_not_tainted(b, b.arg, b.other_arg)
--- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_with.py
+++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_with.py
@@ -49,7 +49,7 @@ class Context_arg:
 def test_with_arg():
    ctx = Context_arg(TAINTED_STRING)
    with ctx as tainted:
-        ensure_tainted(tainted) # $ tainted
+        ensure_tainted(tainted) # $ MISSING: tainted