Python: Add tests for shared taint tracking for strings

I adopted the TestTaint testing setup that I made for the "old" taint tracking tests. This time around we should figure out if we can use .qlref or similar, so the test query doesn't end up as multiple copies that are not kept up to date :|

The `repr` predicate could probably be placed somewhere better. For now I just wanted something that could help me. I considered just expanding the `repr` predicate in `ql/src/semmle/python/strings.qll`, but since it's currently used by queries, I didn't want to change it.

Anyway, the output it gives is much more useful than seeing this ;)

```
| test.py:20 | ok | str_operations | test.py:20:9:20:10 | ts |
| test.py:21 | fail | str_operations | test.py:21:9:21:18 | BinaryExpr |
| test.py:22 | fail | str_operations | test.py:22:9:22:18 | BinaryExpr |
| test.py:23 | fail | str_operations | test.py:23:9:23:21 | Subscript |
| test.py:24 | fail | str_operations | test.py:24:9:24:13 | Subscript |
| test.py:25 | fail | str_operations | test.py:25:9:25:18 | Subscript |
| test.py:26 | fail | str_operations | test.py:26:9:26:13 | Subscript |
| test.py:27 | fail | str_operations | test.py:27:9:27:15 | str() |
| test.py:35 | fail | str_methods | test.py:35:9:35:23 | Attribute() |
| test.py:36 | fail | str_methods | test.py:36:9:36:21 | Attribute() |
| test.py:37 | fail | str_methods | test.py:37:9:37:22 | Attribute() |
| test.py:38 | fail | str_methods | test.py:38:9:38:23 | Attribute() |
| test.py:40 | fail | str_methods | test.py:40:9:40:19 | Attribute() |
| test.py:41 | fail | str_methods | test.py:41:9:41:23 | Attribute() |
| test.py:42 | fail | str_methods | test.py:42:9:42:36 | Attribute() |
| test.py:44 | fail | str_methods | test.py:44:9:44:25 | Attribute() |
| test.py:45 | fail | str_methods | test.py:45:9:45:45 | Attribute() |
| test.py:47 | fail | str_methods | test.py:47:9:47:21 | Attribute() |
| test.py:48 | fail | str_methods | test.py:48:9:48:19 | Attribute() |
| test.py:49 | fail | str_methods | test.py:49:9:49:18 | Attribute() |
| test.py:51 | fail | str_methods | test.py:51:9:51:32 | Attribute() |
| test.py:52 | fail | str_methods | test.py:52:9:52:34 | Attribute() |
| test.py:54 | fail | str_methods | test.py:54:9:54:21 | Attribute() |
| test.py:55 | fail | str_methods | test.py:55:9:55:19 | Attribute() |
| test.py:56 | fail | str_methods | test.py:56:9:56:18 | Attribute() |
| test.py:57 | fail | str_methods | test.py:57:9:57:21 | Attribute() |
| test.py:58 | fail | str_methods | test.py:58:9:58:18 | Attribute() |
| test.py:59 | fail | str_methods | test.py:59:9:59:18 | Attribute() |
| test.py:60 | fail | str_methods | test.py:60:9:60:21 | Attribute() |
| test.py:62 | fail | str_methods | test.py:62:9:62:26 | Attribute() |
| test.py:63 | fail | str_methods | test.py:63:9:63:42 | Attribute() |
| test.py:65 | fail | str_methods | test.py:65:9:65:26 | Attribute() |
| test.py:66 | fail | str_methods | test.py:66:9:66:42 | Attribute() |
| test.py:69 | fail | str_methods | test.py:69:9:69:25 | Attribute() |
| test.py:70 | fail | str_methods | test.py:70:9:70:26 | Attribute() |
| test.py:71 | fail | str_methods | test.py:71:9:71:22 | Attribute() |
| test.py:72 | fail | str_methods | test.py:72:9:72:21 | Attribute() |
| test.py:73 | fail | str_methods | test.py:73:9:73:23 | Attribute() |
| test.py:78 | ok | str_methods | test.py:78:9:78:39 | Attribute() |
```
2026-05-02 12:15:17 +02:00 · 2020-08-20 16:55:34 +02:00
parent 765c40ef03
commit 61f89ca3c3
3 changed files with 218 additions and 0 deletions
--- a/python/ql/test/experimental/dataflow/tainttracking/string/TestTaint.expected
+++ b/python/ql/test/experimental/dataflow/tainttracking/string/TestTaint.expected
@@ -0,0 +1,45 @@
+| test.py:24 | ok   | str_operations | ts |
+| test.py:25 | fail | str_operations | BinaryExpr |
+| test.py:26 | fail | str_operations | BinaryExpr |
+| test.py:27 | fail | str_operations | ts[Slice] |
+| test.py:28 | fail | str_operations | ts[Slice] |
+| test.py:29 | fail | str_operations | ts[Slice] |
+| test.py:30 | fail | str_operations | ts[0] |
+| test.py:31 | fail | str_operations | str(..) |
+| test.py:40 | fail | str_methods | ts.capitalize() |
+| test.py:41 | fail | str_methods | ts.casefold() |
+| test.py:42 | fail | str_methods | ts.center(..) |
+| test.py:43 | fail | str_methods | ts.expandtabs() |
+| test.py:45 | fail | str_methods | ts.format() |
+| test.py:46 | fail | str_methods | "{}".format(..) |
+| test.py:47 | fail | str_methods | "{unsafe}".format(..) |
+| test.py:49 | fail | str_methods | ts.format_map(..) |
+| test.py:50 | fail | str_methods | "{unsafe}".format_map(..) |
+| test.py:52 | fail | str_methods | ts.join(..) |
+| test.py:53 | fail | str_methods | "".join(..) |
+| test.py:55 | fail | str_methods | ts.ljust(..) |
+| test.py:56 | fail | str_methods | ts.lstrip() |
+| test.py:57 | fail | str_methods | ts.lower() |
+| test.py:59 | fail | str_methods | ts.replace(..) |
+| test.py:60 | fail | str_methods | "safe".replace(..) |
+| test.py:62 | fail | str_methods | ts.rjust(..) |
+| test.py:63 | fail | str_methods | ts.rstrip() |
+| test.py:64 | fail | str_methods | ts.strip() |
+| test.py:65 | fail | str_methods | ts.swapcase() |
+| test.py:66 | fail | str_methods | ts.title() |
+| test.py:67 | fail | str_methods | ts.upper() |
+| test.py:68 | fail | str_methods | ts.zfill(..) |
+| test.py:70 | fail | str_methods | ts.encode(..) |
+| test.py:71 | fail | str_methods | ts.encode(..).decode(..) |
+| test.py:73 | fail | str_methods | tb.decode(..) |
+| test.py:74 | fail | str_methods | tb.decode(..).encode(..) |
+| test.py:77 | fail | str_methods | ts.partition(..) |
+| test.py:78 | fail | str_methods | ts.rpartition(..) |
+| test.py:79 | fail | str_methods | ts.rsplit(..) |
+| test.py:80 | fail | str_methods | ts.split(..) |
+| test.py:81 | fail | str_methods | ts.splitlines() |
+| test.py:86 | ok   | str_methods | "safe".replace(..) |
+| test.py:88 | ok   | str_methods | ts.join(..) |
+| test.py:89 | ok   | str_methods | ts.join(..) |
+| test.py:99 | fail | non_syntactic | meth() |
+| test.py:100 | fail | non_syntactic | _str(..) |
--- a/python/ql/test/experimental/dataflow/tainttracking/string/TestTaint.ql
+++ b/python/ql/test/experimental/dataflow/tainttracking/string/TestTaint.ql
@@ -0,0 +1,66 @@
+import python
+import experimental.dataflow.TaintTracking
+import experimental.dataflow.DataFlow
+
+class TestTaintTrackingConfiguration extends TaintTracking::Configuration {
+  TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" }
+
+  override predicate isSource(DataFlow::Node source) {
+    source.(DataFlow::CfgNode).getNode().(NameNode).getId() in ["TAINTED_STRING", "TAINTED_BYTES"]
+  }
+
+  override predicate isSink(DataFlow::Node sink) {
+    exists(CallNode call |
+      call.getFunction().(NameNode).getId() in ["ensure_tainted", "ensure_not_tainted"] and
+      sink.(DataFlow::CfgNode).getNode() = call.getAnArg()
+    )
+  }
+}
+
+private string repr(Expr e) {
+  not e instanceof Num and
+  not e instanceof StrConst and
+  not e instanceof Subscript and
+  not e instanceof Call and
+  not e instanceof Attribute and
+  result = e.toString()
+  or
+  result = e.(Num).getN()
+  or
+  result =
+    e.(StrConst).getPrefix() + e.(StrConst).getText() +
+      e.(StrConst).getPrefix().regexpReplaceAll("[a-zA-Z]+", "")
+  or
+  result = repr(e.(Subscript).getObject()) + "[" + repr(e.(Subscript).getIndex()) + "]"
+  or
+  (
+    if exists(e.(Call).getAnArg()) or exists(e.(Call).getANamedArg())
+    then result = repr(e.(Call).getFunc()) + "(..)"
+    else result = repr(e.(Call).getFunc()) + "()"
+  )
+  or
+  result = repr(e.(Attribute).getObject()) + "." + e.(Attribute).getName()
+}
+
+from Call call, Expr arg, boolean expected_taint, boolean has_taint, string test_res
+where
+  call.getLocation().getFile().getShortName() = "test.py" and
+  (
+    call.getFunc().(Name).getId() = "ensure_tainted" and
+    expected_taint = true
+    or
+    call.getFunc().(Name).getId() = "ensure_not_tainted" and
+    expected_taint = false
+  ) and
+  arg = call.getAnArg() and
+  (
+    // TODO: Replace with `hasFlowToExpr` once that is working
+    if
+      exists(TaintTracking::Configuration c |
+        c.hasFlowTo(any(DataFlow::Node n | n.(DataFlow::CfgNode).getNode() = arg.getAFlowNode()))
+      )
+    then has_taint = true
+    else has_taint = false
+  ) and
+  if expected_taint = has_taint then test_res = "ok  " else test_res = "fail"
+select arg.getLocation().toString(), test_res, call.getScope().(Function).getName(), repr(arg)
--- a/python/ql/test/experimental/dataflow/tainttracking/string/test.py
+++ b/python/ql/test/experimental/dataflow/tainttracking/string/test.py
@@ -0,0 +1,107 @@
+TAINTED_STRING = "TAINTED_STRING"
+TAINTED_BYTES = b"TAINTED_BYTES"
+
+
+def ensure_tainted(*args):
+    print("- ensure_tainted")
+    for i, arg in enumerate(args):
+        print("arg {}: {!r}".format(i, arg))
+
+
+def ensure_not_tainted(*args):
+    print("- ensure_not_tainted")
+    for i, arg in enumerate(args):
+        print("arg {}: {!r}".format(i, arg))
+
+
+# Actual tests
+
+
+def str_operations():
+    print("\n# str_operations")
+    ts = TAINTED_STRING
+    ensure_tainted(
+        ts,
+        ts + "foo",
+        "foo" + ts,
+        ts[0 : len(ts)],
+        ts[:],
+        ts[0:1000],
+        ts[0],
+        str(ts),
+    )
+
+
+def str_methods():
+    print("\n# str_methods")
+    ts = TAINTED_STRING
+    tb = TAINTED_BYTES
+    ensure_tainted(
+        ts.capitalize(),
+        ts.casefold(),
+        ts.center(100),
+        ts.expandtabs(),
+
+        ts.format(),
+        "{}".format(ts),
+        "{unsafe}".format(unsafe=ts),
+
+        ts.format_map({}),
+        "{unsafe}".format_map({"unsafe": ts}),
+
+        ts.join(["", ""]),
+        "".join([ts]),
+
+        ts.ljust(100),
+        ts.lstrip(),
+        ts.lower(),
+
+        ts.replace("old", "new"),
+        "safe".replace("safe", ts),
+
+        ts.rjust(100),
+        ts.rstrip(),
+        ts.strip(),
+        ts.swapcase(),
+        ts.title(),
+        ts.upper(),
+        ts.zfill(100),
+
+        ts.encode("utf-8"),
+        ts.encode("utf-8").decode("utf-8"),
+
+        tb.decode("utf-8"),
+        tb.decode("utf-8").encode("utf-8"),
+
+        # string methods that return a list (or a tuple, for partition/rpartition)
+        ts.partition("_"),
+        ts.rpartition("_"),
+        ts.rsplit("_"),
+        ts.split("_"),
+        ts.splitlines(),
+    )
+
+    ensure_not_tainted(
+        # Intuitively I think this should be safe, but better discuss it
+        "safe".replace(ts, "also-safe"),
+
+        ts.join([]),  # FP due to separator not being used with zero/one elements
+        ts.join(["safe"]),  # FP due to separator not being used with zero/one elements
+    )
+
+
+def non_syntactic():
+    print("\n# non_syntactic")
+    ts = TAINTED_STRING
+    meth = ts.upper
+    _str = str
+    ensure_tainted(
+        meth(),
+        _str(ts),
+    )
+
+# Make tests runnable
+
+str_operations()
+str_methods()
+non_syntactic()