Python: Port taint tests to use inline expectations

The meat of this PR is described in the new python/ql/test/experimental/meta/InlineTaintTest.qll file:

> Defines an InlineExpectationsTest for checking whether any arguments in
> `ensure_tainted` and `ensure_not_tainted` calls are tainted.
>
> Also defines query predicates to ensure that:
> - if any arguments to `ensure_not_tainted` are tainted, their annotation is marked with `SPURIOUS`.
> - if any arguments to `ensure_tainted` are not tainted, their annotation is marked with `MISSING`.
>
> The functionality of this module is tested in `ql/test/experimental/meta/inline-taint-test-demo`.
2026-05-05 13:45:19 +02:00 · 2021-04-15 18:00:33 +02:00
parent 972cc47f67
commit 3e7dc12246
63 changed files with 689 additions and 1101 deletions
--- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py
+++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py
@@ -22,23 +22,23 @@ def str_operations():
    tb = TAINTED_BYTES

    ensure_tainted(
-        ts,
-        ts + "foo",
-        "foo" + ts,
-        ts * 5,
-        ts[0 : len(ts)],
-        ts[:],
-        ts[0:1000],
-        ts[0],
-        str(ts),
-        bytes(tb),
-        unicode(ts),
+        ts, # $ tainted
+        ts + "foo", # $ tainted
+        "foo" + ts, # $ tainted
+        ts * 5, # $ tainted
+        ts[0 : len(ts)], # $ tainted
+        ts[:], # $ tainted
+        ts[0:1000], # $ tainted
+        ts[0], # $ tainted
+        str(ts), # $ tainted
+        bytes(tb), # $ tainted
+        unicode(ts), # $ tainted
    )

    aug_assignment = "safe"
    ensure_not_tainted(aug_assignment)
    aug_assignment += TAINTED_STRING
-    ensure_tainted(aug_assignment)
+    ensure_tainted(aug_assignment) # $ tainted


 def str_methods():
@@ -46,52 +46,54 @@ def str_methods():
    ts = TAINTED_STRING
    tb = TAINTED_BYTES
    ensure_tainted(
-        ts.capitalize(),
-        ts.center(100),
-        ts.expandtabs(),
+        ts.capitalize(), # $ tainted
+        ts.center(100), # $ tainted
+        ts.expandtabs(), # $ tainted

-        ts.format(),
-        "{}".format(ts),
-        "{unsafe}".format(unsafe=ts),
+        ts.format(), # $ tainted
+        "{}".format(ts), # $ tainted
+        "{unsafe}".format(unsafe=ts), # $ tainted

-        ts.join(["", ""]),
-        "".join([ts]),
+        ts.join(["", ""]), # $ tainted
+        "".join([ts]), # $ tainted

-        ts.ljust(100),
-        ts.lstrip(),
-        ts.lower(),
+        ts.ljust(100), # $ tainted
+        ts.lstrip(), # $ tainted
+        ts.lower(), # $ tainted

-        ts.replace("old", "new"),
-        "safe".replace("safe", ts),
+        ts.replace("old", "new"), # $ tainted
+        "safe".replace("safe", ts), # $ tainted

-        ts.rjust(100),
-        ts.rstrip(),
-        ts.strip(),
-        ts.swapcase(),
-        ts.title(),
-        ts.upper(),
-        ts.zfill(100),
+        ts.rjust(100), # $ tainted
+        ts.rstrip(), # $ tainted
+        ts.strip(), # $ tainted
+        ts.swapcase(), # $ tainted
+        ts.title(), # $ tainted
+        ts.upper(), # $ tainted
+        ts.zfill(100), # $ tainted

-        ts.encode("utf-8"),
-        ts.encode("utf-8").decode("utf-8"),
+        ts.encode("utf-8"), # $ tainted
+        ts.encode("utf-8").decode("utf-8"), # $ tainted

-        tb.decode("utf-8"),
-        tb.decode("utf-8").encode("utf-8"),
+        tb.decode("utf-8"), # $ tainted
+        tb.decode("utf-8").encode("utf-8"), # $ tainted

        # string methods that return a list
-        ts.partition("_"),
-        ts.rpartition("_"),
-        ts.rsplit("_"),
-        ts.split("_"),
-        ts.splitlines(),
+        ts.partition("_"), # $ tainted
+        ts.rpartition("_"), # $ tainted
+        ts.rsplit("_"), # $ tainted
+        ts.split("_"), # $ tainted
+        ts.splitlines(), # $ tainted
    )

    ensure_not_tainted(
        # Intuitively I think this should be safe, but better discuss it
        "safe".replace(ts, "also-safe"),

-        ts.join([]),  # FP due to separator not being used with zero/one elements
-        ts.join(["safe"]),  # FP due to separator not being used with zero/one elements
+        # FPs due to separator (`ts`) not ending up in result, when the list only has
+        # zero/one elements
+        ts.join([]), # $ SPURIOUS: tainted
+        ts.join(["safe"]),  # $ SPURIOUS: tainted
    )


@@ -101,8 +103,8 @@ def non_syntactic():
    meth = ts.upper
    _str = str
    ensure_tainted(
-        meth(),
-        _str(ts),
+        meth(), # $ MISSING: tainted
+        _str(ts), # $ MISSING: tainted
    )


@@ -111,9 +113,9 @@ def percent_fmt():
    ts = TAINTED_STRING
    tainted_fmt = ts + " %s %s"
    ensure_tainted(
-        tainted_fmt % (1, 2),
-        "%s foo bar" % ts,
-        "%s %s %s" % (1, 2, ts),
+        tainted_fmt % (1, 2), # $ tainted
+        "%s foo bar" % ts, # $ tainted
+        "%s %s %s" % (1, 2, ts), # $ tainted
    )


@@ -123,30 +125,30 @@ def binary_decode_encode():
    import base64

    ensure_tainted(
-        base64.b64encode(tb),
-        base64.b64decode(base64.b64encode(tb)),
+        base64.b64encode(tb), # $ tainted
+        base64.b64decode(base64.b64encode(tb)), # $ tainted

-        base64.standard_b64encode(tb),
-        base64.standard_b64decode(base64.standard_b64encode(tb)),
+        base64.standard_b64encode(tb), # $ tainted
+        base64.standard_b64decode(base64.standard_b64encode(tb)), # $ tainted

-        base64.urlsafe_b64encode(tb),
-        base64.urlsafe_b64decode(base64.urlsafe_b64encode(tb)),
+        base64.urlsafe_b64encode(tb), # $ tainted
+        base64.urlsafe_b64decode(base64.urlsafe_b64encode(tb)), # $ tainted

-        base64.b32encode(tb),
-        base64.b32decode(base64.b32encode(tb)),
+        base64.b32encode(tb), # $ tainted
+        base64.b32decode(base64.b32encode(tb)), # $ tainted

-        base64.b16encode(tb),
-        base64.b16decode(base64.b16encode(tb)),
+        base64.b16encode(tb), # $ tainted
+        base64.b16decode(base64.b16encode(tb)), # $ tainted

        # deprecated since Python 3.1, but still works
-        base64.encodestring(tb),
-        base64.decodestring(base64.encodestring(tb)),
+        base64.encodestring(tb), # $ tainted
+        base64.decodestring(base64.encodestring(tb)), # $ tainted
    )

    import quopri
    ensure_tainted(
-        quopri.encodestring(tb),
-        quopri.decodestring(quopri.encodestring(tb)),
+        quopri.encodestring(tb), # $ MISSING: tainted
+        quopri.decodestring(quopri.encodestring(tb)), # $ MISSING: tainted
    )


@@ -156,10 +158,10 @@ def test_os_path_join():
    print("\n# test_os_path_join")
    ts = TAINTED_STRING
    ensure_tainted(
-        os.path.join(ts, "foo", "bar"),
-        os.path.join(ts),
-        os.path.join("foo", "bar", ts),
-        ospath_alias.join("foo", "bar", ts),
+        os.path.join(ts, "foo", "bar"), # $ tainted
+        os.path.join(ts), # $ tainted
+        os.path.join("foo", "bar", ts), # $ tainted
+        ospath_alias.join("foo", "bar", ts), # $ tainted
    )