Python: Add tests for shared taint tracking for strings

I adopted the TestTaint testing setup that I made for the "old" taint tracking
tests. This time around we should figure out if we can use .qlref or similar so
it doesn't end up in multiple copies that are not kept up to date :|

The `repr` predicate could probably be placed somewhere better. For now I just
wanted something that could help me. I considered just expanding the `repr`
predicate in `ql/src/semmle/python/strings.qll`, but since it's currently used
by queries, I didn't want to do anything about it.

Anyway, the output it gives is much more useful than seeing this ;)

```
| test.py:20 | ok   | str_operations | test.py:20:9:20:10 | ts |
| test.py:21 | fail | str_operations | test.py:21:9:21:18 | BinaryExpr |
| test.py:22 | fail | str_operations | test.py:22:9:22:18 | BinaryExpr |
| test.py:23 | fail | str_operations | test.py:23:9:23:21 | Subscript |
| test.py:24 | fail | str_operations | test.py:24:9:24:13 | Subscript |
| test.py:25 | fail | str_operations | test.py:25:9:25:18 | Subscript |
| test.py:26 | fail | str_operations | test.py:26:9:26:13 | Subscript |
| test.py:27 | fail | str_operations | test.py:27:9:27:15 | str() |
| test.py:35 | fail | str_methods | test.py:35:9:35:23 | Attribute() |
| test.py:36 | fail | str_methods | test.py:36:9:36:21 | Attribute() |
| test.py:37 | fail | str_methods | test.py:37:9:37:22 | Attribute() |
| test.py:38 | fail | str_methods | test.py:38:9:38:23 | Attribute() |
| test.py:40 | fail | str_methods | test.py:40:9:40:19 | Attribute() |
| test.py:41 | fail | str_methods | test.py:41:9:41:23 | Attribute() |
| test.py:42 | fail | str_methods | test.py:42:9:42:36 | Attribute() |
| test.py:44 | fail | str_methods | test.py:44:9:44:25 | Attribute() |
| test.py:45 | fail | str_methods | test.py:45:9:45:45 | Attribute() |
| test.py:47 | fail | str_methods | test.py:47:9:47:21 | Attribute() |
| test.py:48 | fail | str_methods | test.py:48:9:48:19 | Attribute() |
| test.py:49 | fail | str_methods | test.py:49:9:49:18 | Attribute() |
| test.py:51 | fail | str_methods | test.py:51:9:51:32 | Attribute() |
| test.py:52 | fail | str_methods | test.py:52:9:52:34 | Attribute() |
| test.py:54 | fail | str_methods | test.py:54:9:54:21 | Attribute() |
| test.py:55 | fail | str_methods | test.py:55:9:55:19 | Attribute() |
| test.py:56 | fail | str_methods | test.py:56:9:56:18 | Attribute() |
| test.py:57 | fail | str_methods | test.py:57:9:57:21 | Attribute() |
| test.py:58 | fail | str_methods | test.py:58:9:58:18 | Attribute() |
| test.py:59 | fail | str_methods | test.py:59:9:59:18 | Attribute() |
| test.py:60 | fail | str_methods | test.py:60:9:60:21 | Attribute() |
| test.py:62 | fail | str_methods | test.py:62:9:62:26 | Attribute() |
| test.py:63 | fail | str_methods | test.py:63:9:63:42 | Attribute() |
| test.py:65 | fail | str_methods | test.py:65:9:65:26 | Attribute() |
| test.py:66 | fail | str_methods | test.py:66:9:66:42 | Attribute() |
| test.py:69 | fail | str_methods | test.py:69:9:69:25 | Attribute() |
| test.py:70 | fail | str_methods | test.py:70:9:70:26 | Attribute() |
| test.py:71 | fail | str_methods | test.py:71:9:71:22 | Attribute() |
| test.py:72 | fail | str_methods | test.py:72:9:72:21 | Attribute() |
| test.py:73 | fail | str_methods | test.py:73:9:73:23 | Attribute() |
| test.py:78 | ok   | str_methods | test.py:78:9:78:39 | Attribute() |
```
This commit is contained in:
Rasmus Wriedt Larsen
2020-08-20 16:55:34 +02:00
parent 765c40ef03
commit 61f89ca3c3
3 changed files with 218 additions and 0 deletions

View File

@@ -0,0 +1,45 @@
| test.py:24 | ok | str_operations | ts |
| test.py:25 | fail | str_operations | BinaryExpr |
| test.py:26 | fail | str_operations | BinaryExpr |
| test.py:27 | fail | str_operations | ts[Slice] |
| test.py:28 | fail | str_operations | ts[Slice] |
| test.py:29 | fail | str_operations | ts[Slice] |
| test.py:30 | fail | str_operations | ts[0] |
| test.py:31 | fail | str_operations | str(..) |
| test.py:40 | fail | str_methods | ts.capitalize() |
| test.py:41 | fail | str_methods | ts.casefold() |
| test.py:42 | fail | str_methods | ts.center(..) |
| test.py:43 | fail | str_methods | ts.expandtabs() |
| test.py:45 | fail | str_methods | ts.format() |
| test.py:46 | fail | str_methods | "{}".format(..) |
| test.py:47 | fail | str_methods | "{unsafe}".format(..) |
| test.py:49 | fail | str_methods | ts.format_map(..) |
| test.py:50 | fail | str_methods | "{unsafe}".format_map(..) |
| test.py:52 | fail | str_methods | ts.join(..) |
| test.py:53 | fail | str_methods | "".join(..) |
| test.py:55 | fail | str_methods | ts.ljust(..) |
| test.py:56 | fail | str_methods | ts.lstrip() |
| test.py:57 | fail | str_methods | ts.lower() |
| test.py:59 | fail | str_methods | ts.replace(..) |
| test.py:60 | fail | str_methods | "safe".replace(..) |
| test.py:62 | fail | str_methods | ts.rjust(..) |
| test.py:63 | fail | str_methods | ts.rstrip() |
| test.py:64 | fail | str_methods | ts.strip() |
| test.py:65 | fail | str_methods | ts.swapcase() |
| test.py:66 | fail | str_methods | ts.title() |
| test.py:67 | fail | str_methods | ts.upper() |
| test.py:68 | fail | str_methods | ts.zfill(..) |
| test.py:70 | fail | str_methods | ts.encode(..) |
| test.py:71 | fail | str_methods | ts.encode(..).decode(..) |
| test.py:73 | fail | str_methods | tb.decode(..) |
| test.py:74 | fail | str_methods | tb.decode(..).encode(..) |
| test.py:77 | fail | str_methods | ts.partition(..) |
| test.py:78 | fail | str_methods | ts.rpartition(..) |
| test.py:79 | fail | str_methods | ts.rsplit(..) |
| test.py:80 | fail | str_methods | ts.split(..) |
| test.py:81 | fail | str_methods | ts.splitlines() |
| test.py:86 | ok | str_methods | "safe".replace(..) |
| test.py:88 | ok | str_methods | ts.join(..) |
| test.py:89 | ok | str_methods | ts.join(..) |
| test.py:99 | fail | non_syntactic | meth() |
| test.py:100 | fail | non_syntactic | _str(..) |

View File

@@ -0,0 +1,66 @@
import python
import experimental.dataflow.TaintTracking
import experimental.dataflow.DataFlow
/**
 * Taint-tracking configuration used by this test.
 *
 * Sources are references to the names `TAINTED_STRING` / `TAINTED_BYTES`;
 * sinks are the arguments of calls to `ensure_tainted` / `ensure_not_tainted`.
 */
class TestTaintTrackingConfiguration extends TaintTracking::Configuration {
  TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" }

  /** Holds if `source` is a control-flow node for one of the tainted marker names. */
  override predicate isSource(DataFlow::Node source) {
    exists(NameNode name |
      name = source.(DataFlow::CfgNode).getNode() and
      name.getId() in ["TAINTED_STRING", "TAINTED_BYTES"]
    )
  }

  /** Holds if `sink` is an argument of a call to one of the two checker functions. */
  override predicate isSink(DataFlow::Node sink) {
    exists(CallNode call, NameNode callee |
      callee = call.getFunction() and
      callee.getId() in ["ensure_tainted", "ensure_not_tainted"] and
      sink.(DataFlow::CfgNode).getNode() = call.getAnArg()
    )
  }
}
/**
 * Gets a compact, human-readable rendering of `e` for the test output.
 *
 * Numbers, string constants, subscripts, calls and attribute accesses get a
 * dedicated rendering (recursing into their sub-expressions); every other
 * expression falls back to `toString()`.
 */
private string repr(Expr e) {
  // Fallback: none of the specially handled expression kinds.
  not (
    e instanceof Num or
    e instanceof StrConst or
    e instanceof Subscript or
    e instanceof Call or
    e instanceof Attribute
  ) and
  result = e.toString()
  or
  exists(Num num | num = e | result = num.getN())
  or
  // The prefix with its letters removed is appended after the text as the closing part.
  exists(StrConst str | str = e |
    result = str.getPrefix() + str.getText() + str.getPrefix().regexpReplaceAll("[a-zA-Z]+", "")
  )
  or
  exists(Subscript sub | sub = e |
    result = repr(sub.getObject()) + "[" + repr(sub.getIndex()) + "]"
  )
  or
  // Calls are abbreviated: "(..)" when any positional or named argument exists, "()" otherwise.
  exists(Call call, string suffix | call = e |
    (
      if exists(call.getAnArg()) or exists(call.getANamedArg())
      then suffix = "(..)"
      else suffix = "()"
    ) and
    result = repr(call.getFunc()) + suffix
  )
  or
  exists(Attribute attr | attr = e | result = repr(attr.getObject()) + "." + attr.getName())
}
// Test driver: for every argument passed to `ensure_tainted(..)` or
// `ensure_not_tainted(..)` in `test.py`, compare the expectation encoded by the
// callee name with whether a taint-tracking configuration reports flow to it.
from Call call, Expr arg, boolean expected_taint, boolean has_taint, string test_res
where
  call.getLocation().getFile().getShortName() = "test.py" and
  (
    // The name of the called function determines the expected result.
    call.getFunc().(Name).getId() = "ensure_tainted" and
    expected_taint = true
    or
    call.getFunc().(Name).getId() = "ensure_not_tainted" and
    expected_taint = false
  ) and
  arg = call.getAnArg() and
  (
    // TODO: Replace with `hasFlowToExpr` once that is working
    if
      exists(TaintTracking::Configuration c |
        c.hasFlowTo(any(DataFlow::Node n | n.(DataFlow::CfgNode).getNode() = arg.getAFlowNode()))
      )
    then has_taint = true
    else has_taint = false
  ) and
  // A row is "ok" exactly when the observed taint matches the expectation.
  if expected_taint = has_taint then test_res = "ok " else test_res = "fail"
// Columns: argument location, ok/fail, name of the enclosing function, rendering of the argument.
// The `.(Function)` cast means a call whose scope is not a function yields no row.
select arg.getLocation().toString(), test_res, call.getScope().(Function).getName(), repr(arg)

View File

@@ -0,0 +1,107 @@
# Taint sources for this test: the QL configuration's `isSource` matches
# references to the names TAINTED_STRING and TAINTED_BYTES, so do not rename them.
TAINTED_STRING = "TAINTED_STRING"
TAINTED_BYTES = b"TAINTED_BYTES"
def ensure_tainted(*args):
    """Print a header line followed by the index and repr of each argument.

    The QL test query treats every positional argument of a call to this
    function as a value that is expected to be tainted.
    """
    print("- ensure_tainted")
    line_template = "arg {}: {!r}"
    for position, value in enumerate(args):
        print(line_template.format(position, value))
def ensure_not_tainted(*args):
    """Print a header line followed by the index and repr of each argument.

    The QL test query treats every positional argument of a call to this
    function as a value that is expected NOT to be tainted.
    """
    print("- ensure_not_tainted")
    line_template = "arg {}: {!r}"
    for position, value in enumerate(args):
        print(line_template.format(position, value))
# Actual tests
def str_operations():
    """Test cases: operators and builtins applied directly to a tainted string.

    Each argument of the `ensure_tainted` call is one test case; the QL query
    reports one row per argument. The expressions are the analysis input, so
    their exact form (including the local name `ts`) shows up in the expected
    output.
    """
    print("\n# str_operations")
    ts = TAINTED_STRING
    ensure_tainted(
        ts,
        # concatenation, tainted operand on either side
        ts + "foo",
        "foo" + ts,
        # slicing and indexing
        ts[0 : len(ts)],
        ts[:],
        ts[0:1000],
        ts[0],
        # conversion through the builtin
        str(ts),
    )
def str_methods():
    """Test cases: `str`/`bytes` methods called on, or given, a tainted value.

    Arguments of `ensure_tainted` are expected to be tainted; arguments of
    `ensure_not_tainted` are expected to be untainted. Each argument is one
    row in the query output.
    """
    print("\n# str_methods")
    ts = TAINTED_STRING
    tb = TAINTED_BYTES
    ensure_tainted(
        ts.capitalize(),
        ts.casefold(),
        ts.center(100),
        ts.expandtabs(),
        # formatting: tainted format string, and tainted positional/keyword values
        ts.format(),
        "{}".format(ts),
        "{unsafe}".format(unsafe=ts),
        ts.format_map({}),
        "{unsafe}".format_map({"unsafe": ts}),
        # join: tainted separator, and tainted element
        ts.join(["", ""]),
        "".join([ts]),
        ts.ljust(100),
        ts.lstrip(),
        ts.lower(),
        # replace: tainted receiver, and tainted replacement text
        ts.replace("old", "new"),
        "safe".replace("safe", ts),
        ts.rjust(100),
        ts.rstrip(),
        ts.strip(),
        ts.swapcase(),
        ts.title(),
        ts.upper(),
        ts.zfill(100),
        # str <-> bytes round trips
        ts.encode("utf-8"),
        ts.encode("utf-8").decode("utf-8"),
        tb.decode("utf-8"),
        tb.decode("utf-8").encode("utf-8"),
        # string methods that return a container
        # (partition/rpartition return a tuple, the others a list)
        ts.partition("_"),
        ts.rpartition("_"),
        ts.rsplit("_"),
        ts.split("_"),
        ts.splitlines(),
    )
    ensure_not_tainted(
        # Intuitively I think this should be safe, but better discuss it
        "safe".replace(ts, "also-safe"),
        ts.join([]),  # FP due to separator not being used with zero/one elements
        ts.join(["safe"]),  # FP due to separator not being used with zero/one elements
    )
def non_syntactic():
    """Test cases where the called function is not named directly at the call site.

    `meth` is the bound method `ts.upper` stored in a local, and `_str` is an
    alias for the builtin `str`; both calls are expected to yield tainted values.
    """
    print("\n# non_syntactic")
    ts = TAINTED_STRING
    meth = ts.upper
    _str = str
    ensure_tainted(
        meth(),
        _str(ts),
    )
# Make tests runnable: executing this file calls each test function in turn,
# printing the values passed to the ensure_* helpers.
str_operations()
str_methods()
non_syntactic()