From b72c93ff4fa25b3fdd4ab284a9ee2631dbc14047 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 9 Jun 2023 13:11:55 +0200 Subject: [PATCH 1/6] python: remove remaining explicit taint steps --- .../new/internal/TaintTrackingPrivate.qll | 19 ------------ .../dataflow/summaries/summaries.expected | 3 -- .../test_collections.py | 28 ++++++++--------- .../Security/CWE-022-TarSlip/TarSlip.expected | 31 ------------------- .../UnsafeUnpack.expected | 10 ------ .../CWE-022-UnsafeUnpacking/UnsafeUnpack.py | 2 +- .../frameworks/aiohttp/taint_test.py | 6 ++-- .../frameworks/multidict/taint_test.py | 12 +++---- .../frameworks/stdlib/http_server.py | 4 +-- 9 files changed, 26 insertions(+), 89 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll index 78fb529b05a..6adab46f246 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll @@ -183,25 +183,6 @@ predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { // longer -- but there needs to be a matching read-step for the store-step, and we // don't provide that right now. DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo) - or - // functions operating on collections - exists(DataFlow::CallCfgNode call | call = nodeTo | - call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and - call.getArg(0) = nodeFrom - ) - or - // dict methods - exists(DataFlow::MethodCallNode call, string methodName | call = nodeTo | - methodName in ["values", "items"] and - call.calls(nodeFrom, methodName) - ) - or - // list.append, set.add - exists(DataFlow::MethodCallNode call, DataFlow::Node obj | - call.calls(obj, ["append", "add"]) and - obj = nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() and - call.getArg(0) = nodeFrom - ) } /** diff --git a/python/ql/test/experimental/dataflow/summaries/summaries.expected b/python/ql/test/experimental/dataflow/summaries/summaries.expected index b566cbdedc6..47cab4224a4 100644 --- a/python/ql/test/experimental/dataflow/summaries/summaries.expected +++ b/python/ql/test/experimental/dataflow/summaries/summaries.expected @@ -4,9 +4,7 @@ edges | summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | | summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | | summaries.py:44:16:44:33 | ControlFlowNode for reversed() [List element] | summaries.py:45:6:45:17 | ControlFlowNode for tainted_list [List element] | -| summaries.py:44:25:44:32 | ControlFlowNode for List | summaries.py:45:6:45:20 | ControlFlowNode for Subscript | | summaries.py:44:25:44:32 | ControlFlowNode for List [List element] | summaries.py:44:16:44:33 | ControlFlowNode for reversed() [List element] | -| summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | summaries.py:44:25:44:32 | ControlFlowNode for List | | summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | summaries.py:44:25:44:32 | ControlFlowNode for List [List element] | | summaries.py:45:6:45:17 | ControlFlowNode for tainted_list [List element] | summaries.py:45:6:45:20 | ControlFlowNode for Subscript | | summaries.py:51:18:51:46 | ControlFlowNode for list_map() [List element] | summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] | @@ -36,7 +34,6 @@ nodes | summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE | | summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | semmle.label | ControlFlowNode for tainted_lambda | | summaries.py:44:16:44:33 | ControlFlowNode for reversed() [List element] | semmle.label | ControlFlowNode for reversed() [List element] | -| summaries.py:44:25:44:32 | ControlFlowNode for List | semmle.label | ControlFlowNode for List | | summaries.py:44:25:44:32 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] | | summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE | | summaries.py:45:6:45:17 | ControlFlowNode for tainted_list [List element] | semmle.label | ControlFlowNode for tainted_list [List element] | diff --git a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py index 50f9a613f9b..1eaa5e44aa2 100644 --- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py +++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py @@ -31,8 +31,8 @@ def test_construction(): list(tainted_list), # $ tainted list(tainted_tuple), # $ tainted list(tainted_set), # $ tainted - list(tainted_dict.values()), # $ tainted - list(tainted_dict.items()), # $ tainted + list(tainted_dict.values()), # $ MISSING: tainted + list(tainted_dict.items()), # $ MISSING: tainted tuple(tainted_list), # $ tainted set(tainted_list), # $ tainted @@ -56,10 +56,10 @@ def test_access(x, y, z): tainted_list[x], # $ tainted tainted_list[y:z], # $ tainted - sorted(tainted_list), # $ tainted - reversed(tainted_list), # $ tainted - iter(tainted_list), # $ tainted - next(iter(tainted_list)), # $ tainted + sorted(tainted_list), # $ MISSING: tainted + reversed(tainted_list), # $ MISSING: tainted + iter(tainted_list), # $ MISSING: tainted + next(iter(tainted_list)), # $ MISSING: tainted [i for i in tainted_list], # $ tainted [tainted_list for _i in [1,2,3]], # $ MISSING: tainted ) @@ -70,7 +70,7 @@ def test_access(x, y, z): for h in tainted_list: ensure_tainted(h) # $ tainted for i in reversed(tainted_list): - ensure_tainted(i) # $ tainted + ensure_tainted(i) # $ MISSING: tainted def test_access_explicit(x, y, z): tainted_list = [TAINTED_STRING] @@ -80,10 +80,10 @@ def test_access_explicit(x, y, z): tainted_list[x], # $ tainted tainted_list[y:z], # $ tainted - sorted(tainted_list)[0], # $ tainted + sorted(tainted_list)[0], # $ MISSING: tainted reversed(tainted_list)[0], # $ tainted - iter(tainted_list), # $ tainted - next(iter(tainted_list)), # $ tainted + iter(tainted_list), # $ MISSING: tainted + next(iter(tainted_list)), # $ MISSING: tainted [i for i in tainted_list], # $ tainted [tainted_list for i in [1,2,3]], # $ MISSING: tainted [TAINTED_STRING for i in [1,2,3]], # $ tainted @@ -109,9 +109,9 @@ def test_dict_access(x): ) for v in tainted_dict.values(): - ensure_tainted(v) # $ tainted + ensure_tainted(v) # $ MISSING: tainted for k, v in tainted_dict.items(): - ensure_tainted(v) # $ tainted + ensure_tainted(v) # $ MISSING: tainted def test_named_tuple(): # TODO: namedtuple currently not handled @@ -194,7 +194,7 @@ def list_append(): ensure_not_tainted(my_list) my_list.append(tainted_string) - ensure_tainted(my_list) # $ tainted + ensure_tainted(my_list) # $ MISSING: tainted def list_extend(): @@ -262,7 +262,7 @@ def set_add(): ensure_not_tainted(my_set) my_set.add(tainted_string) - ensure_tainted(my_set) # $ tainted + ensure_tainted(my_set) # $ MISSING: tainted # Make tests runable diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022-TarSlip/TarSlip.expected b/python/ql/test/experimental/query-tests/Security/CWE-022-TarSlip/TarSlip.expected index 9a5571a8033..96a0dea5697 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-022-TarSlip/TarSlip.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-022-TarSlip/TarSlip.expected @@ -1,11 +1,4 @@ edges -| TarSlipImprov.py:15:7:15:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:17:5:17:10 | GSSA Variable member | -| TarSlipImprov.py:17:5:17:10 | GSSA Variable member | TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | -| TarSlipImprov.py:26:21:26:27 | ControlFlowNode for tarfile | TarSlipImprov.py:28:9:28:14 | SSA variable member | -| TarSlipImprov.py:28:9:28:14 | SSA variable member | TarSlipImprov.py:36:12:36:17 | ControlFlowNode for result | -| TarSlipImprov.py:38:7:38:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | -| TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | TarSlipImprov.py:26:21:26:27 | ControlFlowNode for tarfile | -| TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | | TarSlipImprov.py:43:6:43:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:44:9:44:13 | GSSA Variable entry | | TarSlipImprov.py:44:9:44:13 | GSSA Variable entry | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | | TarSlipImprov.py:54:6:54:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:56:9:56:13 | GSSA Variable entry | @@ -26,8 +19,6 @@ edges | TarSlipImprov.py:188:7:188:27 | ControlFlowNode for Attribute() | TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar | | TarSlipImprov.py:193:6:193:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | | TarSlipImprov.py:210:6:210:43 | ControlFlowNode for Attribute() | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | -| TarSlipImprov.py:231:6:231:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:233:9:233:9 | GSSA Variable f | -| TarSlipImprov.py:233:9:233:9 | GSSA Variable f | TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | | TarSlipImprov.py:258:6:258:26 | ControlFlowNode for Attribute() | TarSlipImprov.py:259:9:259:13 | GSSA Variable entry | | TarSlipImprov.py:259:9:259:13 | GSSA Variable entry | TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry | | TarSlipImprov.py:264:6:264:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:265:9:265:13 | GSSA Variable entry | @@ -40,18 +31,7 @@ edges | TarSlipImprov.py:287:7:287:28 | ControlFlowNode for Attribute() | TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar | | TarSlipImprov.py:292:7:292:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | | TarSlipImprov.py:300:6:300:51 | ControlFlowNode for Attribute() | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | -| TarSlipImprov.py:304:7:304:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:306:5:306:10 | GSSA Variable member | -| TarSlipImprov.py:306:5:306:10 | GSSA Variable member | TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | nodes -| TarSlipImprov.py:15:7:15:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | -| TarSlipImprov.py:17:5:17:10 | GSSA Variable member | semmle.label | GSSA Variable member | -| TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | semmle.label | ControlFlowNode for result | -| TarSlipImprov.py:26:21:26:27 | ControlFlowNode for tarfile | semmle.label | ControlFlowNode for tarfile | -| TarSlipImprov.py:28:9:28:14 | SSA variable member | semmle.label | SSA variable member | -| TarSlipImprov.py:36:12:36:17 | ControlFlowNode for result | semmle.label | ControlFlowNode for result | -| TarSlipImprov.py:38:7:38:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | -| TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | semmle.label | ControlFlowNode for members_filter1() | -| TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | | TarSlipImprov.py:43:6:43:38 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | TarSlipImprov.py:44:9:44:13 | GSSA Variable entry | semmle.label | GSSA Variable entry | | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry | @@ -86,9 +66,6 @@ nodes | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | | TarSlipImprov.py:210:6:210:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | -| TarSlipImprov.py:231:6:231:38 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | -| TarSlipImprov.py:233:9:233:9 | GSSA Variable f | semmle.label | GSSA Variable f | -| TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | semmle.label | ControlFlowNode for members | | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | TarSlipImprov.py:258:6:258:26 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | TarSlipImprov.py:259:9:259:13 | GSSA Variable entry | semmle.label | GSSA Variable entry | @@ -110,15 +87,9 @@ nodes | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | | TarSlipImprov.py:300:6:300:51 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | -| TarSlipImprov.py:304:7:304:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | -| TarSlipImprov.py:306:5:306:10 | GSSA Variable member | semmle.label | GSSA Variable member | -| TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | semmle.label | ControlFlowNode for result | | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | subpaths -| TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | TarSlipImprov.py:26:21:26:27 | ControlFlowNode for tarfile | TarSlipImprov.py:36:12:36:17 | ControlFlowNode for result | TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | #select -| TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | TarSlipImprov.py:15:7:15:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:15:7:15:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | ControlFlowNode for result | -| TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | TarSlipImprov.py:38:7:38:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:38:7:38:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | ControlFlowNode for members_filter1() | | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | TarSlipImprov.py:43:6:43:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:43:6:43:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | ControlFlowNode for entry | | TarSlipImprov.py:58:21:58:25 | ControlFlowNode for entry | TarSlipImprov.py:54:6:54:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:58:21:58:25 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:54:6:54:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:58:21:58:25 | ControlFlowNode for entry | ControlFlowNode for entry | | TarSlipImprov.py:91:5:91:7 | ControlFlowNode for tar | TarSlipImprov.py:88:6:88:43 | ControlFlowNode for Attribute() | TarSlipImprov.py:91:5:91:7 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:88:6:88:43 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:91:5:91:7 | ControlFlowNode for tar | ControlFlowNode for tar | @@ -133,7 +104,6 @@ subpaths | TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar | TarSlipImprov.py:188:7:188:27 | ControlFlowNode for Attribute() | TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:188:7:188:27 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar | ControlFlowNode for tar | | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | TarSlipImprov.py:193:6:193:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:193:6:193:31 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | ControlFlowNode for tar | | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | TarSlipImprov.py:210:6:210:43 | ControlFlowNode for Attribute() | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:210:6:210:43 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | ControlFlowNode for tar | -| TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | TarSlipImprov.py:231:6:231:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:231:6:231:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | ControlFlowNode for members | | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | | TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry | TarSlipImprov.py:258:6:258:26 | ControlFlowNode for Attribute() | TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:258:6:258:26 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry | ControlFlowNode for entry | | TarSlipImprov.py:268:21:268:25 | ControlFlowNode for entry | TarSlipImprov.py:264:6:264:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:268:21:268:25 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:264:6:264:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:268:21:268:25 | ControlFlowNode for entry | ControlFlowNode for entry | @@ -143,5 +113,4 @@ subpaths | TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar | TarSlipImprov.py:287:7:287:28 | ControlFlowNode for Attribute() | TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:287:7:287:28 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar | ControlFlowNode for tar | | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | TarSlipImprov.py:292:7:292:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:292:7:292:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | ControlFlowNode for tar | | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | TarSlipImprov.py:300:6:300:51 | ControlFlowNode for Attribute() | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:300:6:300:51 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | ControlFlowNode for tar | -| TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | TarSlipImprov.py:304:7:304:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:304:7:304:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | ControlFlowNode for result | | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected index f32d3037bbc..93ca771caaa 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected @@ -23,10 +23,6 @@ edges | UnsafeUnpack.py:116:27:116:39 | ControlFlowNode for Attribute | UnsafeUnpack.py:116:17:116:21 | SSA variable ufile | | UnsafeUnpack.py:118:38:118:47 | ControlFlowNode for Attribute | UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | | UnsafeUnpack.py:140:23:140:35 | ControlFlowNode for Attribute | UnsafeUnpack.py:142:49:142:51 | ControlFlowNode for tar | -| UnsafeUnpack.py:158:23:158:27 | SSA variable chunk | UnsafeUnpack.py:163:23:163:28 | SSA variable member | -| UnsafeUnpack.py:158:32:158:44 | ControlFlowNode for Attribute | UnsafeUnpack.py:158:32:158:54 | ControlFlowNode for Subscript | -| UnsafeUnpack.py:158:32:158:54 | ControlFlowNode for Subscript | UnsafeUnpack.py:158:23:158:27 | SSA variable chunk | -| UnsafeUnpack.py:163:23:163:28 | SSA variable member | UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | | UnsafeUnpack.py:174:15:174:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:176:1:176:34 | ControlFlowNode for Attribute() | | UnsafeUnpack.py:194:53:194:55 | ControlFlowNode for tmp | UnsafeUnpack.py:201:29:201:36 | ControlFlowNode for Attribute | nodes @@ -61,11 +57,6 @@ nodes | UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | semmle.label | ControlFlowNode for uploaded_file_path | | UnsafeUnpack.py:140:23:140:35 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | UnsafeUnpack.py:142:49:142:51 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | -| UnsafeUnpack.py:158:23:158:27 | SSA variable chunk | semmle.label | SSA variable chunk | -| UnsafeUnpack.py:158:32:158:44 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| UnsafeUnpack.py:158:32:158:54 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| UnsafeUnpack.py:163:23:163:28 | SSA variable member | semmle.label | SSA variable member | -| UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | semmle.label | ControlFlowNode for result | | UnsafeUnpack.py:174:15:174:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | UnsafeUnpack.py:176:1:176:34 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | UnsafeUnpack.py:194:53:194:55 | ControlFlowNode for tmp | semmle.label | ControlFlowNode for tmp | @@ -82,6 +73,5 @@ subpaths | UnsafeUnpack.py:112:35:112:43 | ControlFlowNode for file_path | UnsafeUnpack.py:108:22:108:34 | ControlFlowNode for Attribute | UnsafeUnpack.py:112:35:112:43 | ControlFlowNode for file_path | Unsafe extraction from a malicious tarball retrieved from a remote location. | | UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | UnsafeUnpack.py:116:27:116:39 | ControlFlowNode for Attribute | UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | Unsafe extraction from a malicious tarball retrieved from a remote location. | | UnsafeUnpack.py:142:49:142:51 | ControlFlowNode for tar | UnsafeUnpack.py:140:23:140:35 | ControlFlowNode for Attribute | UnsafeUnpack.py:142:49:142:51 | ControlFlowNode for tar | Unsafe extraction from a malicious tarball retrieved from a remote location. | -| UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | UnsafeUnpack.py:158:32:158:44 | ControlFlowNode for Attribute | UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | Unsafe extraction from a malicious tarball retrieved from a remote location. | | UnsafeUnpack.py:176:1:176:34 | ControlFlowNode for Attribute() | UnsafeUnpack.py:79:16:79:28 | ControlFlowNode for Attribute | UnsafeUnpack.py:176:1:176:34 | ControlFlowNode for Attribute() | Unsafe extraction from a malicious tarball retrieved from a remote location. | | UnsafeUnpack.py:201:29:201:36 | ControlFlowNode for Attribute | UnsafeUnpack.py:194:53:194:55 | ControlFlowNode for tmp | UnsafeUnpack.py:201:29:201:36 | ControlFlowNode for Attribute | Unsafe extraction from a malicious tarball retrieved from a remote location. | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.py index 6b533462d23..3f4f2319690 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.py @@ -164,7 +164,7 @@ def simple_upload(request): if member.issym(): raise ValueError("But it is a symlink") result.append(member) - tar.extractall(path=tempfile.mkdtemp(), members=result) # $result=BAD + tar.extractall(path=tempfile.mkdtemp(), members=result) # $ MISSING: result=BAD tar.close() diff --git a/python/ql/test/library-tests/frameworks/aiohttp/taint_test.py b/python/ql/test/library-tests/frameworks/aiohttp/taint_test.py index ec475a592ab..6c0a60530b5 100644 --- a/python/ql/test/library-tests/frameworks/aiohttp/taint_test.py +++ b/python/ql/test/library-tests/frameworks/aiohttp/taint_test.py @@ -40,10 +40,10 @@ async def test_taint(request: web.Request): # $ requestHandler request.cookies["key"], # $ tainted request.cookies.get("key"), # $ tainted request.cookies.keys(), # $ MISSING: tainted - request.cookies.values(), # $ tainted - request.cookies.items(), # $ tainted + request.cookies.values(), # $ MISSING: tainted + request.cookies.items(), # $ MISSING: tainted list(request.cookies), # $ tainted - iter(request.cookies), # $ tainted + iter(request.cookies), # $ MISSING: tainted # aiohttp.StreamReader diff --git a/python/ql/test/library-tests/frameworks/multidict/taint_test.py b/python/ql/test/library-tests/frameworks/multidict/taint_test.py index 8fbac79888f..4410e2b2a6f 100644 --- a/python/ql/test/library-tests/frameworks/multidict/taint_test.py +++ b/python/ql/test/library-tests/frameworks/multidict/taint_test.py @@ -13,11 +13,11 @@ ensure_tainted( mdp.getone("key"), # $ tainted mdp.getall("key"), # $ tainted mdp.keys(), # $ MISSING: tainted - mdp.values(), # $ tainted - mdp.items(), # $ tainted + mdp.values(), # $ MISSING: tainted + mdp.items(), # $ MISSING: tainted mdp.copy(), # $ tainted list(mdp), # $ tainted - iter(mdp), # $ tainted + iter(mdp), # $ MISSING: tainted ) # TODO: This is an invalid CIMultiDictProxy construction... but for the purpose of @@ -33,9 +33,9 @@ ensure_tainted( ci_mdp.getone("key"), # $ tainted ci_mdp.getall("key"), # $ tainted ci_mdp.keys(), # $ MISSING: tainted - ci_mdp.values(), # $ tainted - ci_mdp.items(), # $ tainted + ci_mdp.values(), # $ MISSING: tainted + ci_mdp.items(), # $ MISSING: tainted ci_mdp.copy(), # $ tainted list(ci_mdp), # $ tainted - iter(ci_mdp), # $ tainted + iter(ci_mdp), # $ MISSING: tainted ) diff --git a/python/ql/test/library-tests/frameworks/stdlib/http_server.py b/python/ql/test/library-tests/frameworks/stdlib/http_server.py index 27ec2211f4b..3dbc832c397 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/http_server.py +++ b/python/ql/test/library-tests/frameworks/stdlib/http_server.py @@ -60,8 +60,8 @@ class MyHandler(BaseHTTPRequestHandler): self.headers.get('Foo'), # $ tainted self.headers.get_all('Foo'), # $ tainted self.headers.keys(), # $ tainted - self.headers.values(), # $ tainted - self.headers.items(), # $ tainted + self.headers.values(), # $ MISSING: tainted + self.headers.items(), # $ MISSING: tainted self.headers.as_bytes(), # $ tainted self.headers.as_string(), # $ tainted str(self.headers), # $ tainted From 4b4b9bf9da0d8451e383065008dbf06b4735501b Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 9 Jun 2023 16:08:02 +0200 Subject: [PATCH 2/6] python: add missing summaries For append/add: The new results in the experimental tar slip query show that we do not recognize the sanitisers. --- .../lib/semmle/python/frameworks/Stdlib.qll | 277 +++++++++++++++++- .../experimental/dataflow/coverage/test.py | 2 +- .../dataflow/coverage/test_builtins.py | 34 +-- .../dataflow/summaries/summaries.expected | 5 + .../test_collections.py | 26 +- .../Security/CWE-022-TarSlip/TarSlip.expected | 47 +++ .../UnsafeUnpack.expected | 14 + .../CWE-022-UnsafeUnpacking/UnsafeUnpack.py | 2 +- .../frameworks/aiohttp/taint_test.py | 8 +- .../frameworks/multidict/taint_test.py | 16 +- .../frameworks/stdlib/http_server.py | 4 +- 11 files changed, 388 insertions(+), 47 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index bfd9144020d..641dba0779e 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3883,6 +3883,9 @@ private module StdlibPrivate { } } + // --------------------------------------------------------------------------- + // Flow summaries for functions operating on containers + // --------------------------------------------------------------------------- /** A flow summary for `reversed`. */ class ReversedSummary extends SummarizedCallable { ReversedSummary() { this = "builtins.reversed" } @@ -3894,9 +3897,114 @@ private module StdlibPrivate { } override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - input = "Argument[0].ListElement" and + ( + input = "Argument[0].ListElement" + or + input = "Argument[0].SetElement" + or + exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | + input = "Argument[0].TupleElement[" + i.toString() + "]" + ) + // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent + ) and output = "ReturnValue.ListElement" and preservesValue = true + or + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = false + } + } + + /** A flow summary for `sorted`. */ + class SortedSummary extends SummarizedCallable { + SortedSummary() { this = "builtins.sorted" } + + override DataFlow::CallCfgNode getACall() { result = API::builtin("sorted").getACall() } + + override DataFlow::ArgumentNode getACallback() { + result = API::builtin("sorted").getAValueReachableFromSource() + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + exists(string content | + content = "ListElement" + or + content = "SetElement" + or + exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | + content = "TupleElement[" + i.toString() + "]" + ) + | + // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent + input = "Argument[0]." + content and + output = "ReturnValue.ListElement" and + preservesValue = true + ) + or + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = false + } + } + + /** A flow summary for `iter`. */ + class IterSummary extends SummarizedCallable { + IterSummary() { this = "builtins.iter" } + + override DataFlow::CallCfgNode getACall() { result = API::builtin("iter").getACall() } + + override DataFlow::ArgumentNode getACallback() { + result = API::builtin("iter").getAValueReachableFromSource() + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + ( + input = "Argument[0].ListElement" + or + input = "Argument[0].SetElement" + or + exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | + input = "Argument[0].TupleElement[" + i.toString() + "]" + ) + // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent + ) and + output = "ReturnValue.ListElement" and + preservesValue = true + or + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = false + } + } + + /** A flow summary for `next`. */ + class NextSummary extends SummarizedCallable { + NextSummary() { this = "builtins.next" } + + override DataFlow::CallCfgNode getACall() { result = API::builtin("next").getACall() } + + override DataFlow::ArgumentNode getACallback() { + result = API::builtin("next").getAValueReachableFromSource() + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + ( + input = "Argument[0].ListElement" + or + input = "Argument[0].SetElement" + or + exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | + input = "Argument[0].TupleElement[" + i.toString() + "]" + ) + // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent + ) and + output = "ReturnValue" and + preservesValue = true + or + input = "Argument[1]" and + output = "ReturnValue" and + preservesValue = true } } @@ -4127,6 +4235,173 @@ private module StdlibPrivate { preservesValue = true } } + + /** + * A flow summary for `dict.values`. + * + * See https://docs.python.org/3.10/library/stdtypes.html#dict.values + */ + class DictValues extends SummarizedCallable { + DictValues() { this = "dict.values" } + + override DataFlow::CallCfgNode getACall() { + result.(DataFlow::MethodCallNode).calls(_, "values") + } + + override DataFlow::ArgumentNode getACallback() { + result.(DataFlow::AttrRead).getAttributeName() = "values" + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | + input = "Argument[self].DictionaryElement[" + key + "]" and + output = "ReturnValue.ListElement" and + preservesValue = true + ) + or + input = "Argument[self]" and + output = "ReturnValue" and + preservesValue = false + } + } + + /** + * A flow summary for `dict.keys`. + * + * See https://docs.python.org/3.10/library/stdtypes.html#dict.keys + */ + class DictKeys extends SummarizedCallable { + DictKeys() { this = "dict.keys" } + + override DataFlow::CallCfgNode getACall() { result.(DataFlow::MethodCallNode).calls(_, "keys") } + + override DataFlow::ArgumentNode getACallback() { + result.(DataFlow::AttrRead).getAttributeName() = "keys" + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent + input = "Argument[self]" and + output = "ReturnValue" and + preservesValue = false + } + } + + /** + * A flow summary for `dict.items`. + * + * See https://docs.python.org/3.10/library/stdtypes.html#dict.items + */ + class DictItems extends SummarizedCallable { + DictItems() { this = "dict.items" } + + override DataFlow::CallCfgNode getACall() { + result.(DataFlow::MethodCallNode).calls(_, "items") + } + + override DataFlow::ArgumentNode getACallback() { + result.(DataFlow::AttrRead).getAttributeName() = "items" + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | + input = "Argument[self].DictionaryElement[" + key + "]" and + output = "ReturnValue.ListElement.TupleElement[1]" and + preservesValue = true + ) + or + // TODO: Add the keys to output list + input = "Argument[self]" and + output = "ReturnValue" and + preservesValue = false + } + } + + /** + * A flow summary for `list.append`. + * + * See https://docs.python.org/3.10/library/stdtypes.html#typesseq-mutable + */ + class ListAppend extends SummarizedCallable { + ListAppend() { this = "list.append" } + + override DataFlow::CallCfgNode getACall() { + result.(DataFlow::MethodCallNode).calls(_, "append") + } + + override DataFlow::ArgumentNode getACallback() { + result.(DataFlow::AttrRead).getAttributeName() = "append" + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + // existing elements + input = "Argument[self].ListElement" and + output = "ReturnValue.ListElement" and + preservesValue = true + or + // newly added element returned + input = "Argument[0]" and + output = "ReturnValue.ListElement" and + preservesValue = true + or + // newly added element added to this + input = "Argument[0]" and + output = "Argument[self].ListElement" and + preservesValue = true + or + // transfer taint from new element to this + input = "Argument[0]" and + output = "Argument[self]" and + preservesValue = false + or + // transfer taint from new element to return value + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = false + } + } + + /** + * A flow summary for `set.add`. + * + * See https://docs.python.org/3.10/library/stdtypes.html#frozenset.add + */ + class SetAdd extends SummarizedCallable { + SetAdd() { this = "set.add" } + + override DataFlow::CallCfgNode getACall() { result.(DataFlow::MethodCallNode).calls(_, "add") } + + override DataFlow::ArgumentNode getACallback() { + result.(DataFlow::AttrRead).getAttributeName() = "add" + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + // existing elements + input = "Argument[self].SetElement" and + output = "ReturnValue.SetElement" and + preservesValue = true + or + // newly added element returned + input = "Argument[0]" and + output = "ReturnValue.SetElement" and + preservesValue = true + or + // newly added element added to this + input = "Argument[0]" and + output = "Argument[self].SetElement" and + preservesValue = true + or + // transfer taint from new element to this + input = "Argument[0]" and + output = "Argument[self]" and + preservesValue = false + or + // transfer taint from new element to return value + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = false + } + } } // --------------------------------------------------------------------------- diff --git a/python/ql/test/experimental/dataflow/coverage/test.py b/python/ql/test/experimental/dataflow/coverage/test.py index f35339e4dca..81623c58ea0 100644 --- a/python/ql/test/experimental/dataflow/coverage/test.py +++ b/python/ql/test/experimental/dataflow/coverage/test.py @@ -192,7 +192,7 @@ def test_nested_comprehension_deep_with_local_flow(): def test_nested_comprehension_dict(): d = {"s": [SOURCE]} x = [y for k, v in d.items() for y in v] - SINK(x[0]) #$ MISSING:flow="SOURCE, l:-2 -> x[0]" + SINK(x[0]) #$ flow="SOURCE, l:-2 -> x[0]" def test_nested_comprehension_paren(): diff --git a/python/ql/test/experimental/dataflow/coverage/test_builtins.py b/python/ql/test/experimental/dataflow/coverage/test_builtins.py index 629e2600280..24592337076 100644 --- a/python/ql/test/experimental/dataflow/coverage/test_builtins.py +++ b/python/ql/test/experimental/dataflow/coverage/test_builtins.py @@ -171,7 +171,7 @@ def test_list_copy(): def test_list_append(): l = [NONSOURCE] l.append(SOURCE) - SINK(l[1]) #$ MISSING: flow="SOURCE, l:-1 -> l[1]" + SINK(l[1]) #$ flow="SOURCE, l:-1 -> l[1]" ### Set @@ -188,7 +188,7 @@ def test_set_copy(): def test_set_add(): s = set([]) s.add(SOURCE) - SINK(s.pop()) #$ MISSING: flow="SOURCE, l:-2 -> s.pop()" + SINK(s.pop()) #$ flow="SOURCE, l:-1 -> s.pop()" ### Dict @@ -202,7 +202,7 @@ def test_dict_values(): d = {'k': SOURCE} vals = d.values() val_list = list(vals) - SINK(val_list[0]) #$ MISSING: flow="SOURCE, l:-3 -> val_list[0]" + SINK(val_list[0]) #$ flow="SOURCE, l:-3 -> val_list[0]" @expects(4) def test_dict_items(): @@ -210,9 +210,9 @@ def test_dict_items(): items = d.items() item_list = list(items) SINK_F(item_list[0][0]) # expecting FP due to imprecise flow - SINK(item_list[0][1]) #$ MISSING: flow="SOURCE, l:-4 -> item_list[0][1]" + SINK(item_list[0][1]) #$ flow="SOURCE, l:-4 -> item_list[0][1]" SINK(item_list[1][0]) #$ MISSING: flow="SOURCE, l:-5 -> item_list[1][0]" - SINK_F(item_list[1][1]) # expecting FP due to imprecise flow + SINK_F(item_list[1][1]) #$ SPURIOUS: flow="SOURCE, l:-6 -> item_list[1][1]" @expects(3) def test_dict_pop(): @@ -257,17 +257,17 @@ def test_dict_copy(): def test_sorted_list(): l0 = [SOURCE] l = sorted(l0) - SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]" + SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]" def test_sorted_tuple(): t = (SOURCE,) l = sorted(t) - SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]" + SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]" def test_sorted_set(): s = {SOURCE} l = sorted(s) - SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]" + SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]" def test_sorted_dict(): d = {SOURCE: "val"} @@ -289,8 +289,8 @@ def test_reversed_tuple(): t = (SOURCE, NONSOURCE) r = reversed(t) l = list(r) - SINK_F(l[0]) - SINK(l[1]) #$ MISSING: flow="SOURCE, l:-4 -> l[1]" + SINK_F(l[0]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[0]" + SINK(l[1]) #$ flow="SOURCE, l:-4 -> l[1]" @expects(2) def test_reversed_dict(): @@ -306,19 +306,19 @@ def test_iter_list(): l0 = [SOURCE] i = iter(l0) l = list(i) - SINK(l[0]) #$ MISSING: flow="SOURCE, l:-3 -> l[0]" + SINK(l[0]) #$ flow="SOURCE, l:-3 -> l[0]" def test_iter_tuple(): t = (SOURCE,) i = iter(t) l = list(i) - SINK(l[0]) #$ MISSING: flow="SOURCE, l:-3 -> l[0]" + SINK(l[0]) #$ flow="SOURCE, l:-3 -> l[0]" def test_iter_set(): t = {SOURCE} i = iter(t) l = list(i) - SINK(l[0]) #$ MISSING: flow="SOURCE, l:-3 -> l[0]" + SINK(l[0]) #$ flow="SOURCE, l:-3 -> l[0]" def test_iter_dict(): d = {SOURCE: "val"} @@ -331,7 +331,7 @@ def test_iter_iter(): l0 = [SOURCE] i = iter(iter(l0)) l = list(i) - SINK(l[0]) #$ MISSING: flow="SOURCE, l:-3 -> l[0]" + SINK(l[0]) #$ flow="SOURCE, l:-3 -> l[0]" ### next @@ -339,19 +339,19 @@ def test_next_list(): l = [SOURCE] i = iter(l) n = next(i) - SINK(n) #$ MISSING: flow="SOURCE, l:-3 -> n" + SINK(n) #$ flow="SOURCE, l:-3 -> n" def test_next_tuple(): t = (SOURCE,) i = iter(t) n = next(i) - SINK(n) #$ MISSING: flow="SOURCE, l:-3 -> n" + SINK(n) #$ flow="SOURCE, l:-3 -> n" def test_next_set(): s = {SOURCE} i = iter(s) n = next(i) - SINK(n) #$ MISSING: flow="SOURCE, l:-3 -> n" + SINK(n) #$ flow="SOURCE, l:-3 -> n" def test_next_dict(): d = {SOURCE: "val"} diff --git a/python/ql/test/experimental/dataflow/summaries/summaries.expected b/python/ql/test/experimental/dataflow/summaries/summaries.expected index 47cab4224a4..1d8a9f1eb0c 100644 --- a/python/ql/test/experimental/dataflow/summaries/summaries.expected +++ b/python/ql/test/experimental/dataflow/summaries/summaries.expected @@ -3,8 +3,11 @@ edges | summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | summaries.py:32:11:32:26 | ControlFlowNode for identity() | | summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | | summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | +| summaries.py:44:16:44:33 | ControlFlowNode for reversed() | summaries.py:45:6:45:20 | ControlFlowNode for Subscript | | summaries.py:44:16:44:33 | ControlFlowNode for reversed() [List element] | summaries.py:45:6:45:17 | ControlFlowNode for tainted_list [List element] | +| summaries.py:44:25:44:32 | ControlFlowNode for List | summaries.py:44:16:44:33 | ControlFlowNode for reversed() | | summaries.py:44:25:44:32 | ControlFlowNode for List [List element] | summaries.py:44:16:44:33 | ControlFlowNode for reversed() [List element] | +| summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | summaries.py:44:25:44:32 | ControlFlowNode for List | | summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | summaries.py:44:25:44:32 | ControlFlowNode for List [List element] | | summaries.py:45:6:45:17 | ControlFlowNode for tainted_list [List element] | summaries.py:45:6:45:20 | ControlFlowNode for Subscript | | summaries.py:51:18:51:46 | ControlFlowNode for list_map() [List element] | summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] | @@ -33,7 +36,9 @@ nodes | summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | semmle.label | ControlFlowNode for apply_lambda() | | summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE | | summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | semmle.label | ControlFlowNode for tainted_lambda | +| summaries.py:44:16:44:33 | ControlFlowNode for reversed() | semmle.label | ControlFlowNode for reversed() | | summaries.py:44:16:44:33 | ControlFlowNode for reversed() [List element] | semmle.label | ControlFlowNode for reversed() [List element] | +| summaries.py:44:25:44:32 | ControlFlowNode for List | semmle.label | ControlFlowNode for List | | summaries.py:44:25:44:32 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] | | summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE | | summaries.py:45:6:45:17 | ControlFlowNode for tainted_list [List element] | semmle.label | ControlFlowNode for tainted_list [List element] | diff --git a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py index 1eaa5e44aa2..0e2aae93554 100644 --- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py +++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py @@ -31,8 +31,8 @@ def test_construction(): list(tainted_list), # $ tainted list(tainted_tuple), # $ tainted list(tainted_set), # $ tainted - list(tainted_dict.values()), # $ MISSING: tainted - list(tainted_dict.items()), # $ MISSING: tainted + list(tainted_dict.values()), # $ tainted + list(tainted_dict.items()), # $ tainted tuple(tainted_list), # $ tainted set(tainted_list), # $ tainted @@ -56,9 +56,9 @@ def test_access(x, y, z): tainted_list[x], # $ tainted tainted_list[y:z], # $ tainted - sorted(tainted_list), # $ MISSING: tainted - reversed(tainted_list), # $ MISSING: tainted - iter(tainted_list), # $ MISSING: tainted + sorted(tainted_list), # $ tainted + reversed(tainted_list), # $ tainted + iter(tainted_list), # $ tainted next(iter(tainted_list)), # $ MISSING: tainted [i for i in tainted_list], # $ tainted [tainted_list for _i in [1,2,3]], # $ MISSING: tainted @@ -70,7 +70,7 @@ def test_access(x, y, z): for h in tainted_list: ensure_tainted(h) # $ tainted for i in reversed(tainted_list): - ensure_tainted(i) # $ MISSING: tainted + ensure_tainted(i) # $ tainted def test_access_explicit(x, y, z): tainted_list = [TAINTED_STRING] @@ -80,10 +80,10 @@ def test_access_explicit(x, y, z): tainted_list[x], # $ tainted tainted_list[y:z], # $ tainted - sorted(tainted_list)[0], # $ MISSING: tainted + sorted(tainted_list)[0], # $ tainted reversed(tainted_list)[0], # $ tainted - iter(tainted_list), # $ MISSING: tainted - next(iter(tainted_list)), # $ MISSING: tainted + iter(tainted_list), # $ tainted + next(iter(tainted_list)), # $ tainted [i for i in tainted_list], # $ tainted [tainted_list for i in [1,2,3]], # $ MISSING: tainted [TAINTED_STRING for i in [1,2,3]], # $ tainted @@ -109,9 +109,9 @@ def test_dict_access(x): ) for v in tainted_dict.values(): - ensure_tainted(v) # $ MISSING: tainted + ensure_tainted(v) # $ tainted for k, v in tainted_dict.items(): - ensure_tainted(v) # $ MISSING: tainted + ensure_tainted(v) # $ tainted def test_named_tuple(): # TODO: namedtuple currently not handled @@ -194,7 +194,7 @@ def list_append(): ensure_not_tainted(my_list) my_list.append(tainted_string) - ensure_tainted(my_list) # $ MISSING: tainted + ensure_tainted(my_list) # $ tainted def list_extend(): @@ -262,7 +262,7 @@ def set_add(): ensure_not_tainted(my_set) my_set.add(tainted_string) - ensure_tainted(my_set) # $ MISSING: tainted + ensure_tainted(my_set) # $ tainted # Make tests runable diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022-TarSlip/TarSlip.expected b/python/ql/test/experimental/query-tests/Security/CWE-022-TarSlip/TarSlip.expected index 96a0dea5697..0042b85512c 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-022-TarSlip/TarSlip.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-022-TarSlip/TarSlip.expected @@ -1,4 +1,15 @@ edges +| TarSlipImprov.py:15:7:15:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:17:5:17:10 | GSSA Variable member | +| TarSlipImprov.py:17:5:17:10 | GSSA Variable member | TarSlipImprov.py:20:19:20:24 | ControlFlowNode for member | +| TarSlipImprov.py:20:5:20:10 | [post] ControlFlowNode for result | TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | +| TarSlipImprov.py:20:19:20:24 | ControlFlowNode for member | TarSlipImprov.py:20:5:20:10 | [post] ControlFlowNode for result | +| TarSlipImprov.py:26:21:26:27 | ControlFlowNode for tarfile | TarSlipImprov.py:28:9:28:14 | SSA variable member | +| TarSlipImprov.py:28:9:28:14 | SSA variable member | TarSlipImprov.py:35:23:35:28 | ControlFlowNode for member | +| TarSlipImprov.py:35:9:35:14 | [post] ControlFlowNode for result | TarSlipImprov.py:36:12:36:17 | ControlFlowNode for result | +| TarSlipImprov.py:35:23:35:28 | ControlFlowNode for member | TarSlipImprov.py:35:9:35:14 | [post] ControlFlowNode for result | +| TarSlipImprov.py:38:7:38:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | +| TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | TarSlipImprov.py:26:21:26:27 | ControlFlowNode for tarfile | +| TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | | TarSlipImprov.py:43:6:43:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:44:9:44:13 | GSSA Variable entry | | TarSlipImprov.py:44:9:44:13 | GSSA Variable entry | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | | TarSlipImprov.py:54:6:54:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:56:9:56:13 | GSSA Variable entry | @@ -19,6 +30,10 @@ edges | TarSlipImprov.py:188:7:188:27 | ControlFlowNode for Attribute() | TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar | | TarSlipImprov.py:193:6:193:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | | TarSlipImprov.py:210:6:210:43 | ControlFlowNode for Attribute() | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | +| TarSlipImprov.py:231:6:231:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:233:9:233:9 | GSSA Variable f | +| TarSlipImprov.py:233:9:233:9 | GSSA Variable f | TarSlipImprov.py:235:28:235:28 | ControlFlowNode for f | +| TarSlipImprov.py:235:13:235:19 | [post] ControlFlowNode for members | TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | +| TarSlipImprov.py:235:28:235:28 | ControlFlowNode for f | TarSlipImprov.py:235:13:235:19 | [post] ControlFlowNode for members | | TarSlipImprov.py:258:6:258:26 | ControlFlowNode for Attribute() | TarSlipImprov.py:259:9:259:13 | GSSA Variable entry | | TarSlipImprov.py:259:9:259:13 | GSSA Variable entry | TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry | | TarSlipImprov.py:264:6:264:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:265:9:265:13 | GSSA Variable entry | @@ -31,7 +46,24 @@ edges | TarSlipImprov.py:287:7:287:28 | ControlFlowNode for Attribute() | TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar | | TarSlipImprov.py:292:7:292:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | | TarSlipImprov.py:300:6:300:51 | ControlFlowNode for Attribute() | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | +| TarSlipImprov.py:304:7:304:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:306:5:306:10 | GSSA Variable member | +| TarSlipImprov.py:306:5:306:10 | GSSA Variable member | TarSlipImprov.py:309:19:309:24 | ControlFlowNode for member | +| TarSlipImprov.py:309:5:309:10 | [post] ControlFlowNode for result | TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | +| TarSlipImprov.py:309:19:309:24 | ControlFlowNode for member | TarSlipImprov.py:309:5:309:10 | [post] ControlFlowNode for result | nodes +| TarSlipImprov.py:15:7:15:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| TarSlipImprov.py:17:5:17:10 | GSSA Variable member | semmle.label | GSSA Variable member | +| TarSlipImprov.py:20:5:20:10 | [post] ControlFlowNode for result | semmle.label | [post] ControlFlowNode for result | +| TarSlipImprov.py:20:19:20:24 | ControlFlowNode for member | semmle.label | ControlFlowNode for member | +| TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | semmle.label | ControlFlowNode for result | +| TarSlipImprov.py:26:21:26:27 | ControlFlowNode for tarfile | semmle.label | ControlFlowNode for tarfile | +| TarSlipImprov.py:28:9:28:14 | SSA variable member | semmle.label | SSA variable member | +| TarSlipImprov.py:35:9:35:14 | [post] ControlFlowNode for result | semmle.label | [post] ControlFlowNode for result | +| TarSlipImprov.py:35:23:35:28 | ControlFlowNode for member | semmle.label | ControlFlowNode for member | +| TarSlipImprov.py:36:12:36:17 | ControlFlowNode for result | semmle.label | ControlFlowNode for result | +| TarSlipImprov.py:38:7:38:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | semmle.label | ControlFlowNode for members_filter1() | +| TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | | TarSlipImprov.py:43:6:43:38 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | TarSlipImprov.py:44:9:44:13 | GSSA Variable entry | semmle.label | GSSA Variable entry | | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry | @@ -66,6 +98,11 @@ nodes | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | | TarSlipImprov.py:210:6:210:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | +| TarSlipImprov.py:231:6:231:38 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| TarSlipImprov.py:233:9:233:9 | GSSA Variable f | semmle.label | GSSA Variable f | +| TarSlipImprov.py:235:13:235:19 | [post] ControlFlowNode for members | semmle.label | [post] ControlFlowNode for members | +| TarSlipImprov.py:235:28:235:28 | ControlFlowNode for f | semmle.label | ControlFlowNode for f | +| TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | semmle.label | ControlFlowNode for members | | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | TarSlipImprov.py:258:6:258:26 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | TarSlipImprov.py:259:9:259:13 | GSSA Variable entry | semmle.label | GSSA Variable entry | @@ -87,9 +124,17 @@ nodes | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | | TarSlipImprov.py:300:6:300:51 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | +| TarSlipImprov.py:304:7:304:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| TarSlipImprov.py:306:5:306:10 | GSSA Variable member | semmle.label | GSSA Variable member | +| TarSlipImprov.py:309:5:309:10 | [post] ControlFlowNode for result | semmle.label | [post] ControlFlowNode for result | +| TarSlipImprov.py:309:19:309:24 | ControlFlowNode for member | semmle.label | ControlFlowNode for member | +| TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | semmle.label | ControlFlowNode for result | | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | subpaths +| TarSlipImprov.py:39:65:39:67 | ControlFlowNode for tar | TarSlipImprov.py:26:21:26:27 | ControlFlowNode for tarfile | TarSlipImprov.py:36:12:36:17 | ControlFlowNode for result | TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | #select +| TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | TarSlipImprov.py:15:7:15:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:15:7:15:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:22:35:22:40 | ControlFlowNode for result | ControlFlowNode for result | +| TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | TarSlipImprov.py:38:7:38:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:38:7:38:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:39:49:39:68 | ControlFlowNode for members_filter1() | ControlFlowNode for members_filter1() | | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | TarSlipImprov.py:43:6:43:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:43:6:43:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:47:21:47:25 | ControlFlowNode for entry | ControlFlowNode for entry | | TarSlipImprov.py:58:21:58:25 | ControlFlowNode for entry | TarSlipImprov.py:54:6:54:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:58:21:58:25 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:54:6:54:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:58:21:58:25 | ControlFlowNode for entry | ControlFlowNode for entry | | TarSlipImprov.py:91:5:91:7 | ControlFlowNode for tar | TarSlipImprov.py:88:6:88:43 | ControlFlowNode for Attribute() | TarSlipImprov.py:91:5:91:7 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:88:6:88:43 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:91:5:91:7 | ControlFlowNode for tar | ControlFlowNode for tar | @@ -104,6 +149,7 @@ subpaths | TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar | TarSlipImprov.py:188:7:188:27 | ControlFlowNode for Attribute() | TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:188:7:188:27 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:189:1:189:3 | ControlFlowNode for tar | ControlFlowNode for tar | | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | TarSlipImprov.py:193:6:193:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:193:6:193:31 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:194:49:194:51 | ControlFlowNode for tar | ControlFlowNode for tar | | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | TarSlipImprov.py:210:6:210:43 | ControlFlowNode for Attribute() | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:210:6:210:43 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:211:5:211:7 | ControlFlowNode for tar | ControlFlowNode for tar | +| TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | TarSlipImprov.py:231:6:231:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:231:6:231:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:236:44:236:50 | ControlFlowNode for members | ControlFlowNode for members | | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:254:1:254:31 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | | TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry | TarSlipImprov.py:258:6:258:26 | ControlFlowNode for Attribute() | TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:258:6:258:26 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:261:25:261:29 | ControlFlowNode for entry | ControlFlowNode for entry | | TarSlipImprov.py:268:21:268:25 | ControlFlowNode for entry | TarSlipImprov.py:264:6:264:38 | ControlFlowNode for Attribute() | TarSlipImprov.py:268:21:268:25 | ControlFlowNode for entry | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:264:6:264:38 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:268:21:268:25 | ControlFlowNode for entry | ControlFlowNode for entry | @@ -113,4 +159,5 @@ subpaths | TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar | TarSlipImprov.py:287:7:287:28 | ControlFlowNode for Attribute() | TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:287:7:287:28 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:288:49:288:51 | ControlFlowNode for tar | ControlFlowNode for tar | | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | TarSlipImprov.py:292:7:292:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:292:7:292:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:293:1:293:3 | ControlFlowNode for tar | ControlFlowNode for tar | | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | TarSlipImprov.py:300:6:300:51 | ControlFlowNode for Attribute() | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:300:6:300:51 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:301:49:301:51 | ControlFlowNode for tar | ControlFlowNode for tar | +| TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | TarSlipImprov.py:304:7:304:39 | ControlFlowNode for Attribute() | TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:304:7:304:39 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:310:49:310:54 | ControlFlowNode for result | ControlFlowNode for result | | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | Extraction of tarfile from $@ to a potentially untrusted source $@. | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | TarSlipImprov.py:316:1:316:46 | ControlFlowNode for Attribute() | ControlFlowNode for Attribute() | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected index 93ca771caaa..6813bf887db 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected @@ -23,6 +23,12 @@ edges | UnsafeUnpack.py:116:27:116:39 | ControlFlowNode for Attribute | UnsafeUnpack.py:116:17:116:21 | SSA variable ufile | | UnsafeUnpack.py:118:38:118:47 | ControlFlowNode for Attribute | UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | | UnsafeUnpack.py:140:23:140:35 | ControlFlowNode for Attribute | UnsafeUnpack.py:142:49:142:51 | ControlFlowNode for tar | +| UnsafeUnpack.py:158:23:158:27 | SSA variable chunk | UnsafeUnpack.py:163:23:163:28 | SSA variable member | +| UnsafeUnpack.py:158:32:158:44 | ControlFlowNode for Attribute | UnsafeUnpack.py:158:32:158:54 | ControlFlowNode for Subscript | +| UnsafeUnpack.py:158:32:158:54 | ControlFlowNode for Subscript | UnsafeUnpack.py:158:23:158:27 | SSA variable chunk | +| UnsafeUnpack.py:163:23:163:28 | SSA variable member | UnsafeUnpack.py:166:37:166:42 | ControlFlowNode for member | +| UnsafeUnpack.py:166:23:166:28 | [post] ControlFlowNode for result | UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | +| UnsafeUnpack.py:166:37:166:42 | ControlFlowNode for member | UnsafeUnpack.py:166:23:166:28 | [post] ControlFlowNode for result | | UnsafeUnpack.py:174:15:174:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:176:1:176:34 | ControlFlowNode for Attribute() | | UnsafeUnpack.py:194:53:194:55 | ControlFlowNode for tmp | UnsafeUnpack.py:201:29:201:36 | ControlFlowNode for Attribute | nodes @@ -57,6 +63,13 @@ nodes | UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | semmle.label | ControlFlowNode for uploaded_file_path | | UnsafeUnpack.py:140:23:140:35 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | UnsafeUnpack.py:142:49:142:51 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | +| UnsafeUnpack.py:158:23:158:27 | SSA variable chunk | semmle.label | SSA variable chunk | +| UnsafeUnpack.py:158:32:158:44 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| UnsafeUnpack.py:158:32:158:54 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| UnsafeUnpack.py:163:23:163:28 | SSA variable member | semmle.label | SSA variable member | +| UnsafeUnpack.py:166:23:166:28 | [post] ControlFlowNode for result | semmle.label | [post] ControlFlowNode for result | +| UnsafeUnpack.py:166:37:166:42 | ControlFlowNode for member | semmle.label | ControlFlowNode for member | +| UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | semmle.label | ControlFlowNode for result | | UnsafeUnpack.py:174:15:174:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | UnsafeUnpack.py:176:1:176:34 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | UnsafeUnpack.py:194:53:194:55 | ControlFlowNode for tmp | semmle.label | ControlFlowNode for tmp | @@ -73,5 +86,6 @@ subpaths | UnsafeUnpack.py:112:35:112:43 | ControlFlowNode for file_path | UnsafeUnpack.py:108:22:108:34 | ControlFlowNode for Attribute | UnsafeUnpack.py:112:35:112:43 | ControlFlowNode for file_path | Unsafe extraction from a malicious tarball retrieved from a remote location. | | UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | UnsafeUnpack.py:116:27:116:39 | ControlFlowNode for Attribute | UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | Unsafe extraction from a malicious tarball retrieved from a remote location. | | UnsafeUnpack.py:142:49:142:51 | ControlFlowNode for tar | UnsafeUnpack.py:140:23:140:35 | ControlFlowNode for Attribute | UnsafeUnpack.py:142:49:142:51 | ControlFlowNode for tar | Unsafe extraction from a malicious tarball retrieved from a remote location. | +| UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | UnsafeUnpack.py:158:32:158:44 | ControlFlowNode for Attribute | UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | Unsafe extraction from a malicious tarball retrieved from a remote location. | | UnsafeUnpack.py:176:1:176:34 | ControlFlowNode for Attribute() | UnsafeUnpack.py:79:16:79:28 | ControlFlowNode for Attribute | UnsafeUnpack.py:176:1:176:34 | ControlFlowNode for Attribute() | Unsafe extraction from a malicious tarball retrieved from a remote location. | | UnsafeUnpack.py:201:29:201:36 | ControlFlowNode for Attribute | UnsafeUnpack.py:194:53:194:55 | ControlFlowNode for tmp | UnsafeUnpack.py:201:29:201:36 | ControlFlowNode for Attribute | Unsafe extraction from a malicious tarball retrieved from a remote location. | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.py index 3f4f2319690..6b533462d23 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.py @@ -164,7 +164,7 @@ def simple_upload(request): if member.issym(): raise ValueError("But it is a symlink") result.append(member) - tar.extractall(path=tempfile.mkdtemp(), members=result) # $ MISSING: result=BAD + tar.extractall(path=tempfile.mkdtemp(), members=result) # $result=BAD tar.close() diff --git a/python/ql/test/library-tests/frameworks/aiohttp/taint_test.py b/python/ql/test/library-tests/frameworks/aiohttp/taint_test.py index 6c0a60530b5..54da5726803 100644 --- a/python/ql/test/library-tests/frameworks/aiohttp/taint_test.py +++ b/python/ql/test/library-tests/frameworks/aiohttp/taint_test.py @@ -39,11 +39,11 @@ async def test_taint(request: web.Request): # $ requestHandler request.cookies, # $ tainted request.cookies["key"], # $ tainted request.cookies.get("key"), # $ tainted - request.cookies.keys(), # $ MISSING: tainted - request.cookies.values(), # $ MISSING: tainted - request.cookies.items(), # $ MISSING: tainted + request.cookies.keys(), # $ tainted + request.cookies.values(), # $ tainted + request.cookies.items(), # $ tainted list(request.cookies), # $ tainted - iter(request.cookies), # $ MISSING: tainted + iter(request.cookies), # $ tainted # aiohttp.StreamReader diff --git a/python/ql/test/library-tests/frameworks/multidict/taint_test.py b/python/ql/test/library-tests/frameworks/multidict/taint_test.py index 4410e2b2a6f..77b4f00f271 100644 --- a/python/ql/test/library-tests/frameworks/multidict/taint_test.py +++ b/python/ql/test/library-tests/frameworks/multidict/taint_test.py @@ -12,12 +12,12 @@ ensure_tainted( mdp.get("key"), # $ tainted mdp.getone("key"), # $ tainted mdp.getall("key"), # $ tainted - mdp.keys(), # $ MISSING: tainted - mdp.values(), # $ MISSING: tainted - mdp.items(), # $ MISSING: tainted + mdp.keys(), # $ tainted + mdp.values(), # $ tainted + mdp.items(), # $ tainted mdp.copy(), # $ tainted list(mdp), # $ tainted - iter(mdp), # $ MISSING: tainted + iter(mdp), # $ tainted ) # TODO: This is an invalid CIMultiDictProxy construction... but for the purpose of @@ -32,10 +32,10 @@ ensure_tainted( ci_mdp.get("key"), # $ tainted ci_mdp.getone("key"), # $ tainted ci_mdp.getall("key"), # $ tainted - ci_mdp.keys(), # $ MISSING: tainted - ci_mdp.values(), # $ MISSING: tainted - ci_mdp.items(), # $ MISSING: tainted + ci_mdp.keys(), # $ tainted + ci_mdp.values(), # $ tainted + ci_mdp.items(), # $ tainted ci_mdp.copy(), # $ tainted list(ci_mdp), # $ tainted - iter(ci_mdp), # $ MISSING: tainted + iter(ci_mdp), # $ tainted ) diff --git a/python/ql/test/library-tests/frameworks/stdlib/http_server.py b/python/ql/test/library-tests/frameworks/stdlib/http_server.py index 3dbc832c397..27ec2211f4b 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/http_server.py +++ b/python/ql/test/library-tests/frameworks/stdlib/http_server.py @@ -60,8 +60,8 @@ class MyHandler(BaseHTTPRequestHandler): self.headers.get('Foo'), # $ tainted self.headers.get_all('Foo'), # $ tainted self.headers.keys(), # $ tainted - self.headers.values(), # $ MISSING: tainted - self.headers.items(), # $ MISSING: tainted + self.headers.values(), # $ tainted + self.headers.items(), # $ tainted self.headers.as_bytes(), # $ tainted self.headers.as_string(), # $ tainted str(self.headers), # $ tainted From f1de75340053e523d2813d337f18f35b455aaba0 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 13 Jun 2023 21:59:51 +0200 Subject: [PATCH 3/6] python: add changenote --- .../ql/lib/change-notes/2023-06-13-container-store-steps.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2023-06-13-container-store-steps.md diff --git a/python/ql/lib/change-notes/2023-06-13-container-store-steps.md b/python/ql/lib/change-notes/2023-06-13-container-store-steps.md new file mode 100644 index 00000000000..1edff243128 --- /dev/null +++ b/python/ql/lib/change-notes/2023-06-13-container-store-steps.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* More precise modelling of several container functions and methods. From 38cca08a865c5c53af9c6d93c7eef358d07d4b98 Mon Sep 17 00:00:00 2001 From: yoff Date: Wed, 14 Jun 2023 13:27:33 +0200 Subject: [PATCH 4/6] Apply suggestions from code review Co-authored-by: Rasmus Wriedt Larsen --- .../ql/lib/change-notes/2023-06-13-container-store-steps.md | 2 +- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/ql/lib/change-notes/2023-06-13-container-store-steps.md b/python/ql/lib/change-notes/2023-06-13-container-store-steps.md index 1edff243128..3e12554a92b 100644 --- a/python/ql/lib/change-notes/2023-06-13-container-store-steps.md +++ b/python/ql/lib/change-notes/2023-06-13-container-store-steps.md @@ -1,4 +1,4 @@ --- category: minorAnalysis --- -* More precise modelling of several container functions and methods. +* More precise modelling of several container functions (such as `sorted`, `reversed`) and methods (such as `set.add`, `list.append`). diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 641dba0779e..ed9d33c16bb 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -4349,7 +4349,7 @@ private module StdlibPrivate { output = "Argument[self].ListElement" and preservesValue = true or - // transfer taint from new element to this + // transfer taint from new element to this (TODO: remove in future when taint-handling is more in line with other languages) input = "Argument[0]" and output = "Argument[self]" and preservesValue = false @@ -4391,7 +4391,7 @@ private module StdlibPrivate { output = "Argument[self].SetElement" and preservesValue = true or - // transfer taint from new element to this + // transfer taint from new element to this (TODO: remove in future when taint-handling is more in line with other languages) input = "Argument[0]" and output = "Argument[self]" and preservesValue = false From 3b558a0044b3ba453318c299bf83153df1606dd3 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 14 Jun 2023 13:35:37 +0200 Subject: [PATCH 5/6] python: remove spurious return flow --- .../lib/semmle/python/frameworks/Stdlib.qll | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index ed9d33c16bb..309cc80b034 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -4339,11 +4339,6 @@ private module StdlibPrivate { output = "ReturnValue.ListElement" and preservesValue = true or - // newly added element returned - input = "Argument[0]" and - output = "ReturnValue.ListElement" and - preservesValue = true - or // newly added element added to this input = "Argument[0]" and output = "Argument[self].ListElement" and @@ -4353,11 +4348,6 @@ private module StdlibPrivate { input = "Argument[0]" and output = "Argument[self]" and preservesValue = false - or - // transfer taint from new element to return value - input = "Argument[0]" and - output = "ReturnValue" and - preservesValue = false } } @@ -4381,11 +4371,6 @@ private module StdlibPrivate { output = "ReturnValue.SetElement" and preservesValue = true or - // newly added element returned - input = "Argument[0]" and - output = "ReturnValue.SetElement" and - preservesValue = true - or // newly added element added to this input = "Argument[0]" and output = "Argument[self].SetElement" and @@ -4395,11 +4380,6 @@ private module StdlibPrivate { input = "Argument[0]" and output = "Argument[self]" and preservesValue = false - or - // transfer taint from new element to return value - input = "Argument[0]" and - output = "ReturnValue" and - preservesValue = false } } } From 9a1e895fdc7a5553c66e43cceb73fca75fa9e720 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 14 Jun 2023 14:51:21 +0200 Subject: [PATCH 6/6] Python: missed removing these `set.add` and `list.append` do not return a value --- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 309cc80b034..ec8d808d9ea 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -4334,11 +4334,6 @@ private module StdlibPrivate { } override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - // existing elements - input = "Argument[self].ListElement" and - output = "ReturnValue.ListElement" and - preservesValue = true - or // newly added element added to this input = "Argument[0]" and output = "Argument[self].ListElement" and @@ -4366,11 +4361,6 @@ private module StdlibPrivate { } override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - // existing elements - input = "Argument[self].SetElement" and - output = "ReturnValue.SetElement" and - preservesValue = true - or // newly added element added to this input = "Argument[0]" and output = "Argument[self].SetElement" and