From f327a3f62a83ef7809ea9b8d7483420cacdb8e25 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 15 Aug 2022 17:13:40 +0200
Subject: [PATCH 001/415] Python: Remove
strange-pointsto-interaction-investigation
These tests are not relevant anymore :muscle:
---
.../README.md | 96 -------------------
.../src/eval_no_problem.py | 43 ---------
.../src/isfile_no_problem.py | 43 ---------
.../src/simple_no_problem.py | 43 ---------
.../src/urandom_no_if_no_problem.py | 43 ---------
.../src/urandom_no_import_no_problem.py | 43 ---------
.../src/urandom_problem.py | 43 ---------
.../test-1-normal/NormalDataflowTest.expected | 2 -
.../test-1-normal/NormalDataflowTest.ql | 2 -
.../test-1-normal/Splitting.expected | 6 --
.../test-1-normal/Splitting.ql | 16 ----
.../test-1-normal/UnresolvedCalls.expected | 0
.../test-1-normal/UnresolvedCalls.ql | 2 -
.../UnresolvedPointsToCalls.expected | 5 -
.../test-1-normal/UnresolvedPointsToCalls.ql | 10 --
.../test-1-normal/options | 1 -
.../NormalDataflowTest.expected | 3 -
.../NormalDataflowTest.ql | 2 -
.../Splitting.expected | 6 --
.../test-2-without-splitting/Splitting.ql | 16 ----
.../UnresolvedCalls.expected | 2 -
.../UnresolvedCalls.ql | 2 -
.../UnresolvedPointsToCalls.expected | 3 -
.../UnresolvedPointsToCalls.ql | 10 --
.../test-2-without-splitting/options | 1 -
.../NormalDataflowTest.expected | 3 -
.../NormalDataflowTest.ql | 2 -
.../Splitting.expected | 6 --
.../test-3-max-import-depth-0/Splitting.ql | 16 ----
.../UnresolvedCalls.expected | 2 -
.../UnresolvedCalls.ql | 2 -
.../UnresolvedPointsToCalls.expected | 2 -
.../UnresolvedPointsToCalls.ql | 10 --
.../test-3-max-import-depth-0/options | 1 -
.../NormalDataflowTest.expected | 3 -
.../NormalDataflowTest.ql | 2 -
.../Splitting.expected | 6 --
.../test-4-max-import-depth-100/Splitting.ql | 16 ----
.../UnresolvedCalls.expected | 5 -
.../UnresolvedCalls.ql | 2 -
.../UnresolvedPointsToCalls.expected | 1 -
.../UnresolvedPointsToCalls.ql | 10 --
.../test-4-max-import-depth-100/options | 1 -
.../NormalDataflowTest.expected | 3 -
.../NormalDataflowTest.ql | 2 -
.../Splitting.expected | 6 --
.../test-5-max-import-depth-3/Splitting.ql | 16 ----
.../UnresolvedCalls.expected | 5 -
.../UnresolvedCalls.ql | 2 -
.../UnresolvedPointsToCalls.expected | 1 -
.../UnresolvedPointsToCalls.ql | 10 --
.../test-5-max-import-depth-3/options | 1 -
.../NormalDataflowTest.expected | 5 -
.../NormalDataflowTest.ql | 2 -
.../Splitting.expected | 6 --
.../test-6-max-import-depth-2/Splitting.ql | 16 ----
.../UnresolvedCalls.expected | 6 --
.../UnresolvedCalls.ql | 2 -
.../UnresolvedPointsToCalls.expected | 4 -
.../UnresolvedPointsToCalls.ql | 10 --
.../test-6-max-import-depth-2/options | 1 -
61 files changed, 631 deletions(-)
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/README.md
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/eval_no_problem.py
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/isfile_no_problem.py
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/simple_no_problem.py
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_no_if_no_problem.py
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_no_import_no_problem.py
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_problem.py
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/NormalDataflowTest.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/NormalDataflowTest.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/Splitting.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/Splitting.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedCalls.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedCalls.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedPointsToCalls.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedPointsToCalls.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/options
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/NormalDataflowTest.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/NormalDataflowTest.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/Splitting.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/Splitting.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedCalls.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedCalls.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedPointsToCalls.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedPointsToCalls.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/options
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/NormalDataflowTest.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/NormalDataflowTest.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/Splitting.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/Splitting.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedCalls.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedCalls.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedPointsToCalls.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedPointsToCalls.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/options
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/NormalDataflowTest.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/NormalDataflowTest.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/Splitting.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/Splitting.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedCalls.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedCalls.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedPointsToCalls.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedPointsToCalls.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/options
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/NormalDataflowTest.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/NormalDataflowTest.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/Splitting.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/Splitting.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedCalls.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedCalls.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedPointsToCalls.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedPointsToCalls.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/options
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/NormalDataflowTest.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/NormalDataflowTest.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/Splitting.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/Splitting.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedCalls.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedCalls.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedPointsToCalls.expected
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedPointsToCalls.ql
delete mode 100644 python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/options
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/README.md b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/README.md
deleted file mode 100644
index 59e9087ee49..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/README.md
+++ /dev/null
@@ -1,96 +0,0 @@
-While working on the field-flow tests, I encountered some very strange behavior. By moving some tests into a new file, they suddenly started working :O
-
-This folder contains the artifacts from investigating this problem, so we can recall the facts (but besides that, don't have much value in itself).
-
-The test files can be found in `src/`, and I have set of a bunch of different tests with different extractor options in the `test-*` folders.
-
-The core of the problem is that in _some_ configuration of extractor options, after seeing the code below, points-to gives up trying to resolve calls :flushed:
-
-```py
-import os
-cond = os.urandom(1)[0] > 128
-
-if cond:
- pass
-
-if cond:
- pass
-```
-
-This seems to have been caused by not allowing enough imports to be resolved. There is also some interaction with splitting, since turning that off also removes the problem.
-
-But allowing our test to see more imports is more representative of what happens when analyzing real code, so that's the better approach :+1: (and going above 3 does not seem to change anything in this case).
-
-I've thought about whether we can write a query to reliably cases such as this, but I don't see any solutions. However, we can easily try running all our tests with `--max-import-depth=100` and see if anything changes from this.
-
-# Seeing the solutions work
-
-Doing `diff -u -r test-1-normal/ test-5-max-import-depth-3/` shows that all the calls we should be able to resolve, are now resolved properly. and critically this line is added:
-
-```diff
-+| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
-```
-
-
-full diff
-
-```diff
-diff '--color=auto' -u -r test-1-normal/NormalDataflowTest.expected test-5-max-import-depth-3/NormalDataflowTest.expected
---- test-1-normal/NormalDataflowTest.expected 2022-02-27 10:33:00.603882599 +0100
-+++ test-5-max-import-depth-3/NormalDataflowTest.expected 2022-02-28 10:10:08.930743800 +0100
-@@ -1,2 +1,3 @@
- missingAnnotationOnSink
- failures
-+| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
-diff '--color=auto' -u -r test-1-normal/options test-5-max-import-depth-3/options
---- test-1-normal/options 2022-02-27 10:36:51.124793909 +0100
-+++ test-5-max-import-depth-3/options 2022-02-27 11:01:43.908098372 +0100
-@@ -1 +1 @@
--semmle-extractor-options: --max-import-depth=1 -R ../src
-+semmle-extractor-options: --max-import-depth=3 -R ../src
-diff '--color=auto' -u -r test-1-normal/UnresolvedCalls.expected test-5-max-import-depth-3/UnresolvedCalls.expected
---- test-1-normal/UnresolvedCalls.expected 2022-02-28 10:09:19.213742437 +0100
-+++ test-5-max-import-depth-3/UnresolvedCalls.expected 2022-02-28 10:10:08.638737921 +0100
-@@ -0,0 +1,5 @@
-+| ../src/isfile_no_problem.py:34:33:34:70 | Comment # $ unresolved_call=os.path.isfile(..) | Missing result:unresolved_call=os.path.isfile(..) |
-+| ../src/urandom_no_if_no_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
-+| ../src/urandom_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
-+| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
-+| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |
-diff '--color=auto' -u -r test-1-normal/UnresolvedPointsToCalls.expected test-5-max-import-depth-3/UnresolvedPointsToCalls.expected
---- test-1-normal/UnresolvedPointsToCalls.expected 2022-02-28 10:09:19.033738812 +0100
-+++ test-5-max-import-depth-3/UnresolvedPointsToCalls.expected 2022-02-28 10:12:48.572752108 +0100
-@@ -1,5 +1 @@
--| ../src/urandom_no_if_no_problem.py:34:8:34:20 | ../src/urandom_no_if_no_problem.py:34 | os.urandom(..) |
- | ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
--| ../src/urandom_problem.py:34:8:34:20 | ../src/urandom_problem.py:34 | os.urandom(..) |
--| ../src/urandom_problem.py:42:7:42:16 | ../src/urandom_problem.py:42 | give_src() |
--| ../src/urandom_problem.py:43:1:43:9 | ../src/urandom_problem.py:43 | SINK(..) |
-```
-
-
-
-There are no benefit in increasing import depth above 3 for this test-example:
-
-```diff
-$ diff -u -r test-4-max-import-depth-100/ test-5-max-import-depth-3/
---- test-4-max-import-depth-100/options 2022-02-28 10:02:09.269071781 +0100
-+++ test-5-max-import-depth-3/options 2022-02-27 11:01:43.908098372 +0100
-@@ -1 +1 @@
--semmle-extractor-options: --max-import-depth=100 -R ../src
-+semmle-extractor-options: --max-import-depth=3 -R ../src
-```
-
-Also notice that using import depth 2 actually makes things worse, as we no longer handle the `isfile_no_problem.py` file properly :facepalm: :sweat_smile: NOTE: This was only for Python 3, for Python 2 there was no change :flushed:
-
-```diff
-diff '--color=auto' -u -r test-4-max-import-depth-100/NormalDataflowTest.expected test-6-max-import-depth-2/NormalDataflowTest.expected
---- test-4-max-import-depth-100/NormalDataflowTest.expected 2022-02-28 10:10:02.206608379 +0100
-+++ test-6-max-import-depth-2/NormalDataflowTest.expected 2022-02-28 10:10:13.882716665 +0100
-@@ -1,3 +1,5 @@
- missingAnnotationOnSink
-+| ../src/isfile_no_problem.py:43:6:43:8 | ../src/isfile_no_problem.py:43 | ERROR, you should add `# $ MISSING: flow` annotation | foo |
- failures
-+| ../src/isfile_no_problem.py:43:11:43:41 | Comment # $ flow="SOURCE, l:-15 -> foo" | Missing result:flow="SOURCE, l:-15 -> foo" |
- | ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
-```
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/eval_no_problem.py b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/eval_no_problem.py
deleted file mode 100644
index f64100c667b..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/eval_no_problem.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# These are defined so that we can evaluate the test code.
-NONSOURCE = "not a source"
-SOURCE = "source"
-
-
-def is_source(x):
- return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
-
-
-def SINK(x):
- if is_source(x):
- print("OK")
- else:
- print("Unexpected flow", x)
-
-
-def SINK_F(x):
- if is_source(x):
- print("Unexpected flow", x)
- else:
- print("OK")
-
-# ------------------------------------------------------------------------------
-# Actual tests
-# ------------------------------------------------------------------------------
-
-def give_src():
- return SOURCE
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
-
-import os
-cond = eval("False")
-
-if cond:
- pass
-
-if cond:
- pass
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/isfile_no_problem.py b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/isfile_no_problem.py
deleted file mode 100644
index 895763b7a88..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/isfile_no_problem.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# These are defined so that we can evaluate the test code.
-NONSOURCE = "not a source"
-SOURCE = "source"
-
-
-def is_source(x):
- return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
-
-
-def SINK(x):
- if is_source(x):
- print("OK")
- else:
- print("Unexpected flow", x)
-
-
-def SINK_F(x):
- if is_source(x):
- print("Unexpected flow", x)
- else:
- print("OK")
-
-# ------------------------------------------------------------------------------
-# Actual tests
-# ------------------------------------------------------------------------------
-
-def give_src():
- return SOURCE
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
-
-import os
-cond = os.path.isfile(__file__) # $ unresolved_call=os.path.isfile(..)
-
-if cond:
- pass
-
-if cond:
- pass
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/simple_no_problem.py b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/simple_no_problem.py
deleted file mode 100644
index 447d0258349..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/simple_no_problem.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# These are defined so that we can evaluate the test code.
-NONSOURCE = "not a source"
-SOURCE = "source"
-
-
-def is_source(x):
- return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
-
-
-def SINK(x):
- if is_source(x):
- print("OK")
- else:
- print("Unexpected flow", x)
-
-
-def SINK_F(x):
- if is_source(x):
- print("Unexpected flow", x)
- else:
- print("OK")
-
-# ------------------------------------------------------------------------------
-# Actual tests
-# ------------------------------------------------------------------------------
-
-def give_src():
- return SOURCE
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
-
-import os
-cond = 1 + 1 == 2
-
-if cond:
- pass
-
-if cond:
- pass
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_no_if_no_problem.py b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_no_if_no_problem.py
deleted file mode 100644
index 5a2c71b1e47..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_no_if_no_problem.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# These are defined so that we can evaluate the test code.
-NONSOURCE = "not a source"
-SOURCE = "source"
-
-
-def is_source(x):
- return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
-
-
-def SINK(x):
- if is_source(x):
- print("OK")
- else:
- print("Unexpected flow", x)
-
-
-def SINK_F(x):
- if is_source(x):
- print("Unexpected flow", x)
- else:
- print("OK")
-
-# ------------------------------------------------------------------------------
-# Actual tests
-# ------------------------------------------------------------------------------
-
-def give_src():
- return SOURCE
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
-
-import os
-cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
-
-# if cond:
-# pass
-#
-# if cond:
-# pass
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_no_import_no_problem.py b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_no_import_no_problem.py
deleted file mode 100644
index 487170078ab..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_no_import_no_problem.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# These are defined so that we can evaluate the test code.
-NONSOURCE = "not a source"
-SOURCE = "source"
-
-
-def is_source(x):
- return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
-
-
-def SINK(x):
- if is_source(x):
- print("OK")
- else:
- print("Unexpected flow", x)
-
-
-def SINK_F(x):
- if is_source(x):
- print("Unexpected flow", x)
- else:
- print("OK")
-
-# ------------------------------------------------------------------------------
-# Actual tests
-# ------------------------------------------------------------------------------
-
-def give_src():
- return SOURCE
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
-
-# import os
-cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
-
-# if cond:
-# pass
-#
-# if cond:
-# pass
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_problem.py b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_problem.py
deleted file mode 100644
index d4a06529cf6..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_problem.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# These are defined so that we can evaluate the test code.
-NONSOURCE = "not a source"
-SOURCE = "source"
-
-
-def is_source(x):
- return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
-
-
-def SINK(x):
- if is_source(x):
- print("OK")
- else:
- print("Unexpected flow", x)
-
-
-def SINK_F(x):
- if is_source(x):
- print("Unexpected flow", x)
- else:
- print("OK")
-
-# ------------------------------------------------------------------------------
-# Actual tests
-# ------------------------------------------------------------------------------
-
-def give_src():
- return SOURCE
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
-
-import os
-cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
-
-if cond:
- pass
-
-if cond:
- pass
-
-foo = give_src() # $ unresolved_call=give_src()
-SINK(foo) # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo"
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/NormalDataflowTest.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/NormalDataflowTest.expected
deleted file mode 100644
index 3875da4e143..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/NormalDataflowTest.expected
+++ /dev/null
@@ -1,2 +0,0 @@
-missingAnnotationOnSink
-failures
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/NormalDataflowTest.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/NormalDataflowTest.ql
deleted file mode 100644
index 3ee344d0b87..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/NormalDataflowTest.ql
+++ /dev/null
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.NormalDataflowTest
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/Splitting.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/Splitting.expected
deleted file mode 100644
index 6d281f5f299..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/Splitting.expected
+++ /dev/null
@@ -1,6 +0,0 @@
-| ../src/eval_no_problem.py | has splitting |
-| ../src/isfile_no_problem.py | has splitting |
-| ../src/simple_no_problem.py | has splitting |
-| ../src/urandom_no_if_no_problem.py | does not have splitting |
-| ../src/urandom_no_import_no_problem.py | does not have splitting |
-| ../src/urandom_problem.py | has splitting |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/Splitting.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/Splitting.ql
deleted file mode 100644
index ce4cba33871..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/Splitting.ql
+++ /dev/null
@@ -1,16 +0,0 @@
-import python
-
-// this can be quick-eval to see which ones have splitting. But that's basically just
-// anything from line 39 and further.
-predicate exprWithSplitting(Expr e) {
- exists(e.getLocation().getFile().getRelativePath()) and
- 1 < count(ControlFlowNode cfn | cfn.getNode() = e)
-}
-
-from File f, string msg
-where
- exists(f.getRelativePath()) and
- if exists(Expr e | e.getLocation().getFile() = f and exprWithSplitting(e))
- then msg = "has splitting"
- else msg = "does not have splitting"
-select f.toString(), msg
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedCalls.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedCalls.expected
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedCalls.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedCalls.ql
deleted file mode 100644
index c31dc161620..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedCalls.ql
+++ /dev/null
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.UnresolvedCalls
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedPointsToCalls.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedPointsToCalls.expected
deleted file mode 100644
index 5c3279e5b65..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedPointsToCalls.expected
+++ /dev/null
@@ -1,5 +0,0 @@
-| ../src/urandom_no_if_no_problem.py:34:8:34:20 | ../src/urandom_no_if_no_problem.py:34 | os.urandom(..) |
-| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
-| ../src/urandom_problem.py:34:8:34:20 | ../src/urandom_problem.py:34 | os.urandom(..) |
-| ../src/urandom_problem.py:42:7:42:16 | ../src/urandom_problem.py:42 | give_src() |
-| ../src/urandom_problem.py:43:1:43:9 | ../src/urandom_problem.py:43 | SINK(..) |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedPointsToCalls.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedPointsToCalls.ql
deleted file mode 100644
index 212b840decc..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedPointsToCalls.ql
+++ /dev/null
@@ -1,10 +0,0 @@
-import python
-private import semmle.python.dataflow.new.internal.PrintNode
-
-from CallNode call
-where
- exists(call.getLocation().getFile().getRelativePath()) and
- not exists(Value value | call = value.getACall()) and
- // somehow print is not resolved, but that is not the focus right now
- not call.getFunction().(NameNode).getId() = "print"
-select call.getLocation(), prettyExpr(call.getNode())
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/options b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/options
deleted file mode 100644
index 2b7a8c5c0dd..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/options
+++ /dev/null
@@ -1 +0,0 @@
-semmle-extractor-options: --lang=3 --max-import-depth=1 -R ../src
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/NormalDataflowTest.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/NormalDataflowTest.expected
deleted file mode 100644
index ceeb0ef30e0..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/NormalDataflowTest.expected
+++ /dev/null
@@ -1,3 +0,0 @@
-missingAnnotationOnSink
-failures
-| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/NormalDataflowTest.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/NormalDataflowTest.ql
deleted file mode 100644
index 3ee344d0b87..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/NormalDataflowTest.ql
+++ /dev/null
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.NormalDataflowTest
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/Splitting.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/Splitting.expected
deleted file mode 100644
index 1669504bb58..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/Splitting.expected
+++ /dev/null
@@ -1,6 +0,0 @@
-| ../src/eval_no_problem.py | does not have splitting |
-| ../src/isfile_no_problem.py | does not have splitting |
-| ../src/simple_no_problem.py | does not have splitting |
-| ../src/urandom_no_if_no_problem.py | does not have splitting |
-| ../src/urandom_no_import_no_problem.py | does not have splitting |
-| ../src/urandom_problem.py | does not have splitting |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/Splitting.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/Splitting.ql
deleted file mode 100644
index ce4cba33871..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/Splitting.ql
+++ /dev/null
@@ -1,16 +0,0 @@
-import python
-
-// this can be quick-eval to see which ones have splitting. But that's basically just
-// anything from line 39 and further.
-predicate exprWithSplitting(Expr e) {
- exists(e.getLocation().getFile().getRelativePath()) and
- 1 < count(ControlFlowNode cfn | cfn.getNode() = e)
-}
-
-from File f, string msg
-where
- exists(f.getRelativePath()) and
- if exists(Expr e | e.getLocation().getFile() = f and exprWithSplitting(e))
- then msg = "has splitting"
- else msg = "does not have splitting"
-select f.toString(), msg
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedCalls.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedCalls.expected
deleted file mode 100644
index 8078da5b640..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedCalls.expected
+++ /dev/null
@@ -1,2 +0,0 @@
-| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
-| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedCalls.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedCalls.ql
deleted file mode 100644
index c31dc161620..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedCalls.ql
+++ /dev/null
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.UnresolvedCalls
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedPointsToCalls.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedPointsToCalls.expected
deleted file mode 100644
index f145962da0d..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedPointsToCalls.expected
+++ /dev/null
@@ -1,3 +0,0 @@
-| ../src/urandom_no_if_no_problem.py:34:8:34:20 | ../src/urandom_no_if_no_problem.py:34 | os.urandom(..) |
-| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
-| ../src/urandom_problem.py:34:8:34:20 | ../src/urandom_problem.py:34 | os.urandom(..) |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedPointsToCalls.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedPointsToCalls.ql
deleted file mode 100644
index 212b840decc..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/UnresolvedPointsToCalls.ql
+++ /dev/null
@@ -1,10 +0,0 @@
-import python
-private import semmle.python.dataflow.new.internal.PrintNode
-
-from CallNode call
-where
- exists(call.getLocation().getFile().getRelativePath()) and
- not exists(Value value | call = value.getACall()) and
- // somehow print is not resolved, but that is not the focus right now
- not call.getFunction().(NameNode).getId() = "print"
-select call.getLocation(), prettyExpr(call.getNode())
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/options b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/options
deleted file mode 100644
index acbba69f1d5..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/options
+++ /dev/null
@@ -1 +0,0 @@
-semmle-extractor-options: --lang=3 --dont-split-graph --max-import-depth=1 -R ../src
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/NormalDataflowTest.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/NormalDataflowTest.expected
deleted file mode 100644
index ceeb0ef30e0..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/NormalDataflowTest.expected
+++ /dev/null
@@ -1,3 +0,0 @@
-missingAnnotationOnSink
-failures
-| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/NormalDataflowTest.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/NormalDataflowTest.ql
deleted file mode 100644
index 3ee344d0b87..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/NormalDataflowTest.ql
+++ /dev/null
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.NormalDataflowTest
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/Splitting.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/Splitting.expected
deleted file mode 100644
index 6d281f5f299..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/Splitting.expected
+++ /dev/null
@@ -1,6 +0,0 @@
-| ../src/eval_no_problem.py | has splitting |
-| ../src/isfile_no_problem.py | has splitting |
-| ../src/simple_no_problem.py | has splitting |
-| ../src/urandom_no_if_no_problem.py | does not have splitting |
-| ../src/urandom_no_import_no_problem.py | does not have splitting |
-| ../src/urandom_problem.py | has splitting |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/Splitting.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/Splitting.ql
deleted file mode 100644
index ce4cba33871..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/Splitting.ql
+++ /dev/null
@@ -1,16 +0,0 @@
-import python
-
-// this can be quick-eval to see which ones have splitting. But that's basically just
-// anything from line 39 and further.
-predicate exprWithSplitting(Expr e) {
- exists(e.getLocation().getFile().getRelativePath()) and
- 1 < count(ControlFlowNode cfn | cfn.getNode() = e)
-}
-
-from File f, string msg
-where
- exists(f.getRelativePath()) and
- if exists(Expr e | e.getLocation().getFile() = f and exprWithSplitting(e))
- then msg = "has splitting"
- else msg = "does not have splitting"
-select f.toString(), msg
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedCalls.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedCalls.expected
deleted file mode 100644
index 8078da5b640..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedCalls.expected
+++ /dev/null
@@ -1,2 +0,0 @@
-| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
-| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedCalls.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedCalls.ql
deleted file mode 100644
index c31dc161620..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedCalls.ql
+++ /dev/null
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.UnresolvedCalls
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedPointsToCalls.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedPointsToCalls.expected
deleted file mode 100644
index b9a1652ce68..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedPointsToCalls.expected
+++ /dev/null
@@ -1,2 +0,0 @@
-| ../src/isfile_no_problem.py:34:8:34:31 | ../src/isfile_no_problem.py:34 | os.path.isfile(..) |
-| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedPointsToCalls.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedPointsToCalls.ql
deleted file mode 100644
index 212b840decc..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/UnresolvedPointsToCalls.ql
+++ /dev/null
@@ -1,10 +0,0 @@
-import python
-private import semmle.python.dataflow.new.internal.PrintNode
-
-from CallNode call
-where
- exists(call.getLocation().getFile().getRelativePath()) and
- not exists(Value value | call = value.getACall()) and
- // somehow print is not resolved, but that is not the focus right now
- not call.getFunction().(NameNode).getId() = "print"
-select call.getLocation(), prettyExpr(call.getNode())
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/options b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/options
deleted file mode 100644
index 762f1f95d9a..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-3-max-import-depth-0/options
+++ /dev/null
@@ -1 +0,0 @@
-semmle-extractor-options: --lang=3 --max-import-depth=0 -R ../src
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/NormalDataflowTest.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/NormalDataflowTest.expected
deleted file mode 100644
index ceeb0ef30e0..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/NormalDataflowTest.expected
+++ /dev/null
@@ -1,3 +0,0 @@
-missingAnnotationOnSink
-failures
-| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/NormalDataflowTest.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/NormalDataflowTest.ql
deleted file mode 100644
index 3ee344d0b87..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/NormalDataflowTest.ql
+++ /dev/null
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.NormalDataflowTest
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/Splitting.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/Splitting.expected
deleted file mode 100644
index 6d281f5f299..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/Splitting.expected
+++ /dev/null
@@ -1,6 +0,0 @@
-| ../src/eval_no_problem.py | has splitting |
-| ../src/isfile_no_problem.py | has splitting |
-| ../src/simple_no_problem.py | has splitting |
-| ../src/urandom_no_if_no_problem.py | does not have splitting |
-| ../src/urandom_no_import_no_problem.py | does not have splitting |
-| ../src/urandom_problem.py | has splitting |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/Splitting.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/Splitting.ql
deleted file mode 100644
index ce4cba33871..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/Splitting.ql
+++ /dev/null
@@ -1,16 +0,0 @@
-import python
-
-// this can be quick-eval to see which ones have splitting. But that's basically just
-// anything from line 39 and further.
-predicate exprWithSplitting(Expr e) {
- exists(e.getLocation().getFile().getRelativePath()) and
- 1 < count(ControlFlowNode cfn | cfn.getNode() = e)
-}
-
-from File f, string msg
-where
- exists(f.getRelativePath()) and
- if exists(Expr e | e.getLocation().getFile() = f and exprWithSplitting(e))
- then msg = "has splitting"
- else msg = "does not have splitting"
-select f.toString(), msg
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedCalls.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedCalls.expected
deleted file mode 100644
index d215a40ab29..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedCalls.expected
+++ /dev/null
@@ -1,5 +0,0 @@
-| ../src/isfile_no_problem.py:34:33:34:70 | Comment # $ unresolved_call=os.path.isfile(..) | Missing result:unresolved_call=os.path.isfile(..) |
-| ../src/urandom_no_if_no_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
-| ../src/urandom_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
-| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
-| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedCalls.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedCalls.ql
deleted file mode 100644
index c31dc161620..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedCalls.ql
+++ /dev/null
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.UnresolvedCalls
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedPointsToCalls.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedPointsToCalls.expected
deleted file mode 100644
index ce3d7597c36..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedPointsToCalls.expected
+++ /dev/null
@@ -1 +0,0 @@
-| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedPointsToCalls.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedPointsToCalls.ql
deleted file mode 100644
index 212b840decc..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/UnresolvedPointsToCalls.ql
+++ /dev/null
@@ -1,10 +0,0 @@
-import python
-private import semmle.python.dataflow.new.internal.PrintNode
-
-from CallNode call
-where
- exists(call.getLocation().getFile().getRelativePath()) and
- not exists(Value value | call = value.getACall()) and
- // somehow print is not resolved, but that is not the focus right now
- not call.getFunction().(NameNode).getId() = "print"
-select call.getLocation(), prettyExpr(call.getNode())
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/options b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/options
deleted file mode 100644
index 82f44bce3c0..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-4-max-import-depth-100/options
+++ /dev/null
@@ -1 +0,0 @@
-semmle-extractor-options: --lang=3 --max-import-depth=100 -R ../src
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/NormalDataflowTest.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/NormalDataflowTest.expected
deleted file mode 100644
index ceeb0ef30e0..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/NormalDataflowTest.expected
+++ /dev/null
@@ -1,3 +0,0 @@
-missingAnnotationOnSink
-failures
-| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/NormalDataflowTest.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/NormalDataflowTest.ql
deleted file mode 100644
index 3ee344d0b87..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/NormalDataflowTest.ql
+++ /dev/null
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.NormalDataflowTest
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/Splitting.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/Splitting.expected
deleted file mode 100644
index 6d281f5f299..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/Splitting.expected
+++ /dev/null
@@ -1,6 +0,0 @@
-| ../src/eval_no_problem.py | has splitting |
-| ../src/isfile_no_problem.py | has splitting |
-| ../src/simple_no_problem.py | has splitting |
-| ../src/urandom_no_if_no_problem.py | does not have splitting |
-| ../src/urandom_no_import_no_problem.py | does not have splitting |
-| ../src/urandom_problem.py | has splitting |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/Splitting.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/Splitting.ql
deleted file mode 100644
index ce4cba33871..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/Splitting.ql
+++ /dev/null
@@ -1,16 +0,0 @@
-import python
-
-// this can be quick-eval to see which ones have splitting. But that's basically just
-// anything from line 39 and further.
-predicate exprWithSplitting(Expr e) {
- exists(e.getLocation().getFile().getRelativePath()) and
- 1 < count(ControlFlowNode cfn | cfn.getNode() = e)
-}
-
-from File f, string msg
-where
- exists(f.getRelativePath()) and
- if exists(Expr e | e.getLocation().getFile() = f and exprWithSplitting(e))
- then msg = "has splitting"
- else msg = "does not have splitting"
-select f.toString(), msg
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedCalls.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedCalls.expected
deleted file mode 100644
index d215a40ab29..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedCalls.expected
+++ /dev/null
@@ -1,5 +0,0 @@
-| ../src/isfile_no_problem.py:34:33:34:70 | Comment # $ unresolved_call=os.path.isfile(..) | Missing result:unresolved_call=os.path.isfile(..) |
-| ../src/urandom_no_if_no_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
-| ../src/urandom_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
-| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
-| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedCalls.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedCalls.ql
deleted file mode 100644
index c31dc161620..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedCalls.ql
+++ /dev/null
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.UnresolvedCalls
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedPointsToCalls.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedPointsToCalls.expected
deleted file mode 100644
index ce3d7597c36..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedPointsToCalls.expected
+++ /dev/null
@@ -1 +0,0 @@
-| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedPointsToCalls.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedPointsToCalls.ql
deleted file mode 100644
index 212b840decc..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/UnresolvedPointsToCalls.ql
+++ /dev/null
@@ -1,10 +0,0 @@
-import python
-private import semmle.python.dataflow.new.internal.PrintNode
-
-from CallNode call
-where
- exists(call.getLocation().getFile().getRelativePath()) and
- not exists(Value value | call = value.getACall()) and
- // somehow print is not resolved, but that is not the focus right now
- not call.getFunction().(NameNode).getId() = "print"
-select call.getLocation(), prettyExpr(call.getNode())
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/options b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/options
deleted file mode 100644
index fd080c5b1e1..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-5-max-import-depth-3/options
+++ /dev/null
@@ -1 +0,0 @@
-semmle-extractor-options: --lang=3 --max-import-depth=3 -R ../src
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/NormalDataflowTest.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/NormalDataflowTest.expected
deleted file mode 100644
index 7b5784017f3..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/NormalDataflowTest.expected
+++ /dev/null
@@ -1,5 +0,0 @@
-missingAnnotationOnSink
-| ../src/isfile_no_problem.py:43:6:43:8 | ../src/isfile_no_problem.py:43 | ERROR, you should add `# $ MISSING: flow` annotation | foo |
-failures
-| ../src/isfile_no_problem.py:43:11:43:41 | Comment # $ flow="SOURCE, l:-15 -> foo" | Missing result:flow="SOURCE, l:-15 -> foo" |
-| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/NormalDataflowTest.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/NormalDataflowTest.ql
deleted file mode 100644
index 3ee344d0b87..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/NormalDataflowTest.ql
+++ /dev/null
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.NormalDataflowTest
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/Splitting.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/Splitting.expected
deleted file mode 100644
index 6d281f5f299..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/Splitting.expected
+++ /dev/null
@@ -1,6 +0,0 @@
-| ../src/eval_no_problem.py | has splitting |
-| ../src/isfile_no_problem.py | has splitting |
-| ../src/simple_no_problem.py | has splitting |
-| ../src/urandom_no_if_no_problem.py | does not have splitting |
-| ../src/urandom_no_import_no_problem.py | does not have splitting |
-| ../src/urandom_problem.py | has splitting |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/Splitting.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/Splitting.ql
deleted file mode 100644
index ce4cba33871..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/Splitting.ql
+++ /dev/null
@@ -1,16 +0,0 @@
-import python
-
-// this can be quick-eval to see which ones have splitting. But that's basically just
-// anything from line 39 and further.
-predicate exprWithSplitting(Expr e) {
- exists(e.getLocation().getFile().getRelativePath()) and
- 1 < count(ControlFlowNode cfn | cfn.getNode() = e)
-}
-
-from File f, string msg
-where
- exists(f.getRelativePath()) and
- if exists(Expr e | e.getLocation().getFile() = f and exprWithSplitting(e))
- then msg = "has splitting"
- else msg = "does not have splitting"
-select f.toString(), msg
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedCalls.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedCalls.expected
deleted file mode 100644
index 7a80091654c..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedCalls.expected
+++ /dev/null
@@ -1,6 +0,0 @@
-| ../src/isfile_no_problem.py:42:7:42:16 | ControlFlowNode for give_src() | Unexpected result: unresolved_call=give_src() |
-| ../src/isfile_no_problem.py:43:1:43:9 | ControlFlowNode for SINK() | Unexpected result: unresolved_call=SINK(..) |
-| ../src/urandom_no_if_no_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
-| ../src/urandom_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
-| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
-| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedCalls.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedCalls.ql
deleted file mode 100644
index c31dc161620..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedCalls.ql
+++ /dev/null
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.UnresolvedCalls
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedPointsToCalls.expected b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedPointsToCalls.expected
deleted file mode 100644
index c8ec76401e0..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedPointsToCalls.expected
+++ /dev/null
@@ -1,4 +0,0 @@
-| ../src/isfile_no_problem.py:34:8:34:31 | ../src/isfile_no_problem.py:34 | os.path.isfile(..) |
-| ../src/isfile_no_problem.py:42:7:42:16 | ../src/isfile_no_problem.py:42 | give_src() |
-| ../src/isfile_no_problem.py:43:1:43:9 | ../src/isfile_no_problem.py:43 | SINK(..) |
-| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedPointsToCalls.ql b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedPointsToCalls.ql
deleted file mode 100644
index 212b840decc..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/UnresolvedPointsToCalls.ql
+++ /dev/null
@@ -1,10 +0,0 @@
-import python
-private import semmle.python.dataflow.new.internal.PrintNode
-
-from CallNode call
-where
- exists(call.getLocation().getFile().getRelativePath()) and
- not exists(Value value | call = value.getACall()) and
- // somehow print is not resolved, but that is not the focus right now
- not call.getFunction().(NameNode).getId() = "print"
-select call.getLocation(), prettyExpr(call.getNode())
diff --git a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/options b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/options
deleted file mode 100644
index d902599b806..00000000000
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-6-max-import-depth-2/options
+++ /dev/null
@@ -1 +0,0 @@
-semmle-extractor-options: --lang=3 --max-import-depth=2 -R ../src
From 6f5007b810960b735796d6dc7cf002ca2fdbadc1 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 31 May 2022 01:18:20 +0200
Subject: [PATCH 002/415] Python: Rename -> DataFlowDispatch
So diff can make more sense when introducing blank state for type-tracking based call-graph
---
.../{DataFlowDispatchPointsTo.qll => DataFlowDispatch.qll} | 0
.../lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll | 2 +-
2 files changed, 1 insertion(+), 1 deletion(-)
rename python/ql/lib/semmle/python/dataflow/new/internal/{DataFlowDispatchPointsTo.qll => DataFlowDispatch.qll} (100%)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
similarity index 100%
rename from python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll
rename to python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index 6f1396518aa..e120ddbedad 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -16,7 +16,7 @@ private import semmle.python.Frameworks
// make it more digestible.
import MatchUnpacking
import IterableUnpacking
-import DataFlowDispatchPointsTo
+import DataFlowDispatch
/** Gets the callable in which this node occurs. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
From 716576b1d6faf7d3febb57e8d87102af3e2eb73b Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 28 Oct 2022 14:03:22 +0200
Subject: [PATCH 003/415] Python: Minimal type-tracking call-graph
That does absolutely nothing so far, but compiles
---
.../new/internal/DataFlowDispatch.qll | 596 +++---------------
.../dataflow/new/internal/DataFlowPrivate.qll | 133 +---
.../dataflow/new/internal/DataFlowPublic.qll | 111 +---
.../new/internal/FlowSummaryImplSpecific.qll | 28 +-
.../new/internal/TypeTrackerSpecific.qll | 42 +-
5 files changed, 158 insertions(+), 752 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 0efae6ae45c..90c8010739d 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -1,280 +1,39 @@
/**
* INTERNAL: Do not use.
*
- * Points-to based call-graph.
+ * TypeTracker based call-graph.
+ *
+ * A goal of this library is to support modeling calls that are made by third-party
+ * libraries. For example `call_later(func, arg0, arg1, foo=val)`, and the fact that the
+ * library might inject its own arguments, for example a context that will always be
+ * passed as the actual first argument to the function. Currently the aim is to provide
+ * enough predicates for such a `call_later` function to be modeled by providing
+ * additional data-flow steps for the arguments/parameters. This means we cannot have
+ * any special logic that requires an AST call to be made before we care to figure out
+ * what callable this call might end up targeting.
*/
private import python
private import DataFlowPublic
-private import semmle.python.SpecialMethods
private import FlowSummaryImpl as FlowSummaryImpl
-/** A parameter position represented by an integer. */
-class ParameterPosition extends int {
- ParameterPosition() { exists(any(DataFlowCallable c).getParameter(this)) }
-
- /** Holds if this position represents a positional parameter at position `pos`. */
- predicate isPositional(int pos) { this = pos } // with the current representation, all parameters are positional
+/** A parameter position. */
+class ParameterPosition extends Unit {
+ // TODO(call-graph): implement this!
}
-/** An argument position represented by an integer. */
-class ArgumentPosition extends int {
- ArgumentPosition() { this in [-2, -1] or exists(any(Call c).getArg(this)) }
-
- /** Holds if this position represents a positional argument at position `pos`. */
- predicate isPositional(int pos) { this = pos } // with the current representation, all arguments are positional
+/** An argument position. */
+abstract class ArgumentPosition extends Unit {
+ // TODO(call-graph): implement this!
}
/** Holds if arguments at position `apos` match parameters at position `ppos`. */
pragma[inline]
-predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos }
-
-/**
- * Computes routing of arguments to parameters
- *
- * When a call contains more positional arguments than there are positional parameters,
- * the extra positional arguments are passed as a tuple to a starred parameter. This is
- * achieved by synthesizing a node `TPosOverflowNode(call, callable)`
- * that represents the tuple of extra positional arguments. There is a store step from each
- * extra positional argument to this node.
- *
- * CURRENTLY NOT SUPPORTED:
- * When a call contains an iterable unpacking argument, such as `func(*args)`, it is expanded into positional arguments.
- *
- * CURRENTLY NOT SUPPORTED:
- * If a call contains an iterable unpacking argument, such as `func(*args)`, and the callee contains a starred argument, any extra
- * positional arguments are passed to the starred argument.
- *
- * When a call contains keyword arguments that do not correspond to keyword parameters, these
- * extra keyword arguments are passed as a dictionary to a doubly starred parameter. This is
- * achieved by synthesizing a node `TKwOverflowNode(call, callable)`
- * that represents the dictionary of extra keyword arguments. There is a store step from each
- * extra keyword argument to this node.
- *
- * When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, with entries corresponding to a keyword parameter,
- * the value at such a key is unpacked and passed to the parameter. This is achieved
- * by synthesizing an argument node `TKwUnpacked(call, callable, name)` representing the unpacked
- * value. This node is used as the argument passed to the matching keyword parameter. There is a read
- * step from the dictionary argument to the synthesized argument node.
- *
- * When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, and the callee contains a doubly starred parameter,
- * entries which are not unpacked are passed to the doubly starred parameter. This is achieved by
- * adding a dataflow step from the dictionary argument to `TKwOverflowNode(call, callable)` and a
- * step to clear content of that node at any unpacked keys.
- *
- * ## Examples:
- * Assume that we have the callable
- * ```python
- * def f(x, y, *t, **d):
- * pass
- * ```
- * Then the call
- * ```python
- * f(0, 1, 2, a=3)
- * ```
- * will be modeled as
- * ```python
- * f(0, 1, [*t], [**d])
- * ```
- * where `[` and `]` denotes synthesized nodes, so `[*t]` is the synthesized tuple argument
- * `TPosOverflowNode` and `[**d]` is the synthesized dictionary argument `TKwOverflowNode`.
- * There will be a store step from `2` to `[*t]` at pos `0` and one from `3` to `[**d]` at key
- * `a`.
- *
- * For the call
- * ```python
- * f(0, **{"y": 1, "a": 3})
- * ```
- * no tuple argument is synthesized. It is modeled as
- * ```python
- * f(0, [y=1], [**d])
- * ```
- * where `[y=1]` is the synthesized unpacked argument `TKwUnpacked` (with `name` = `y`). There is
- * a read step from `**{"y": 1, "a": 3}` to `[y=1]` at key `y` to get the value passed to the parameter
- * `y`. There is a dataflow step from `**{"y": 1, "a": 3}` to `[**d]` to transfer the content and
- * a clearing of content at key `y` for node `[**d]`, since that value has been unpacked.
- */
-module ArgumentPassing {
- /**
- * Holds if `call` represents a `DataFlowCall` to a `DataFlowCallable` represented by `callable`.
- *
- * It _may not_ be the case that `call = callable.getACall()`, i.e. if `call` represents a `ClassCall`.
- *
- * Used to limit the size of predicates.
- */
- predicate connects(CallNode call, CallableValue callable) {
- exists(NormalCall c |
- call = c.getNode() and
- callable = c.getCallable().getCallableValue()
- )
- }
-
- /**
- * Gets the `n`th parameter of `callable`.
- * If the callable has a starred parameter, say `*tuple`, that is matched with `n=-1`.
- * If the callable has a doubly starred parameter, say `**dict`, that is matched with `n=-2`.
- * Note that, unlike other languages, we do _not_ use -1 for the position of `self` in Python,
- * as it is an explicit parameter at position 0.
- */
- NameNode getParameter(CallableValue callable, int n) {
- // positional parameter
- result = callable.getParameter(n)
- or
- // starred parameter, `*tuple`
- exists(Function f |
- f = callable.getScope() and
- n = -1 and
- result = f.getVararg().getAFlowNode()
- )
- or
- // doubly starred parameter, `**dict`
- exists(Function f |
- f = callable.getScope() and
- n = -2 and
- result = f.getKwarg().getAFlowNode()
- )
- }
-
- /**
- * A type representing a mapping from argument indices to parameter indices.
- * We currently use two mappings: NoShift, the identity, used for ordinary
- * function calls, and ShiftOneUp which is used for calls where an extra argument
- * is inserted. These include method calls, constructor calls and class calls.
- * In these calls, the argument at index `n` is mapped to the parameter at position `n+1`.
- */
- newtype TArgParamMapping =
- TNoShift() or
- TShiftOneUp()
-
- /** A mapping used for parameter passing. */
- abstract class ArgParamMapping extends TArgParamMapping {
- /** Gets the index of the parameter that corresponds to the argument at index `argN`. */
- bindingset[argN]
- abstract int getParamN(int argN);
-
- /** Gets a textual representation of this element. */
- abstract string toString();
- }
-
- /** A mapping that passes argument `n` to parameter `n`. */
- class NoShift extends ArgParamMapping, TNoShift {
- NoShift() { this = TNoShift() }
-
- override string toString() { result = "NoShift [n -> n]" }
-
- bindingset[argN]
- override int getParamN(int argN) { result = argN }
- }
-
- /** A mapping that passes argument `n` to parameter `n+1`. */
- class ShiftOneUp extends ArgParamMapping, TShiftOneUp {
- ShiftOneUp() { this = TShiftOneUp() }
-
- override string toString() { result = "ShiftOneUp [n -> n+1]" }
-
- bindingset[argN]
- override int getParamN(int argN) { result = argN + 1 }
- }
-
- /**
- * Gets the node representing the argument to `call` that is passed to the parameter at
- * (zero-based) index `paramN` in `callable`. If this is a positional argument, it must appear
- * at an index, `argN`, in `call` which satisfies `paramN = mapping.getParamN(argN)`.
- *
- * `mapping` will be the identity for function calls, but not for method- or constructor calls,
- * where the first parameter is `self` and the first positional argument is passed to the second positional parameter.
- * Similarly for classmethod calls, where the first parameter is `cls`.
- *
- * NOT SUPPORTED: Keyword-only parameters.
- */
- Node getArg(CallNode call, ArgParamMapping mapping, CallableValue callable, int paramN) {
- connects(call, callable) and
- (
- // positional argument
- exists(int argN |
- paramN = mapping.getParamN(argN) and
- result = TCfgNode(call.getArg(argN))
- )
- or
- // keyword argument
- // TODO: Since `getArgName` have no results for keyword-only parameters,
- // these are currently not supported.
- exists(Function f, string argName |
- f = callable.getScope() and
- f.getArgName(paramN) = argName and
- result = TCfgNode(call.getArgByName(unbind_string(argName)))
- )
- or
- // a synthesized argument passed to the starred parameter (at position -1)
- callable.getScope().hasVarArg() and
- paramN = -1 and
- result = TPosOverflowNode(call, callable)
- or
- // a synthesized argument passed to the doubly starred parameter (at position -2)
- callable.getScope().hasKwArg() and
- paramN = -2 and
- result = TKwOverflowNode(call, callable)
- or
- // argument unpacked from dict
- exists(string name |
- call_unpacks(call, mapping, callable, name, paramN) and
- result = TKwUnpackedNode(call, callable, name)
- )
- )
- }
-
- /** Currently required in `getArg` in order to prevent a bad join. */
- bindingset[result, s]
- private string unbind_string(string s) { result <= s and s <= result }
-
- /** Gets the control flow node that is passed as the `n`th overflow positional argument. */
- ControlFlowNode getPositionalOverflowArg(CallNode call, CallableValue callable, int n) {
- connects(call, callable) and
- exists(Function f, int posCount, int argNr |
- f = callable.getScope() and
- f.hasVarArg() and
- posCount = f.getPositionalParameterCount() and
- result = call.getArg(argNr) and
- argNr >= posCount and
- argNr = posCount + n
- )
- }
-
- /** Gets the control flow node that is passed as the overflow keyword argument with key `key`. */
- ControlFlowNode getKeywordOverflowArg(CallNode call, CallableValue callable, string key) {
- connects(call, callable) and
- exists(Function f |
- f = callable.getScope() and
- f.hasKwArg() and
- not exists(f.getArgByName(key)) and
- result = call.getArgByName(key)
- )
- }
-
- /**
- * Holds if `call` unpacks a dictionary argument in order to pass it via `name`.
- * It will then be passed to the parameter of `callable` at index `paramN`.
- */
- predicate call_unpacks(
- CallNode call, ArgParamMapping mapping, CallableValue callable, string name, int paramN
- ) {
- connects(call, callable) and
- exists(Function f |
- f = callable.getScope() and
- not exists(int argN | paramN = mapping.getParamN(argN) | exists(call.getArg(argN))) and // no positional argument available
- name = f.getArgName(paramN) and
- // not exists(call.getArgByName(name)) and // only matches keyword arguments not preceded by **
- // TODO: make the below logic respect control flow splitting (by not going to the AST).
- not call.getNode().getANamedArg().(Keyword).getArg() = name and // no keyword argument available
- paramN >= 0 and
- paramN < f.getPositionalParameterCount() + f.getKeywordOnlyParameterCount() and
- exists(call.getNode().getKwargs()) // dict argument available
- )
- }
+predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
+ // TODO(call-graph): implement this!
+ none()
}
-import ArgumentPassing
-
/** A callable defined in library code, identified by a unique string. */
abstract class LibraryCallable extends string {
bindingset[this]
@@ -287,92 +46,34 @@ abstract class LibraryCallable extends string {
abstract ArgumentNode getACallback();
}
-/**
- * IPA type for DataFlowCallable.
- *
- * A callable is either a function value, a class value, or a module (for enclosing `ModuleVariableNode`s).
- * A module has no calls.
- */
newtype TDataFlowCallable =
- TCallableValue(CallableValue callable) {
- callable instanceof FunctionValue and
- not callable.(FunctionValue).isLambda()
- or
- callable instanceof ClassValue
- } or
- TLambda(Function lambda) { lambda.isLambda() } or
+ // TODO(call-graph): implement this!
+ /** For enclosing `ModuleVariableNode`s -- don't actually have calls. */
TModule(Module m) or
TLibraryCallable(LibraryCallable callable)
/** A callable. */
-class DataFlowCallable extends TDataFlowCallable {
+abstract class DataFlowCallable extends TDataFlowCallable {
/** Gets a textual representation of this element. */
- string toString() { result = "DataFlowCallable" }
-
- /** Gets a call to this callable. */
- CallNode getACall() { none() }
+ abstract string toString();
/** Gets the scope of this callable */
- Scope getScope() { none() }
+ abstract Scope getScope();
- /** Gets the specified parameter of this callable */
- NameNode getParameter(int n) { none() }
-
- /** Gets the name of this callable. */
- string getName() { none() }
-
- /** Gets a callable value for this callable, if any. */
- CallableValue getCallableValue() { none() }
+ /** Gets the parameter at position `ppos`, if any. */
+ abstract ParameterNode getParameter(ParameterPosition ppos);
/** Gets the underlying library callable, if any. */
LibraryCallable asLibraryCallable() { this = TLibraryCallable(result) }
- Location getLocation() { none() }
+ /** Gets the location of this dataflow callable. */
+ abstract Location getLocation();
}
-/** A class representing a callable value. */
-class DataFlowCallableValue extends DataFlowCallable, TCallableValue {
- CallableValue callable;
-
- DataFlowCallableValue() { this = TCallableValue(callable) }
-
- override string toString() { result = callable.toString() }
-
- override CallNode getACall() { result = callable.getACall() }
-
- override Scope getScope() { result = callable.getScope() }
-
- override NameNode getParameter(int n) { result = getParameter(callable, n) }
-
- override string getName() { result = callable.getName() }
-
- override CallableValue getCallableValue() { result = callable }
-}
-
-/** A class representing a callable lambda. */
-class DataFlowLambda extends DataFlowCallable, TLambda {
- Function lambda;
-
- DataFlowLambda() { this = TLambda(lambda) }
-
- override string toString() { result = lambda.toString() }
-
- override CallNode getACall() { result = this.getCallableValue().getACall() }
-
- override Scope getScope() { result = lambda.getEvaluatingScope() }
-
- override NameNode getParameter(int n) { result = getParameter(this.getCallableValue(), n) }
-
- override string getName() { result = "Lambda callable" }
-
- override FunctionValue getCallableValue() {
- result.getOrigin().getNode() = lambda.getDefinition()
- }
-
- Expr getDefinition() { result = lambda.getDefinition() }
-}
-
-/** A class representing the scope in which a `ModuleVariableNode` appears. */
+/**
+ * A module. This is not actually a callable, but we need this so a
+ * `ModuleVariableNode` has an enclosing callable.
+ */
class DataFlowModuleScope extends DataFlowCallable, TModule {
Module mod;
@@ -380,15 +81,11 @@ class DataFlowModuleScope extends DataFlowCallable, TModule {
override string toString() { result = mod.toString() }
- override CallNode getACall() { none() }
+ override Module getScope() { result = mod }
- override Scope getScope() { result = mod }
+ override Location getLocation() { result = mod.getLocation() }
- override NameNode getParameter(int n) { none() }
-
- override string getName() { result = mod.getName() }
-
- override CallableValue getCallableValue() { none() }
+ override ParameterNode getParameter(ParameterPosition ppos) { none() }
}
class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
@@ -398,66 +95,36 @@ class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
override string toString() { result = callable.toString() }
- override CallNode getACall() { result = callable.getACall().getNode() }
-
/** Gets a data-flow node, where this library callable is used as a call-back. */
ArgumentNode getACallback() { result = callable.getACallback() }
override Scope getScope() { none() }
- override NameNode getParameter(int n) { none() }
-
- override string getName() { result = callable }
+ override ParameterNode getParameter(ParameterPosition ppos) { none() }
override LibraryCallable asLibraryCallable() { result = callable }
+
+ override Location getLocation() { none() }
}
-/**
- * IPA type for DataFlowCall.
- *
- * Calls corresponding to `CallNode`s are either to callable values or to classes.
- * The latter is directed to the callable corresponding to the `__init__` method of the class.
- *
- * An `__init__` method can also be called directly, so that the callable can be targeted by
- * different types of calls. In that case, the parameter mappings will be different,
- * as the class call will synthesize an argument node to be mapped to the `self` parameter.
- *
- * A call corresponding to a special method call is handled by the corresponding `SpecialMethodCallNode`.
- *
- * TODO: Add `TClassMethodCall` mapping `cls` appropriately.
- */
newtype TDataFlowCall =
- /**
- * Includes function calls, method calls, class calls and library calls.
- * All these will be associated with a `CallNode`.
- */
- TNormalCall(CallNode call) or
- /**
- * Includes calls to special methods.
- * These will be associated with a `SpecialMethodCallNode`.
- */
- TSpecialCall(SpecialMethodCallNode special) or
+ // TODO(call-graph): implement this!
+ MkDataFlowCall() or
/** A synthesized call inside a summarized callable */
TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
}
-/** A call found in the program source (as opposed to a synthesised summary call). */
-class TExtractedDataFlowCall = TSpecialCall or TNormalCall;
-
/** A call that is taken into account by the global data flow computation. */
abstract class DataFlowCall extends TDataFlowCall {
/** Gets a textual representation of this element. */
abstract string toString();
- /** Get the callable to which this call goes, if such exists. */
+ /** Get the callable to which this call goes. */
abstract DataFlowCallable getCallable();
- /**
- * Gets the argument to this call that will be sent
- * to the `n`th parameter of the callable, if any.
- */
- abstract Node getArg(int n);
+ /** Gets the argument at position `apos`, if any. */
+ abstract ArgumentNode getArgument(ArgumentPosition apos);
/** Get the control flow node representing this call, if any. */
abstract ControlFlowNode getNode();
@@ -483,130 +150,10 @@ abstract class DataFlowCall extends TDataFlowCall {
}
/** A call found in the program source (as opposed to a synthesised call). */
-abstract class ExtractedDataFlowCall extends DataFlowCall, TExtractedDataFlowCall {
- final override Location getLocation() { result = this.getNode().getLocation() }
+abstract class ExtractedDataFlowCall extends DataFlowCall {
+ ExtractedDataFlowCall() { exists(this.getNode()) }
- abstract override DataFlowCallable getCallable();
-
- abstract override Node getArg(int n);
-
- abstract override ControlFlowNode getNode();
-}
-
-/** A call associated with a `CallNode`. */
-class NormalCall extends ExtractedDataFlowCall, TNormalCall {
- CallNode call;
-
- NormalCall() { this = TNormalCall(call) }
-
- override string toString() { result = call.toString() }
-
- abstract override Node getArg(int n);
-
- override CallNode getNode() { result = call }
-
- abstract override DataFlowCallable getCallable();
-
- override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getNode().getScope() }
-}
-
-/**
- * A call to a function.
- * This excludes calls to bound methods, classes, and special methods.
- * Bound method calls and class calls insert an argument for the explicit
- * `self` parameter, and special method calls have special argument passing.
- */
-class FunctionCall extends NormalCall {
- DataFlowCallableValue callable;
-
- FunctionCall() {
- call = any(FunctionValue f).getAFunctionCall() and
- call = callable.getACall()
- }
-
- override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
-
- override DataFlowCallable getCallable() { result = callable }
-}
-
-/** A call to a lambda. */
-class LambdaCall extends NormalCall {
- DataFlowLambda callable;
-
- LambdaCall() {
- call = callable.getACall() and
- callable = TLambda(any(Function f))
- }
-
- override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
-
- override DataFlowCallable getCallable() { result = callable }
-}
-
-/**
- * Represents a call to a bound method call.
- * The node representing the instance is inserted as argument to the `self` parameter.
- */
-class MethodCall extends NormalCall {
- FunctionValue bm;
-
- MethodCall() { call = bm.getAMethodCall() }
-
- private CallableValue getCallableValue() { result = bm }
-
- override Node getArg(int n) {
- n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
- or
- n = 0 and result = TCfgNode(call.getFunction().(AttrNode).getObject())
- }
-
- override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
-}
-
-/**
- * Represents a call to a class.
- * The pre-update node for the call is inserted as argument to the `self` parameter.
- * That makes the call node be the post-update node holding the value of the object
- * after the constructor has run.
- */
-class ClassCall extends NormalCall {
- ClassValue c;
-
- ClassCall() {
- not c.isAbsent() and
- call = c.getACall()
- }
-
- private CallableValue getCallableValue() { c.getScope().getInitMethod() = result.getScope() }
-
- override Node getArg(int n) {
- n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
- or
- n = 0 and result = TSyntheticPreUpdateNode(TCfgNode(call))
- }
-
- override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
-}
-
-/** A call to a special method. */
-class SpecialCall extends ExtractedDataFlowCall, TSpecialCall {
- SpecialMethodCallNode special;
-
- SpecialCall() { this = TSpecialCall(special) }
-
- override string toString() { result = special.toString() }
-
- override Node getArg(int n) { result = TCfgNode(special.(SpecialMethod::Potential).getArg(n)) }
-
- override ControlFlowNode getNode() { result = special }
-
- override DataFlowCallable getCallable() {
- result = TCallableValue(special.getResolvedSpecialMethod())
- }
-
- override DataFlowCallable getEnclosingCallable() {
- result.getScope() = special.getNode().getScope()
- }
+ override Location getLocation() { result = this.getNode().getLocation() }
}
/**
@@ -617,27 +164,42 @@ class SpecialCall extends ExtractedDataFlowCall, TSpecialCall {
* We hope to lift this restriction in the future and include all potential calls to summaries
* in this class.
*/
-class LibraryCall extends NormalCall {
+class LibraryCall extends DataFlowCall {
LibraryCall() {
- // TODO: share this with `resolvedCall`
- not (
- call = any(DataFlowCallableValue cv).getACall()
- or
- call = any(DataFlowLambda l).getACall()
- or
- // TODO: this should be covered by `DataFlowCallableValue`, but a `ClassValue` is not a `CallableValue`.
- call = any(ClassValue c).getACall()
- )
+ // TODO(call-graph): implement this!
+ none()
}
- // TODO: Implement Python calling convention?
- override Node getArg(int n) { result = TCfgNode(call.getArg(n)) }
+ override string toString() {
+ // TODO(call-graph): implement this!
+ none()
+ }
// We cannot refer to a `LibraryCallable` here,
// as that could in turn refer to type tracking.
// This call will be tied to a `LibraryCallable` via
// `getViableCallabe` when the global data flow is assembled.
override DataFlowCallable getCallable() { none() }
+
+ override ArgumentNode getArgument(ArgumentPosition apos) {
+ // TODO(call-graph): implement this!
+ none()
+ }
+
+ override ControlFlowNode getNode() {
+ // TODO(call-graph): implement this!
+ none()
+ }
+
+ override DataFlowCallable getEnclosingCallable() {
+ // TODO(call-graph): implement this!
+ none()
+ }
+
+ override Location getLocation() {
+ // TODO(call-graph): implement this!
+ none()
+ }
}
/**
@@ -663,7 +225,7 @@ class SummaryCall extends DataFlowCall, TSummaryCall {
override DataFlowCallable getCallable() { none() }
- override Node getArg(int n) { none() }
+ override ArgumentNode getArgument(ArgumentPosition apos) { none() }
override ControlFlowNode getNode() { none() }
@@ -681,22 +243,22 @@ abstract class ParameterNodeImpl extends Node {
/**
* Holds if this node is the parameter of callable `c` at the
- * (zero-based) index `i`.
+ * position `ppos`.
*/
- abstract predicate isParameterOf(DataFlowCallable c, int i);
+ abstract predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos);
}
/** A parameter for a library callable with a flow summary. */
class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
private FlowSummaryImpl::Public::SummarizedCallable sc;
- private int pos;
+ private ParameterPosition pos;
SummaryParameterNode() { this = TSummaryParameterNode(sc, pos) }
override Parameter getParameter() { none() }
- override predicate isParameterOf(DataFlowCallable c, int i) {
- sc = c.asLibraryCallable() and i = pos
+ override predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
+ sc = c.asLibraryCallable() and ppos = pos
}
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = sc }
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index e120ddbedad..33a5558d232 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -78,7 +78,11 @@ module SyntheticPreUpdateNode {
* that is mapped to the `self` parameter. That way, constructor calls represent the value of the
* object after the constructor (currently only `__init__`) has run.
*/
- CfgNode objectCreationNode() { result.getNode() = any(ClassCall c).getNode() }
+ CfgNode objectCreationNode() {
+ // TODO(call-graph): implement this!
+ none()
+ // result.getNode().(CallNode) = any(ClassCall c).getNode()
+ }
}
import SyntheticPreUpdateNode
@@ -88,8 +92,6 @@ deprecated module syntheticPostUpdateNode = SyntheticPostUpdateNode;
/** A module collecting the different reasons for synthesising a post-update node. */
module SyntheticPostUpdateNode {
- private import semmle.python.SpecialMethods
-
/** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
NeedsSyntheticPostUpdateNode pre;
@@ -137,29 +139,22 @@ module SyntheticPostUpdateNode {
* and should not have an extra node synthesised.
*/
Node argumentPreUpdateNode() {
- result = any(FunctionCall c).getArg(_)
- or
- result = any(LambdaCall c).getArg(_)
- or
- // Avoid argument 0 of method calls as those have read post-update nodes.
- exists(MethodCall c, int n | n > 0 | result = c.getArg(n))
- or
- result = any(SpecialCall c).getArg(_)
- or
- // Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
- exists(ClassCall c, int n | n > 0 | result = c.getArg(n))
- or
- // any argument of any call that we have not been able to resolve
- exists(CallNode call | not resolvedCall(call) |
- result.(CfgNode).getNode() in [call.getArg(_), call.getArgByName(_)]
- )
- }
-
- /** Holds if `call` can be resolved as a normal call */
- private predicate resolvedCall(CallNode call) {
- call = any(DataFlowCallableValue cv).getACall()
- or
- call = any(DataFlowLambda l).getACall()
+ // TODO(call-graph): implement this!
+ none()
+ // result = any(FunctionCall c).getArg(_)
+ // or
+ // // Avoid argument 0 of method calls as those have read post-update nodes.
+ // exists(MethodCall c, int n | n > 0 | result = c.getArg(n))
+ // or
+ // result = any(SpecialCall c).getArg(_)
+ // or
+ // // Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
+ // exists(ClassCall c, int n | n > 0 | result = c.getArg(n))
+ // or
+ // // any argument of any call that we have not been able to resolve
+ // exists(CallNode call | not call = any(DataFlowCall c).getNode() |
+ // result.(CfgNode).getNode() in [call.getArg(_), call.getArgByName(_)]
+ // )
}
/** Gets the pre-update node associated with a store. This is used for when an object might have its value changed after a store. */
@@ -274,13 +269,6 @@ module EssaFlow {
iterableUnpackingFlowStep(nodeFrom, nodeTo)
or
matchFlowStep(nodeFrom, nodeTo)
- or
- // Overflow keyword argument
- exists(CallNode call, CallableValue callable |
- call = callable.getACall() and
- nodeTo = TKwOverflowNode(call, callable) and
- nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode()
- )
}
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
@@ -521,10 +509,6 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
or
attributeStoreStep(nodeFrom, c, nodeTo)
or
- posOverflowStoreStep(nodeFrom, c, nodeTo)
- or
- kwOverflowStoreStep(nodeFrom, c, nodeTo)
- or
matchStoreStep(nodeFrom, c, nodeTo)
or
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
@@ -669,30 +653,6 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, PostUpdateNode n
)
}
-/**
- * Holds if `nodeFrom` flows into the synthesized positional overflow argument (`nodeTo`)
- * at the position indicated by `c`.
- */
-predicate posOverflowStoreStep(CfgNode nodeFrom, TupleElementContent c, Node nodeTo) {
- exists(CallNode call, CallableValue callable, int n |
- nodeFrom.asCfgNode() = getPositionalOverflowArg(call, callable, n) and
- nodeTo = TPosOverflowNode(call, callable) and
- c.getIndex() = n
- )
-}
-
-/**
- * Holds if `nodeFrom` flows into the synthesized keyword overflow argument (`nodeTo`)
- * at the key indicated by `c`.
- */
-predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
- exists(CallNode call, CallableValue callable, string key |
- nodeFrom.asCfgNode() = getKeywordOverflowArg(call, callable, key) and
- nodeTo = TKwOverflowNode(call, callable) and
- c.getKey() = key
- )
-}
-
predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
exists(Function f, Parameter p, ParameterDefinition def |
// `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
@@ -722,8 +682,6 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
or
attributeReadStep(nodeFrom, c, nodeTo)
or
- kwUnpackReadStep(nodeFrom, c, nodeTo)
- or
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom, c, nodeTo)
}
@@ -814,38 +772,12 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
nodeTo.accesses(nodeFrom, c.getAttribute())
}
-/**
- * Holds if `nodeFrom` is a dictionary argument being unpacked and `nodeTo` is the
- * synthesized unpacked argument with the name indicated by `c`.
- */
-predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
- exists(CallNode call, CallableValue callable, string name |
- nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode() and
- nodeTo = TKwUnpackedNode(call, callable, name) and
- name = c.getKey()
- )
-}
-
-/**
- * Clear content at key `name` of the synthesized dictionary `TKwOverflowNode(call, callable)`,
- * whenever `call` unpacks `name`.
- */
-predicate kwOverflowClearStep(Node n, Content c) {
- exists(CallNode call, CallableValue callable, string name |
- call_unpacks(call, _, callable, name, _) and
- n = TKwOverflowNode(call, callable) and
- c.(DictionaryElementContent).getKey() = name
- )
-}
-
/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
*/
predicate clearsContent(Node n, Content c) {
- kwOverflowClearStep(n, c)
- or
matchClearStep(n, c)
or
attributeClearStep(n, c)
@@ -912,17 +844,20 @@ class LambdaCallKind = Unit;
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
- // lambda
- kind = kind and
- creation.asExpr() = c.(DataFlowLambda).getDefinition()
- or
- // normal function
- exists(FunctionDef def |
- def.defines(creation.asVar().getSourceVariable()) and
- def.getDefinedFunction() = c.(DataFlowCallableValue).getCallableValue().getScope()
- )
- or
+ // TODO(call-graph): implement this!
+ //
+ // // lambda
+ // kind = kind and
+ // creation.asExpr() = c.(DataFlowLambda).getDefinition()
+ // or
+ // // normal function
+ // exists(FunctionDef def |
+ // def.defines(creation.asVar().getSourceVariable()) and
+ // def.getDefinedFunction() = c.(DataFlowCallableValue).getCallableValue().getScope()
+ // )
+ // or
// summarized function
+ exists(kind) and // avoid warning on unused 'kind'
exists(Call call |
creation.asExpr() = call.getAnArg() and
creation = c.(LibraryCallableValue).getACallback()
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
index 4a00d0aafc3..5eaff0815af 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -45,37 +45,6 @@ newtype TNode =
ImportStar::globalNameDefinedInModule(v.getId(), m)
)
} or
- /**
- * A node representing the overflow positional arguments to a call.
- * That is, `call` contains more positional arguments than there are
- * positional parameters in `callable`. The extra ones are passed as
- * a tuple to a starred parameter; this synthetic node represents that tuple.
- */
- TPosOverflowNode(CallNode call, CallableValue callable) {
- exists(getPositionalOverflowArg(call, callable, _))
- } or
- /**
- * A node representing the overflow keyword arguments to a call.
- * That is, `call` contains keyword arguments for keys that do not have
- * keyword parameters in `callable`. These extra ones are passed as
- * a dictionary to a doubly starred parameter; this synthetic node
- * represents that dictionary.
- */
- TKwOverflowNode(CallNode call, CallableValue callable) {
- exists(getKeywordOverflowArg(call, callable, _))
- or
- ArgumentPassing::connects(call, callable) and
- exists(call.getNode().getKwargs()) and
- callable.getScope().hasKwArg()
- } or
- /**
- * A node representing an unpacked element of a dictionary argument.
- * That is, `call` contains argument `**{"foo": bar}` which is passed
- * to parameter `foo` of `callable`.
- */
- TKwUnpackedNode(CallNode call, CallableValue callable, string name) {
- call_unpacks(call, _, callable, name, _)
- } or
/**
* A synthetic node representing that an iterable sequence flows to consumer.
*/
@@ -298,14 +267,12 @@ class ExtractedParameterNode extends ParameterNodeImpl, CfgNode {
//, LocalSourceNode {
ParameterDefinition def;
- ExtractedParameterNode() {
- node = def.getDefiningNode() and
- // Disregard parameters that we cannot resolve
- // TODO: Make this unnecessary
- exists(DataFlowCallable c | node = c.getParameter(_))
- }
+ ExtractedParameterNode() { node = def.getDefiningNode() }
- override predicate isParameterOf(DataFlowCallable c, int i) { node = c.getParameter(i) }
+ override predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
+ // TODO(call-graph): implement this!
+ none()
+ }
override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
@@ -329,14 +296,14 @@ abstract class ArgumentNode extends Node {
/** A data flow node that represents a call argument found in the source code. */
class ExtractedArgumentNode extends ArgumentNode {
- ExtractedArgumentNode() { this = any(ExtractedDataFlowCall c).getArg(_) }
+ ExtractedArgumentNode() { this = any(ExtractedDataFlowCall c).getArgument(_) }
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
this.extractedArgumentOf(call, pos)
}
predicate extractedArgumentOf(ExtractedDataFlowCall call, ArgumentPosition pos) {
- this = call.getArg(pos)
+ this = call.getArgument(pos)
}
}
@@ -448,70 +415,6 @@ private predicate resolved_import_star_module(Module m, string name, Node n) {
)
}
-/**
- * The node holding the extra positional arguments to a call. This node is passed as a tuple
- * to the starred parameter of the callable.
- */
-class PosOverflowNode extends Node, TPosOverflowNode {
- CallNode call;
-
- PosOverflowNode() { this = TPosOverflowNode(call, _) }
-
- override string toString() { result = "PosOverflowNode for " + call.getNode().toString() }
-
- override DataFlowCallable getEnclosingCallable() {
- exists(Node node |
- node = TCfgNode(call) and
- result = node.getEnclosingCallable()
- )
- }
-
- override Location getLocation() { result = call.getLocation() }
-}
-
-/**
- * The node holding the extra keyword arguments to a call. This node is passed as a dictionary
- * to the doubly starred parameter of the callable.
- */
-class KwOverflowNode extends Node, TKwOverflowNode {
- CallNode call;
-
- KwOverflowNode() { this = TKwOverflowNode(call, _) }
-
- override string toString() { result = "KwOverflowNode for " + call.getNode().toString() }
-
- override DataFlowCallable getEnclosingCallable() {
- exists(Node node |
- node = TCfgNode(call) and
- result = node.getEnclosingCallable()
- )
- }
-
- override Location getLocation() { result = call.getLocation() }
-}
-
-/**
- * The node representing the synthetic argument of a call that is unpacked from a dictionary
- * argument.
- */
-class KwUnpackedNode extends Node, TKwUnpackedNode {
- CallNode call;
- string name;
-
- KwUnpackedNode() { this = TKwUnpackedNode(call, _, name) }
-
- override string toString() { result = "KwUnpacked " + name }
-
- override DataFlowCallable getEnclosingCallable() {
- exists(Node node |
- node = TCfgNode(call) and
- result = node.getEnclosingCallable()
- )
- }
-
- override Location getLocation() { result = call.getLocation() }
-}
-
/**
* A synthetic node representing an iterable sequence. Used for changing content type
* for instance from a `ListElement` to a `TupleElement`, especially if the content is
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
index 056ed02a874..5d950247369 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
@@ -61,11 +61,11 @@ bindingset[c, rk]
DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
/**
- * Gets the type of the `i`th parameter in a synthesized call that targets a
- * callback of type `t`.
+ * Gets the type of the parameter matching arguments at position `pos` in a
+ * synthesized call that targets a callback of type `t`.
*/
-bindingset[t, i]
-DataFlowType getCallbackParameterType(DataFlowType t, int i) { any() }
+bindingset[t, pos]
+DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() }
/**
* Gets the return type of kind `rk` in a synthesized call that targets a
@@ -213,16 +213,20 @@ module ParsePositions {
/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */
ArgumentPosition parseParamBody(string s) {
- exists(int i |
- ParsePositions::isParsedParameterPosition(s, i) and
- result.isPositional(i)
- )
+ none()
+ // TODO(call-graph): implement this!
+ // exists(int i |
+ // ParsePositions::isParsedParameterPosition(s, i) and
+ // result.isPositional(i)
+ // )
}
/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */
ParameterPosition parseArgBody(string s) {
- exists(int i |
- ParsePositions::isParsedArgumentPosition(s, i) and
- result.isPositional(i)
- )
+ none()
+ // TODO(call-graph): implement this!
+ // exists(int i |
+ // ParsePositions::isParsedArgumentPosition(s, i) and
+ // result.isPositional(i)
+ // )
}
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
index 690216089e9..e9c97d86e8c 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
@@ -60,21 +60,21 @@ string getPossibleContentName() {
result = any(DataFlowPublic::AttrRef a).getAttributeName()
}
-/**
- * Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
- *
- * Helper predicate to avoid bad join order experienced in `callStep`.
- * This happened when `isParameterOf` was joined _before_ `getCallable`.
- */
-pragma[nomagic]
-private DataFlowPrivate::DataFlowCallable getCallableForArgument(
- DataFlowPublic::ExtractedArgumentNode nodeFrom, int i
-) {
- exists(DataFlowPrivate::ExtractedDataFlowCall call |
- nodeFrom.extractedArgumentOf(call, i) and
- result = call.getCallable()
- )
-}
+// /**
+// * Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
+// *
+// * Helper predicate to avoid bad join order experienced in `callStep`.
+// * This happened when `isParameterOf` was joined _before_ `getCallable`.
+// */
+// pragma[nomagic]
+// private DataFlowPrivate::DataFlowCallable getCallableForArgument(
+// DataFlowPublic::ExtractedArgumentNode nodeFrom, int i
+// ) {
+// exists(DataFlowPrivate::ExtractedDataFlowCall call |
+// nodeFrom.extractedArgumentOf(call, i) and
+// result = call.getCallable()
+// )
+// }
/**
* Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
@@ -84,11 +84,13 @@ private DataFlowPrivate::DataFlowCallable getCallableForArgument(
* methods is done using API graphs (which uses type tracking).
*/
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPrivate::ParameterNodeImpl nodeTo) {
- // TODO: Support special methods?
- exists(DataFlowPrivate::DataFlowCallable callable, int i |
- callable = getCallableForArgument(nodeFrom, i) and
- nodeTo.isParameterOf(callable, i)
- )
+ // TODO(call-graph): implement this!
+ none()
+ // // TODO: Support special methods?
+ // exists(DataFlowPrivate::DataFlowCallable callable, int i |
+ // callable = getCallableForArgument(nodeFrom, i) and
+ // nodeTo.isParameterOf(callable, i)
+ // )
}
/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
From a98554b6edf531cb538e5b6a54d041c2344436b8 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 31 Oct 2022 14:14:05 +0100
Subject: [PATCH 004/415] Python: Accept tmp changes to flow summaries
After solving merge conflict
---
.../NormalTaintTrackingTest.expected | 12 ++++
.../dataflow/summaries/summaries.expected | 69 ++++---------------
2 files changed, 26 insertions(+), 55 deletions(-)
diff --git a/python/ql/test/experimental/dataflow/summaries/NormalTaintTrackingTest.expected b/python/ql/test/experimental/dataflow/summaries/NormalTaintTrackingTest.expected
index 3875da4e143..8e04ba142cb 100644
--- a/python/ql/test/experimental/dataflow/summaries/NormalTaintTrackingTest.expected
+++ b/python/ql/test/experimental/dataflow/summaries/NormalTaintTrackingTest.expected
@@ -1,2 +1,14 @@
missingAnnotationOnSink
+| summaries.py:33:6:33:12 | summaries.py:33 | ERROR, you should add `# $ MISSING: flow` annotation | tainted |
+| summaries.py:37:6:37:19 | summaries.py:37 | ERROR, you should add `# $ MISSING: flow` annotation | tainted_lambda |
+| summaries.py:52:6:52:22 | summaries.py:52 | ERROR, you should add `# $ MISSING: flow` annotation | tainted_mapped[0] |
+| summaries.py:58:6:58:31 | summaries.py:58 | ERROR, you should add `# $ MISSING: flow` annotation | tainted_mapped_explicit[0] |
+| summaries.py:61:6:61:30 | summaries.py:61 | ERROR, you should add `# $ MISSING: flow` annotation | tainted_mapped_summary[0] |
+| summaries.py:64:6:64:20 | summaries.py:64 | ERROR, you should add `# $ MISSING: flow` annotation | tainted_list[0] |
failures
+| summaries.py:33:16:33:49 | Comment # $ flow="SOURCE, l:-1 -> tainted" | Missing result:flow="SOURCE, l:-1 -> tainted" |
+| summaries.py:37:23:37:63 | Comment # $ flow="SOURCE, l:-1 -> tainted_lambda" | Missing result:flow="SOURCE, l:-1 -> tainted_lambda" |
+| summaries.py:52:26:52:69 | Comment # $ flow="SOURCE, l:-1 -> tainted_mapped[0]" | Missing result:flow="SOURCE, l:-1 -> tainted_mapped[0]" |
+| summaries.py:58:35:58:87 | Comment # $ flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]" | Missing result:flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]" |
+| summaries.py:61:34:61:85 | Comment # $ flow="SOURCE, l:-1 -> tainted_mapped_summary[0]" | Missing result:flow="SOURCE, l:-1 -> tainted_mapped_summary[0]" |
+| summaries.py:64:24:64:65 | Comment # $ flow="SOURCE, l:-1 -> tainted_list[0]" | Missing result:flow="SOURCE, l:-1 -> tainted_list[0]" |
diff --git a/python/ql/test/experimental/dataflow/summaries/summaries.expected b/python/ql/test/experimental/dataflow/summaries/summaries.expected
index 2d1190eb69c..8f5366ed6c2 100644
--- a/python/ql/test/experimental/dataflow/summaries/summaries.expected
+++ b/python/ql/test/experimental/dataflow/summaries/summaries.expected
@@ -1,70 +1,29 @@
edges
-| summaries.py:32:11:32:26 | ControlFlowNode for identity() | summaries.py:33:6:33:12 | ControlFlowNode for tainted |
-| summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | summaries.py:32:11:32:26 | ControlFlowNode for identity() |
-| summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda |
-| summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() |
| summaries.py:44:25:44:32 | ControlFlowNode for List | summaries.py:45:6:45:20 | ControlFlowNode for Subscript |
| summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | summaries.py:44:25:44:32 | ControlFlowNode for List |
-| summaries.py:51:18:51:46 | ControlFlowNode for list_map() [List element] | summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] |
-| summaries.py:51:38:51:45 | ControlFlowNode for List [List element] | summaries.py:51:18:51:46 | ControlFlowNode for list_map() [List element] |
-| summaries.py:51:39:51:44 | ControlFlowNode for SOURCE | summaries.py:51:38:51:45 | ControlFlowNode for List [List element] |
-| summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] | summaries.py:52:6:52:22 | ControlFlowNode for Subscript |
-| summaries.py:57:27:57:63 | ControlFlowNode for list_map() [List element] | summaries.py:58:6:58:28 | ControlFlowNode for tainted_mapped_explicit [List element] |
-| summaries.py:57:55:57:62 | ControlFlowNode for List [List element] | summaries.py:57:27:57:63 | ControlFlowNode for list_map() [List element] |
-| summaries.py:57:56:57:61 | ControlFlowNode for SOURCE | summaries.py:57:55:57:62 | ControlFlowNode for List [List element] |
-| summaries.py:58:6:58:28 | ControlFlowNode for tainted_mapped_explicit [List element] | summaries.py:58:6:58:31 | ControlFlowNode for Subscript |
-| summaries.py:60:26:60:53 | ControlFlowNode for list_map() [List element] | summaries.py:61:6:61:27 | ControlFlowNode for tainted_mapped_summary [List element] |
-| summaries.py:60:45:60:52 | ControlFlowNode for List [List element] | summaries.py:60:26:60:53 | ControlFlowNode for list_map() [List element] |
-| summaries.py:60:46:60:51 | ControlFlowNode for SOURCE | summaries.py:60:45:60:52 | ControlFlowNode for List [List element] |
-| summaries.py:61:6:61:27 | ControlFlowNode for tainted_mapped_summary [List element] | summaries.py:61:6:61:30 | ControlFlowNode for Subscript |
-| summaries.py:63:16:63:41 | ControlFlowNode for append_to_list() [List element] | summaries.py:64:6:64:17 | ControlFlowNode for tainted_list [List element] |
-| summaries.py:63:35:63:40 | ControlFlowNode for SOURCE | summaries.py:63:16:63:41 | ControlFlowNode for append_to_list() [List element] |
-| summaries.py:64:6:64:17 | ControlFlowNode for tainted_list [List element] | summaries.py:64:6:64:20 | ControlFlowNode for Subscript |
-| summaries.py:67:22:67:39 | ControlFlowNode for json_loads() [List element] | summaries.py:68:6:68:23 | ControlFlowNode for tainted_resultlist [List element] |
-| summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | summaries.py:67:22:67:39 | ControlFlowNode for json_loads() [List element] |
| summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | summaries.py:68:6:68:26 | ControlFlowNode for Subscript |
-| summaries.py:68:6:68:23 | ControlFlowNode for tainted_resultlist [List element] | summaries.py:68:6:68:26 | ControlFlowNode for Subscript |
nodes
-| summaries.py:32:11:32:26 | ControlFlowNode for identity() | semmle.label | ControlFlowNode for identity() |
-| summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
-| summaries.py:33:6:33:12 | ControlFlowNode for tainted | semmle.label | ControlFlowNode for tainted |
-| summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | semmle.label | ControlFlowNode for apply_lambda() |
-| summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
-| summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | semmle.label | ControlFlowNode for tainted_lambda |
| summaries.py:44:25:44:32 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
| summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
| summaries.py:45:6:45:20 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| summaries.py:51:18:51:46 | ControlFlowNode for list_map() [List element] | semmle.label | ControlFlowNode for list_map() [List element] |
-| summaries.py:51:38:51:45 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
-| summaries.py:51:39:51:44 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
-| summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] | semmle.label | ControlFlowNode for tainted_mapped [List element] |
-| summaries.py:52:6:52:22 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| summaries.py:57:27:57:63 | ControlFlowNode for list_map() [List element] | semmle.label | ControlFlowNode for list_map() [List element] |
-| summaries.py:57:55:57:62 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
-| summaries.py:57:56:57:61 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
-| summaries.py:58:6:58:28 | ControlFlowNode for tainted_mapped_explicit [List element] | semmle.label | ControlFlowNode for tainted_mapped_explicit [List element] |
-| summaries.py:58:6:58:31 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| summaries.py:60:26:60:53 | ControlFlowNode for list_map() [List element] | semmle.label | ControlFlowNode for list_map() [List element] |
-| summaries.py:60:45:60:52 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
-| summaries.py:60:46:60:51 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
-| summaries.py:61:6:61:27 | ControlFlowNode for tainted_mapped_summary [List element] | semmle.label | ControlFlowNode for tainted_mapped_summary [List element] |
-| summaries.py:61:6:61:30 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| summaries.py:63:16:63:41 | ControlFlowNode for append_to_list() [List element] | semmle.label | ControlFlowNode for append_to_list() [List element] |
-| summaries.py:63:35:63:40 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
-| summaries.py:64:6:64:17 | ControlFlowNode for tainted_list [List element] | semmle.label | ControlFlowNode for tainted_list [List element] |
-| summaries.py:64:6:64:20 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| summaries.py:67:22:67:39 | ControlFlowNode for json_loads() [List element] | semmle.label | ControlFlowNode for json_loads() [List element] |
| summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
-| summaries.py:68:6:68:23 | ControlFlowNode for tainted_resultlist [List element] | semmle.label | ControlFlowNode for tainted_resultlist [List element] |
| summaries.py:68:6:68:26 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
subpaths
invalidSpecComponent
+| append_to_list | Argument[0] | Argument[0] |
+| append_to_list | Argument[1] | Argument[1] |
+| apply_lambda | Argument[0].Parameter[0] | Argument[0] |
+| apply_lambda | Argument[0].Parameter[0] | Parameter[0] |
+| apply_lambda | Argument[0].ReturnValue | Argument[0] |
+| apply_lambda | Argument[1] | Argument[1] |
+| builtins.reversed | Argument[0].ListElement | Argument[0] |
+| identity | Argument[0] | Argument[0] |
+| json.loads | Argument[0] | Argument[0] |
+| list_map | Argument[0].Parameter[0] | Argument[0] |
+| list_map | Argument[0].Parameter[0] | Parameter[0] |
+| list_map | Argument[0].ReturnValue | Argument[0] |
+| list_map | Argument[1].ListElement | Argument[1] |
+| reversed | Argument[0].ListElement | Argument[0] |
#select
-| summaries.py:33:6:33:12 | ControlFlowNode for tainted | summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | summaries.py:33:6:33:12 | ControlFlowNode for tainted | $@ | summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
-| summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | $@ | summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
| summaries.py:45:6:45:20 | ControlFlowNode for Subscript | summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | summaries.py:45:6:45:20 | ControlFlowNode for Subscript | $@ | summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
-| summaries.py:52:6:52:22 | ControlFlowNode for Subscript | summaries.py:51:39:51:44 | ControlFlowNode for SOURCE | summaries.py:52:6:52:22 | ControlFlowNode for Subscript | $@ | summaries.py:51:39:51:44 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
-| summaries.py:58:6:58:31 | ControlFlowNode for Subscript | summaries.py:57:56:57:61 | ControlFlowNode for SOURCE | summaries.py:58:6:58:31 | ControlFlowNode for Subscript | $@ | summaries.py:57:56:57:61 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
-| summaries.py:61:6:61:30 | ControlFlowNode for Subscript | summaries.py:60:46:60:51 | ControlFlowNode for SOURCE | summaries.py:61:6:61:30 | ControlFlowNode for Subscript | $@ | summaries.py:60:46:60:51 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
-| summaries.py:64:6:64:20 | ControlFlowNode for Subscript | summaries.py:63:35:63:40 | ControlFlowNode for SOURCE | summaries.py:64:6:64:20 | ControlFlowNode for Subscript | $@ | summaries.py:63:35:63:40 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
| summaries.py:68:6:68:26 | ControlFlowNode for Subscript | summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | summaries.py:68:6:68:26 | ControlFlowNode for Subscript | $@ | summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
From c85ccb20038f38022f9b56271e9e97219370b7ea Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 30 May 2022 15:49:29 +0200
Subject: [PATCH 005/415] Python: Add call-graph compare meta-queries
Also changed the definition of a relevant call target: it now only covers
targets that are in the actual source code, which is what we want in the
future (it is what we're designing type-tracking to handle).
I also changed terminology from `callee` to `target`. It felt more
natural this way in my opinion.
---
.../analysis-quality/CallGraphQuality.qll | 131 ++++++++++++++----
.../PointsToResolvableCallsRelevantTarget.ql | 2 +-
.../src/meta/analysis-quality/TTCallGraph.ql | 17 +++
.../analysis-quality/TTCallGraphMissing.ql | 18 +++
.../meta/analysis-quality/TTCallGraphNew.ql | 18 +++
.../analysis-quality/TTCallGraphOverview.ql | 35 +++++
.../analysis-quality/TTCallGraphShared.ql | 18 +++
7 files changed, 211 insertions(+), 28 deletions(-)
create mode 100644 python/ql/src/meta/analysis-quality/TTCallGraph.ql
create mode 100644 python/ql/src/meta/analysis-quality/TTCallGraphMissing.ql
create mode 100644 python/ql/src/meta/analysis-quality/TTCallGraphNew.ql
create mode 100644 python/ql/src/meta/analysis-quality/TTCallGraphOverview.ql
create mode 100644 python/ql/src/meta/analysis-quality/TTCallGraphShared.ql
diff --git a/python/ql/src/meta/analysis-quality/CallGraphQuality.qll b/python/ql/src/meta/analysis-quality/CallGraphQuality.qll
index 46f384b89ad..a5d03063f54 100644
--- a/python/ql/src/meta/analysis-quality/CallGraphQuality.qll
+++ b/python/ql/src/meta/analysis-quality/CallGraphQuality.qll
@@ -1,16 +1,55 @@
/**
* Provides predicates for measuring the quality of the call graph, that is,
- * the number of calls that could be resolved to a callee.
+ * the number of calls that could be resolved to a target.
*/
import python
import meta.MetaMetrics
+newtype TTarget =
+ TFunction(Function func) or
+ TClass(Class cls)
+
+class Target extends TTarget {
+ /** Gets a textual representation of this element. */
+ abstract string toString();
+
+ /** Gets the location of this target. */
+ abstract Location getLocation();
+
+ /** Holds if this target is relevant, that is, it is defined in the source code of the project. */
+ predicate isRelevant() { exists(this.getLocation().getFile().getRelativePath()) }
+}
+
+class TargetFunction extends Target, TFunction {
+ Function func;
+
+ TargetFunction() { this = TFunction(func) }
+
+ override string toString() { result = func.toString() }
+
+ override Location getLocation() { result = func.getLocation() }
+
+ Function getFunction() { result = func }
+}
+
+class TargetClass extends Target, TClass {
+ Class cls;
+
+ TargetClass() { this = TClass(cls) }
+
+ override string toString() { result = cls.toString() }
+
+ override Location getLocation() { result = cls.getLocation() }
+
+ Class getClass() { result = cls }
+}
+
/**
* A call that is (possibly) relevant for analysis quality.
* See `IgnoredFile` for details on what is excluded.
*/
-class RelevantCall extends Call {
+class RelevantCall extends CallNode {
RelevantCall() { not this.getLocation().getFile() instanceof IgnoredFile }
}
@@ -18,12 +57,16 @@ class RelevantCall extends Call {
module PointsToBasedCallGraph {
/** A call that can be resolved by points-to. */
class ResolvableCall extends RelevantCall {
- Value callee;
+ Value targetValue;
- ResolvableCall() { callee.getACall() = this.getAFlowNode() }
+ ResolvableCall() { targetValue.getACall() = this }
- /** Gets a resolved callee of this call. */
- Value getCallee() { result = callee }
+ /** Gets a resolved target of this call. */
+ Target getTarget() {
+ result.(TargetFunction).getFunction() = targetValue.(CallableValue).getScope()
+ or
+ result.(TargetClass).getClass() = targetValue.(ClassValue).getScope()
+ }
}
/** A call that cannot be resolved by points-to. */
@@ -32,34 +75,68 @@ module PointsToBasedCallGraph {
}
/**
- * A call that can be resolved by points-to, where the resolved callee is relevant.
- * Relevant callees include:
- * - builtins
- * - standard library
+ * A call that can be resolved by points-to, where the resolved target is relevant.
+ * Relevant targets include:
* - source code of the project
*/
- class ResolvableCallRelevantCallee extends ResolvableCall {
- ResolvableCallRelevantCallee() {
- callee.isBuiltin()
- or
- exists(File file |
- file = callee.(CallableValue).getScope().getLocation().getFile()
- or
- file = callee.(ClassValue).getScope().getLocation().getFile()
- |
- file.inStdlib()
- or
- // part of the source code of the project
- exists(file.getRelativePath())
+ class ResolvableCallRelevantTarget extends ResolvableCall {
+ ResolvableCallRelevantTarget() {
+ exists(Target target | target = getTarget() |
+ exists(target.getLocation().getFile().getRelativePath())
)
}
}
/**
- * A call that can be resolved by points-to, where the resolved callee is not considered relevant.
- * See `ResolvableCallRelevantCallee` for the definition of relevance.
+ * A call that can be resolved by points-to, where the resolved target is not considered relevant.
+ * See `ResolvableCallRelevantTarget` for the definition of relevance.
*/
- class ResolvableCallIrrelevantCallee extends ResolvableCall {
- ResolvableCallIrrelevantCallee() { not this instanceof ResolvableCallRelevantCallee }
+ class ResolvableCallIrrelevantTarget extends ResolvableCall {
+ ResolvableCallIrrelevantTarget() { not this instanceof ResolvableCallRelevantTarget }
+ }
+}
+
+/** Provides classes for call-graph resolution by using type-tracking. */
+module TypeTrackingBasedCallGraph {
+ private import semmle.python.dataflow.new.internal.DataFlowDispatch as TT
+
+ /** A call that can be resolved by type-tracking. */
+ class ResolvableCall extends RelevantCall {
+ TT::DataFlowCallable dataflowTarget;
+
+ ResolvableCall() { dataflowTarget = TT::viableCallable(TT::TNormalCall(this)) }
+
+ /** Gets a resolved target of this call. */
+ Target getTarget() {
+ result.(TargetFunction).getFunction() = dataflowTarget.(TT::DataFlowFunction).getScope()
+ // TODO: class calls
+ // result.(TargetClass).getClass()
+ }
+ }
+
+ /** A call that cannot be resolved by type-tracking. */
+ class UnresolvableCall extends RelevantCall {
+ UnresolvableCall() { not this instanceof ResolvableCall }
+ }
+
+ /**
+ * A call that can be resolved by type-tracking, where the resolved target is relevant.
+ * Relevant targets include:
+ * - source code of the project
+ */
+ class ResolvableCallRelevantTarget extends ResolvableCall {
+ ResolvableCallRelevantTarget() {
+ exists(Target target | target = getTarget() |
+ exists(target.getLocation().getFile().getRelativePath())
+ )
+ }
+ }
+
+ /**
+ * A call that can be resolved by type-tracking, where the resolved target is not considered relevant.
+ * See `ResolvableCallRelevantTarget` for the definition of relevance.
+ */
+ class ResolvableCallIrrelevantTarget extends ResolvableCall {
+ ResolvableCallIrrelevantTarget() { not this instanceof ResolvableCallRelevantTarget }
}
}
diff --git a/python/ql/src/meta/analysis-quality/PointsToResolvableCallsRelevantTarget.ql b/python/ql/src/meta/analysis-quality/PointsToResolvableCallsRelevantTarget.ql
index 0e9c47023c3..580d2d6b8a1 100644
--- a/python/ql/src/meta/analysis-quality/PointsToResolvableCallsRelevantTarget.ql
+++ b/python/ql/src/meta/analysis-quality/PointsToResolvableCallsRelevantTarget.ql
@@ -11,4 +11,4 @@
import python
import CallGraphQuality
-select projectRoot(), count(PointsToBasedCallGraph::ResolvableCallRelevantCallee call)
+select projectRoot(), count(PointsToBasedCallGraph::ResolvableCallRelevantTarget call)
diff --git a/python/ql/src/meta/analysis-quality/TTCallGraph.ql b/python/ql/src/meta/analysis-quality/TTCallGraph.ql
new file mode 100644
index 00000000000..67faca55893
--- /dev/null
+++ b/python/ql/src/meta/analysis-quality/TTCallGraph.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Call graph edge from using type-tracking
+ * @kind problem
+ * @problem.severity recommendation
+ * @id py/meta/type-tracking-call-graph
+ * @tags meta
+ * @precision very-low
+ */
+
+import python
+import CallGraphQuality
+
+from CallNode call, Target target
+where
+ target.isRelevant() and
+ call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+select call, "$@ to $@", call, "Call", target, target.toString()
diff --git a/python/ql/src/meta/analysis-quality/TTCallGraphMissing.ql b/python/ql/src/meta/analysis-quality/TTCallGraphMissing.ql
new file mode 100644
index 00000000000..bbf5b3553ef
--- /dev/null
+++ b/python/ql/src/meta/analysis-quality/TTCallGraphMissing.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Missing call graph edge from using type-tracking instead of points-to
+ * @kind problem
+ * @problem.severity recommendation
+ * @id py/meta/call-graph-missing
+ * @tags meta
+ * @precision very-low
+ */
+
+import python
+import CallGraphQuality
+
+from CallNode call, Target target
+where
+ target.isRelevant() and
+ call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+ not call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+select call, "MISSING: $@ to $@", call, "Call", target, target.toString()
diff --git a/python/ql/src/meta/analysis-quality/TTCallGraphNew.ql b/python/ql/src/meta/analysis-quality/TTCallGraphNew.ql
new file mode 100644
index 00000000000..82a830265c6
--- /dev/null
+++ b/python/ql/src/meta/analysis-quality/TTCallGraphNew.ql
@@ -0,0 +1,18 @@
+/**
+ * @name New call graph edge from using type-tracking instead of points-to
+ * @kind problem
+ * @problem.severity recommendation
+ * @id py/meta/call-graph-new
+ * @tags meta
+ * @precision very-low
+ */
+
+import python
+import CallGraphQuality
+
+from CallNode call, Target target
+where
+ target.isRelevant() and
+ not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+ call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+select call, "NEW: $@ to $@", call, "Call", target, target.toString()
diff --git a/python/ql/src/meta/analysis-quality/TTCallGraphOverview.ql b/python/ql/src/meta/analysis-quality/TTCallGraphOverview.ql
new file mode 100644
index 00000000000..5a789d1be90
--- /dev/null
+++ b/python/ql/src/meta/analysis-quality/TTCallGraphOverview.ql
@@ -0,0 +1,35 @@
+/**
+ * @name Call graph edge overview from using type-tracking instead of points-to
+ * @id py/meta/call-graph-overview
+ * @precision very-low
+ */
+
+import python
+import CallGraphQuality
+
+from string tag, int c
+where
+ tag = "SHARED" and
+ c =
+ count(CallNode call, Target target |
+ target.isRelevant() and
+ call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+ call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+ )
+ or
+ tag = "NEW" and
+ c =
+ count(CallNode call, Target target |
+ target.isRelevant() and
+ not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+ call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+ )
+ or
+ tag = "MISSING" and
+ c =
+ count(CallNode call, Target target |
+ target.isRelevant() and
+ call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+ not call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+ )
+select tag, c
diff --git a/python/ql/src/meta/analysis-quality/TTCallGraphShared.ql b/python/ql/src/meta/analysis-quality/TTCallGraphShared.ql
new file mode 100644
index 00000000000..7a3bd794839
--- /dev/null
+++ b/python/ql/src/meta/analysis-quality/TTCallGraphShared.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Shared call graph edge from using type-tracking instead of points-to
+ * @kind problem
+ * @problem.severity recommendation
+ * @id py/meta/call-graph-shared
+ * @tags meta
+ * @precision very-low
+ */
+
+import python
+import CallGraphQuality
+
+from CallNode call, Target target
+where
+ target.isRelevant() and
+ call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+ call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+select call, "SHARED: $@ to $@", call, "Call", target, target.toString()
From aa78a434070fed4bd9dfec8910c2a64abaf3a1a4 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 31 May 2022 01:33:58 +0200
Subject: [PATCH 006/415] Python: Enable type-tracking in call-graph test
---
.../library-tests/CallGraph/InlineCallGraphTest.ql | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
index 50ad10bd191..cba9bc6e1d8 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
@@ -1,5 +1,6 @@
import python
import TestUtilities.InlineExpectationsTest
+private import semmle.python.dataflow.new.internal.DataFlowDispatch as TT
/** Holds when `call` is resolved to `callable` using points-to based call-graph. */
predicate pointsToCallEdge(CallNode call, Function callable) {
@@ -10,7 +11,13 @@ predicate pointsToCallEdge(CallNode call, Function callable) {
}
/** Holds when `call` is resolved to `callable` using type-tracking based call-graph. */
-predicate typeTrackerCallEdge(CallNode call, Function callable) { none() }
+predicate typeTrackerCallEdge(CallNode call, Function callable) {
+ exists(TT::DataFlowCallable dfCallable, TT::DataFlowCall dfCall |
+ dfCallable.getScope() = callable and
+ dfCall.getNode() = call and
+ dfCallable = TT::viableCallable(dfCall)
+ )
+}
class CallGraphTest extends InlineExpectationsTest {
CallGraphTest() { this = "CallGraphTest" }
From 9c275c177a5821df31da30cec0071fcaef843897 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 31 May 2022 01:41:23 +0200
Subject: [PATCH 007/415] Python: Implement call-graph with type-trackers
This commit is a squash of 80 other commits. While developing, things
changed majorly 2-3 times, and it just wasn't feasible to go back and
write a really nice commit history.
My apologies for this HUGE commit.
Also, later on this is where I solved merge conflicts after flow-summaries
PR was merged.
For your amusement, I've included the original commit messages below.
Python: Add proper argument/parameter positions
Python: Handle normal function calls
Python: Reduce dataflow-consistency warnings
Previously there were a lot of failures for `uniqueEnclosingCallable` and
`argHasPostUpdate`
Removing the override of `getEnclosingCallable` in ParameterNode is
probably the most controversial... although from my point of view it's a
change for the better, since we're able to provide data-flow
ParameterNodes for more of the AST parameter nodes.
Python: Adjust `dataflow/calls` test
Python: Implement `isParameterOf`/`argumentOf`/`OutNode`
This makes the tests under `dataflow/basic` work as well :+1:
(initially I had these as separate commits, but it felt like it was too much noise)
Python: Accept fix for `dataflow/consistency`
Python: Changes to `coverage/argumentRoutingTest.ql`
Notice we gain a few new resolved arguments.
We lose out on stuff due to:
1. not handling `*` or `**` in either arguments/parameters (yet)
2. not handling special calls (yet)
Python: Small fix for `TestUtil/RoutingTest.qll`
Since the helper predicates do not depend on this, moved outside class.
Python: Accept changes to `dataflow/coverage/NormalDataflowTest.ql`
Most of this is due to:
- not handling any kinds of methods yet
- not handling `*` or `**`
Python: Small investigation of `test_deep_callgraph`
Python: Accept changes to `coverage/localFlow.ql`
I don't fully understand why the .expected file changed.
Since we still have the desired flow, I'm not going to worry too much
about it.
With this commit, the `dataflow/coverage` tests pass :+1:
Python: Minor doc update
Python: Add staticmethod/classmethod to `dataflow/calls`
Python: Handle method calls on class instances
without trying to deal with any class inheritance, or
staticmethod/classmethod at all.
Notice that with this change, we only have a DataFlowCall for the calls
that we can actually resolve. I'm not 100% sure if we need to add a
`UnresolvedCall` subclass of `DataFlowCall` for MaD in the future, but
it should be easy to do.
I'm still unsure about the value of `classesCallGraph`, but have just
accepted the changes.
Python: Handle direct method calls `C.foo(C, arg0)`
Python: Handle `@staticmethod`
Python: Handle class method calls... but the code is shit
WIP todo
Rewrite method calls to be better
also fixed a problem with `self` being an argument to the `x.staticmethod()` call :|
Python: Add subclass tests
Python: Split `class_advanced` test
Python: Rewrite call-graph tests to be inline expectation (1/2)
This adds inline expectations, next commit will remove old annotations
code... but I thought it would be easier to review like this.
Minor fixup
Python: Add simple subclass support
Python: more precise subclass lookup
Still not 100% precise.. but it's better
New ambiguous
Python: Add test for `self.m()` and `cls.m()` calls
Python: Handle `self.m()` and `cls.m()` calls
Python: Add tests for `__init__` and `__new__`
Python: Handle class calls
Python: Fix `self` argument passing for class calls
Now field-flow tests also pass :muscle: (although the crosstalk
fieldflow test changes were due to this specific commit)
I also copied much of the setup for pre/post update nodes from Ruby,
specifically having the abstract `PostUpdateNodeImpl` in DataFlowPrivate
seemed like a nice change.
Same for the setup with `TNode` definition having the specification
directly in the body, instead of a `NeedsSyntheticPostUpdateNode` class.
Python: Add new crosstalk test WIP
Maybe needs a bit of refactoring, and to see how it all behaves with points-to
Python: Add `super()` call-graph tests
Python: Refactor MethodCall char-pred
In anticipation of supporting `super(MyClass, self).foo()`, where the
`self` argument doesn't come from an AttrNode, but from the second
argument to super.
Without `pragma[inline]` the optimizer found a terrible join-order --
this won't guarantee a good join-order for the future, but for now it
was just so simple and could let me move on with life.
Python: Add basic `super()` support
I debated a little (with myself) whether I should really do
`superTracker`, but I thought "why not" and just rolled with it. I did
not confirm whether it was actually needed anywhere, that is if anyone
does `ref = super; ref().foo()` -- although I certainly doubt it's very
wide-spread.
Python: InlineCallGraphTest: Allow non-unique callable name in different files
Python: more MRO tests
Python: Add MRO approximation for `super()`
Although it's not 100% accurate, it seems to be on level with the one in
points-to.
Python: Remove some spurious targets for direct calls
removal of TODO from refactoring
remove TODOs class call support
Python: Add contrived subclass call example
Python: Remove more spurious call targets
NOTE: I initially forgot to use
`findFunctionAccordingToMroKnownStartingClass` instead of
`findFunctionAccordingToMro` for __init__ and __new__, and since I did
make that mistake myself, I wanted to add something to the test to
highlight this fact, and make it viewable by PR reviewer... this will be
fixed in the next commit.
Python: Proper fix for spurious __init__ targets
Python: Add call-graph example of class decorator
Python: Support decorated classes in new call-graph
Python: Add call-graph tests for `type(obj).meth()`
Python: support `type(obj).meth()`
Python: Add test for callable defined in function
Python: Add test for callable as argument
Currently we don't find these with type-tracking, which is super
mysterious. I did check that we have proper flow from the arguments to
the parameters.
Python: Found problem for callable as argument :| MAJOR WIP
WIP commit
IT WORKS AGAIN (but terrible performance)
remove pragma[inline]
remove oops
Fix performance problem
I tried to optimize it even further, but I didn't end up achieving anything :|
Fix call-graph comparison
add comparison version with easy lookup
incomplete missing call-graph tests
unhandled tests
trying to replicate missing call-edge due to missing imports ... but it's hard
also seems to be problems with the inline-expectation-value that I used, seems like it has both missing/unexpected results with same value
Python: Add import-problem test
Python: Add shadowing problem
some cleanup of rewrite fix
a little more cleanup
Add consistency queries to call-graph tests
Python: Add post-update nodes for `self` in implicit `super()` uses
But we do need to discuss whether this is the right approach :O
Fix for field-flow tests
This came from more precise argument passing
Fixed results in type-tracking
Comes from better argument passing with super() and handling of
functions with decorators
fix of inline call graph tests
Fixup call annotation test
Many minor cleanups/fixes
NewNormalCall -> NormalCall
Python: Major restructuring + qldoc writing
Python: Accept changes from pre/post update node .toString changes
Python: Reduce `super` complexity !! WIP !!
Python: Only pass self-reference if in same enclosing-callable
Python: Add call-graph test with nested class
This was inspired by the ImpliesDataflow test that showed missing flow
for q_super, but at least for the call-graph, I'm not able to reproduce
this missing result :|
Python: Restrict `super()` to function defined directly on class
Python: Accept fixes to ImpliesDataflow
Python: Expand field-flow crosstalk tests
---
.../new/internal/DataFlowDispatch.qll | 915 +++++++++++++++++-
.../dataflow/new/internal/DataFlowPrivate.qll | 174 +---
.../dataflow/new/internal/DataFlowPublic.qll | 70 +-
.../new/internal/TypeTrackerSpecific.qll | 35 +-
.../analysis-quality/CallGraphQuality.qll | 23 +-
.../TTCallGraphNewAmbiguous.ql | 19 +
.../dataflow/TestUtil/RoutingTest.qll | 41 +-
.../dataflow/basic/callGraphSinks.expected | 1 -
.../dataflow/basic/callGraphSources.expected | 1 -
.../dataflow/basic/global.expected | 1 -
.../dataflow/basic/globalStep.expected | 1 -
.../dataflow/basic/local.expected | 8 +-
.../dataflow/basic/localStep.expected | 1 -
.../dataflow/basic/sinks.expected | 7 +-
.../dataflow/basic/sources.expected | 7 +-
.../callgraph_crosstalk/Arguments.expected | 13 +
.../dataflow/callgraph_crosstalk/Arguments.ql | 9 +
.../dataflow-consistency.expected | 19 +
.../dataflow-consistency.ql | 1 +
.../dataflow/callgraph_crosstalk/options | 1 +
.../dataflow/callgraph_crosstalk/test.py | 70 ++
.../dataflow/calls/DataFlowCallTest.ql | 18 +-
.../test/experimental/dataflow/calls/test.py | 56 +-
.../consistency/modeling-consistency.expected | 1 -
.../dataflow/coverage/argumentPassing.py | 16 +-
.../experimental/dataflow/coverage/classes.py | 64 +-
.../dataflow/coverage/datamodel.py | 47 +-
.../dataflow/coverage/localFlow.expected | 6 -
.../dataflow/coverage/localFlow.ql | 2 +-
.../experimental/dataflow/coverage/test.py | 64 +-
.../experimental/dataflow/fieldflow/test.py | 49 +-
.../dataflow/typetracking/test.py | 10 +-
.../InlineCallGraphTest.expected | 1 -
.../CallGraph-implicit-init/example.py | 2 +-
.../InlineCallGraphTest.expected | 5 +
.../InlineCallGraphTest.qlref | 1 +
.../library-tests/CallGraph-imports/README.md | 5 +
.../library-tests/CallGraph-imports/options | 1 +
.../CallGraph-imports/pkg/__init__.py | 0
.../pkg/alias_only_direct.py | 1 +
.../CallGraph-imports/pkg/alias_problem.py | 2 +
.../pkg/alias_problem_fixed.py | 3 +
.../CallGraph-imports/pkg/alias_star.py | 2 +
.../CallGraph-imports/pkg/func_def.py | 2 +
.../CallGraph-imports/pkg/other.py | 2 +
.../CallGraph-imports/pkg/use.py | 33 +
.../CallGraph/InlineCallGraphTest.expected | 53 +-
.../CallGraph/InlineCallGraphTest.ql | 45 +-
.../CallGraph/code/aliased_import.py | 1 +
.../CallGraph/code/bound_method_arg.py | 16 +
.../CallGraph/code/callable_as_argument.py | 55 ++
.../CallGraph/code/class_advanced.py | 40 -
.../CallGraph/code/class_attr_assign.py | 30 +
.../CallGraph/code/class_construction.py | 66 ++
.../CallGraph/code/class_decorator.py | 34 +
.../CallGraph/code/class_more_mro.py | 35 +
.../CallGraph/code/class_more_mro2.py | 22 +
.../CallGraph/code/class_properties.py | 43 +
.../CallGraph/code/class_simple.py | 29 -
.../CallGraph/code/class_special_methods.py | 29 +
.../CallGraph/code/class_subclass.py | 178 ++++
.../CallGraph/code/class_subclass2.py | 38 +
.../CallGraph/code/class_super.py | 108 +++
.../CallGraph/code/conditional_in_argument.py | 36 +
.../CallGraph/code/def_in_function.py | 24 +
.../code/func_defined_outside_class.py | 43 +
.../CallGraph/code/nested_class.py | 87 ++
.../CallGraph/code/relative_import.py | 7 +
.../CallGraph/code/runtime_decision.py | 4 +-
.../library-tests/CallGraph/code/shadowing.py | 22 +
.../library-tests/CallGraph/code/simple.py | 8 +-
.../CallGraph/code/through_content.py | 6 +
.../code/type_tracking_limitation.py | 8 +
.../code/underscore_prefix_func_name.py | 8 +-
.../CallGraph/dataflow-consistency.expected | 19 +
.../CallGraph/dataflow-consistency.ql | 1 +
.../PointsTo/new/ImpliesDataflow.expected | 5 -
.../django-orm/ReflectedXss.expected | 12 +-
python/ql/test/library-tests/fuck/options | 1 +
python/ql/test/library-tests/fuck/test.py | 17 +
.../ql/test/library-tests/fuck/wat.expected | 1 +
81 files changed, 2494 insertions(+), 447 deletions(-)
create mode 100644 python/ql/src/meta/analysis-quality/TTCallGraphNewAmbiguous.ql
create mode 100644 python/ql/test/experimental/dataflow/callgraph_crosstalk/Arguments.expected
create mode 100644 python/ql/test/experimental/dataflow/callgraph_crosstalk/Arguments.ql
create mode 100644 python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected
create mode 100644 python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.ql
create mode 100644 python/ql/test/experimental/dataflow/callgraph_crosstalk/options
create mode 100644 python/ql/test/experimental/dataflow/callgraph_crosstalk/test.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.expected
create mode 100644 python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.qlref
create mode 100644 python/ql/test/experimental/library-tests/CallGraph-imports/README.md
create mode 100644 python/ql/test/experimental/library-tests/CallGraph-imports/options
create mode 100644 python/ql/test/experimental/library-tests/CallGraph-imports/pkg/__init__.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_only_direct.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_problem.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_problem_fixed.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_star.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph-imports/pkg/func_def.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph-imports/pkg/other.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph-imports/pkg/use.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/aliased_import.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/bound_method_arg.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/callable_as_argument.py
delete mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/class_advanced.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/class_decorator.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/class_more_mro.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/class_more_mro2.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/class_properties.py
delete mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/class_simple.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/class_subclass.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/class_subclass2.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/class_super.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/conditional_in_argument.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/def_in_function.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/func_defined_outside_class.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/nested_class.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/relative_import.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/shadowing.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/through_content.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/type_tracking_limitation.py
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.expected
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.ql
create mode 100644 python/ql/test/library-tests/fuck/options
create mode 100644 python/ql/test/library-tests/fuck/test.py
create mode 100644 python/ql/test/library-tests/fuck/wat.expected
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 90c8010739d..37841765030 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -3,6 +3,18 @@
*
* TypeTracker based call-graph.
*
+ * The overall scheme for resolving calls is to notice that Python has different kinds
+ * of callables, and resolve those with different strategies. Currently we handle these
+ * completely separately:
+ * 1. plain functions (and lambdas)
+ * 2. methods on classes
+ * 3. class instantiation
+ *
+ * So we have type-trackers for each of the 3 categories above, with some considerable
+ * effort to handle different kinds of methods on classes (staticmethod, classmethod,
+ * normal), and resolving methods correctly in regards to MRO.
+ *
+ *
* A goal of this library is to support modeling calls that happens by third-party
* libraries. For example `call_later(func, arg0, arg1, foo=val)`, and the fact that the
* library might inject it's own arguments, for example a context that will always be
@@ -11,29 +23,104 @@
* additional data-flow steps for the arguments/parameters. This means we cannot have
* any special logic that requires an AST call to be made before we care to figure out
* what callable this call might end up targeting.
+ *
+ * Specifically this means that we cannot use type-backtrackers from the function of a
+ * `CallNode`, since there is no `CallNode` to backtrack from for `func` in the example
+ * above.
+ *
+ * Note: This hasn't been 100% realized yet, so we don't currently expose a predicate to
+ * ask what targets any data-flow node has. But it's still the plan to do this!
*/
private import python
private import DataFlowPublic
+private import DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl
+newtype TParameterPosition =
+ /** Used for `self` in methods, and `cls` in classmethods. */
+ TSelfParameterPosition() or
+ TPositionalParameterPosition(int pos) { pos = any(Parameter p).getPosition() } or
+ TKeywordParameterPosition(string name) { name = any(Parameter p).getName() }
+
/** A parameter position. */
-class ParameterPosition extends Unit {
- // TODO(call-graph): implement this!
+class ParameterPosition extends TParameterPosition {
+ /** Holds if this position represents a `self`/`cls` parameter. */
+ predicate isSelf() { this = TSelfParameterPosition() }
+
+ /** Holds if this position represents a positional parameter at (0-based) `index`. */
+ predicate isPositional(int index) { this = TPositionalParameterPosition(index) }
+
+ /** Holds if this position represents a keyword parameter named `name`. */
+ predicate isKeyword(string name) { this = TKeywordParameterPosition(name) }
+
+ /** Gets a textual representation of this element. */
+ string toString() {
+ this.isSelf() and result = "self"
+ or
+ exists(int index | this.isPositional(index) and result = "position " + index)
+ or
+ exists(string name | this.isKeyword(name) and result = "keyword " + name)
+ }
}
+newtype TArgumentPosition =
+ /** Used for `self` in methods, and `cls` in classmethods. */
+ TSelfArgumentPosition() or
+ TPositionalArgumentPosition(int pos) { exists(any(CallNode c).getArg(pos)) } or
+ TKeywordArgumentPosition(string name) { exists(any(CallNode c).getArgByName(name)) }
+
/** An argument position. */
-abstract class ArgumentPosition extends Unit {
- // TODO(call-graph): implement this!
+class ArgumentPosition extends TArgumentPosition {
+ /** Holds if this position represents a `self`/`cls` argument. */
+ predicate isSelf() { this = TSelfArgumentPosition() }
+
+ /** Holds if this position represents a positional argument at (0-based) `index`. */
+ predicate isPositional(int index) { this = TPositionalArgumentPosition(index) }
+
+ /** Holds if this position represents a keyword argument named `name`. */
+ predicate isKeyword(string name) { this = TKeywordArgumentPosition(name) }
+
+ /** Gets a textual representation of this element. */
+ string toString() {
+ this.isSelf() and result = "self"
+ or
+ exists(int pos | this.isPositional(pos) and result = "position " + pos)
+ or
+ exists(string name | this.isKeyword(name) and result = "keyword " + name)
+ }
}
/** Holds if arguments at position `apos` match parameters at position `ppos`. */
pragma[inline]
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
- // TODO(call-graph): implement this!
- none()
+ ppos.isSelf() and apos.isSelf()
+ or
+ exists(int index | ppos.isPositional(index) and apos.isPositional(index))
+ or
+ exists(string name | ppos.isKeyword(name) and apos.isKeyword(name))
}
+// =============================================================================
+// Helper predicates
+// =============================================================================
+/** Holds if the function has a `staticmethod` decorator. */
+predicate hasStaticmethodDecorator(Function func) {
+ exists(NameNode id | id.getId() = "staticmethod" and id.isGlobal() |
+ func.getADecorator() = id.getNode()
+ )
+}
+
+/** Holds if the function has a `classmethod` decorator. */
+predicate hasClassmethodDecorator(Function func) {
+ exists(NameNode id | id.getId() = "classmethod" and id.isGlobal() |
+ func.getADecorator() = id.getNode()
+ )
+}
+
+// =============================================================================
+// Callables
+// =============================================================================
/** A callable defined in library code, identified by a unique string. */
abstract class LibraryCallable extends string {
bindingset[this]
@@ -47,8 +134,8 @@ abstract class LibraryCallable extends string {
}
newtype TDataFlowCallable =
- // TODO(call-graph): implement this!
- /** For enclosing `ModuleVariableNode`s -- don't actually have calls. */
+ TFunction(Function func) or
+ /** see QLDoc for `DataFlowModuleScope` for why we need this. */
TModule(Module m) or
TLibraryCallable(LibraryCallable callable)
@@ -57,6 +144,9 @@ abstract class DataFlowCallable extends TDataFlowCallable {
/** Gets a textual representation of this element. */
abstract string toString();
+ /** Gets the qualified name of this callable, if any. */
+ abstract string getQualifiedName();
+
/** Gets the scope of this callable */
abstract Scope getScope();
@@ -70,6 +160,71 @@ abstract class DataFlowCallable extends TDataFlowCallable {
abstract Location getLocation();
}
+/** A callable function. */
+abstract class DataFlowFunction extends DataFlowCallable, TFunction {
+ Function func;
+
+ DataFlowFunction() { this = TFunction(func) }
+
+ override string toString() { result = func.toString() }
+
+ override string getQualifiedName() { result = func.getQualifiedName() }
+
+ override Function getScope() { result = func }
+
+ override Location getLocation() { result = func.getLocation() }
+
+ /** Gets the positional parameter offset, to take into account self/cls parameters. */
+ int positionalOffset() { result = 0 }
+
+ override ParameterNode getParameter(ParameterPosition ppos) {
+ exists(int index | ppos.isPositional(index) |
+ result.getParameter() = func.getArg(index + this.positionalOffset())
+ )
+ or
+ exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
+ }
+}
+
+/** A plain (non-method) function. */
+class DataFlowPlainFunction extends DataFlowFunction {
+ DataFlowPlainFunction() { not this instanceof DataFlowMethod }
+}
+
+/** A method of a class. This includes classmethods and staticmethods, which are refined by subclasses. */
+class DataFlowMethod extends DataFlowFunction {
+ Class cls;
+
+ DataFlowMethod() { cls.getAMethod() = func }
+
+ /** Gets the class this function is a method of. */
+ Class getClass() { result = cls }
+
+ override int positionalOffset() { result = 1 }
+
+ override ParameterNode getParameter(ParameterPosition ppos) {
+ ppos.isSelf() and result.getParameter() = func.getArg(0)
+ or
+ result = super.getParameter(ppos)
+ }
+}
+
+/** A classmethod. */
+class DataFlowClassmethod extends DataFlowMethod {
+ DataFlowClassmethod() { hasClassmethodDecorator(func) }
+}
+
+/** A staticmethod. */
+class DataFlowStaticmethod extends DataFlowMethod, DataFlowFunction {
+ DataFlowStaticmethod() { hasStaticmethodDecorator(func) }
+
+ override int positionalOffset() { result = 0 }
+
+ override ParameterNode getParameter(ParameterPosition ppos) {
+ result = DataFlowFunction.super.getParameter(ppos)
+ }
+}
+
/**
* A module. This is not actually a callable, but we need this so a
* `ModuleVariableNode` have an enclosing callable.
@@ -81,6 +236,8 @@ class DataFlowModuleScope extends DataFlowCallable, TModule {
override string toString() { result = mod.toString() }
+ override string getQualifiedName() { result = mod.getName() }
+
override Module getScope() { result = mod }
override Location getLocation() { result = mod.getLocation() }
@@ -95,6 +252,8 @@ class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
override string toString() { result = callable.toString() }
+ override string getQualifiedName() { result = callable.toString() }
+
/** Gets a data-flow node, where this library callable is used as a call-back. */
ArgumentNode getACallback() { result = callable.getACallback() }
@@ -107,9 +266,702 @@ class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
override Location getLocation() { none() }
}
+// =============================================================================
+// Type trackers used to resolve calls.
+// =============================================================================
+/** Gets a call to `type`. */
+private CallCfgNode getTypeCall() {
+ exists(NameNode id | id.getId() = "type" and id.isGlobal() |
+ result.getFunction().asCfgNode() = id
+ )
+}
+
+/** Gets a call to `super`. */
+private CallCfgNode getSuperCall() {
+ // While it is possible to reference super and call it later, it's almost never done in
+ // practice. From looking at top 1000 projects, there were a few uses around mocking (see
+ // link below), but otherwise only 2 edge cases. Overall it seems ok to ignore this complexity.
+ //
+ // https://github.com/python/cpython/blob/18b1782192f85bd26db89f5bc850f8bee4247c1a/Lib/unittest/mock.py#L48-L50
+ exists(NameNode id | id.getId() = "super" and id.isGlobal() |
+ result.getFunction().asCfgNode() = id
+ )
+}
+
+/**
+ * Gets a reference to the function `func`.
+ */
+private TypeTrackingNode functionTracker(TypeTracker t, Function func) {
+ t.start() and
+ (
+ result.asExpr() = func.getDefinition()
+ or
+ // when a function is decorated, it's the result of the (last) decorator call that
+ // is used
+ result.asExpr() = func.getDefinition().(FunctionExpr).getADecoratorCall()
+ )
+ or
+ exists(TypeTracker t2 | result = functionTracker(t2, func).track(t2, t))
+}
+
+/**
+ * Gets a reference to the function `func`.
+ */
+Node functionTracker(Function func) { functionTracker(TypeTracker::end(), func).flowsTo(result) }
+
+/**
+ * Gets a reference to the class `cls`.
+ */
+private TypeTrackingNode classTracker(TypeTracker t, Class cls) {
+ t.start() and
+ (
+ result.asExpr() = cls.getParent()
+ or
+ // when a class is decorated, it's the result of the (last) decorator call that
+ // is used
+ result.asExpr() = cls.getParent().(ClassExpr).getADecoratorCall()
+ or
+ // `type(obj)`, where obj is an instance of this class
+ result = getTypeCall() and
+ result.(CallCfgNode).getArg(0) = classInstanceTracker(cls)
+ )
+ or
+ exists(TypeTracker t2 | result = classTracker(t2, cls).track(t2, t))
+}
+
+/**
+ * Gets a reference to the class `cls`.
+ */
+Node classTracker(Class cls) { classTracker(TypeTracker::end(), cls).flowsTo(result) }
+
+/**
+ * Gets a reference to an instance of the class `cls`.
+ */
+private TypeTrackingNode classInstanceTracker(TypeTracker t, Class cls) {
+ t.start() and
+ result.(CallCfgNode).getFunction() = classTracker(cls)
+ or
+ exists(TypeTracker t2 | result = classInstanceTracker(t2, cls).track(t2, t))
+}
+
+/**
+ * Gets a reference to an instance of the class `cls`.
+ */
+Node classInstanceTracker(Class cls) {
+ classInstanceTracker(TypeTracker::end(), cls).flowsTo(result)
+}
+
+/**
+ * Gets a reference to the `self` argument of a method on class `classWithMethod`.
+ * The method cannot be a `staticmethod` or `classmethod`.
+ */
+private TypeTrackingNode selfTracker(TypeTracker t, Class classWithMethod) {
+ t.start() and
+ exists(Function func |
+ func = classWithMethod.getAMethod() and
+ not hasStaticmethodDecorator(func) and
+ not hasClassmethodDecorator(func)
+ |
+ result.asExpr() = func.getArg(0)
+ )
+ or
+ exists(TypeTracker t2 | result = selfTracker(t2, classWithMethod).track(t2, t))
+}
+
+/**
+ * Gets a reference to the `self` argument of a method on class `classWithMethod`.
+ * The method cannot be a `staticmethod` or `classmethod`.
+ */
+Node selfTracker(Class classWithMethod) {
+ selfTracker(TypeTracker::end(), classWithMethod).flowsTo(result)
+}
+
+/**
+ * Gets a reference to the `cls` argument of a classmethod on class `classWithMethod`.
+ */
+private TypeTrackingNode clsTracker(TypeTracker t, Class classWithMethod) {
+ t.start() and
+ (
+ exists(Function func |
+ func = classWithMethod.getAMethod() and
+ hasClassmethodDecorator(func)
+ |
+ result.asExpr() = func.getArg(0)
+ )
+ or
+ // type(self)
+ result = getTypeCall() and
+ result.(CallCfgNode).getArg(0) = selfTracker(classWithMethod)
+ )
+ or
+ exists(TypeTracker t2 | result = clsTracker(t2, classWithMethod).track(t2, t))
+}
+
+/**
+ * Gets a reference to the `cls` argument of a classmethod on class `classWithMethod`.
+ */
+Node clsTracker(Class classWithMethod) {
+ clsTracker(TypeTracker::end(), classWithMethod).flowsTo(result)
+}
+
+/**
+ * Gets a reference to the result of calling `super` without any argument, where the
+ * call happened in the method `func` (either a method or a classmethod).
+ */
+private TypeTrackingNode superCallNoArgumentTracker(TypeTracker t, Function func) {
+ not hasStaticmethodDecorator(func) and
+ t.start() and
+ exists(CallCfgNode call | result = call |
+ call = getSuperCall() and
+ not exists(call.getArg(_)) and
+ call.getScope() = func
+ )
+ or
+ exists(TypeTracker t2 | result = superCallNoArgumentTracker(t2, func).track(t2, t))
+}
+
+/**
+ * Gets a reference to the result of calling `super` without any argument, where the
+ * call happened in the method `func` (either a method or a classmethod).
+ */
+Node superCallNoArgumentTracker(Function func) {
+ superCallNoArgumentTracker(TypeTracker::end(), func).flowsTo(result)
+}
+
+/**
+ * Gets a reference to the result of calling `super` with 2 arguments, where the
+ * first is a reference to the class `cls`, and the second argument is `obj`.
+ */
+private TypeTrackingNode superCallTwoArgumentTracker(TypeTracker t, Class cls, Node obj) {
+ t.start() and
+ exists(CallCfgNode call | result = call |
+ call = getSuperCall() and
+ call.getArg(0) = classTracker(cls) and
+ call.getArg(1) = obj
+ )
+ or
+ exists(TypeTracker t2 | result = superCallTwoArgumentTracker(t2, cls, obj).track(t2, t))
+}
+
+/**
+ * Gets a reference to the result of calling `super` with 2 arguments, where the
+ * first is a reference to the class `cls`, and the second argument is `obj`.
+ */
+Node superCallTwoArgumentTracker(Class cls, Node obj) {
+ superCallTwoArgumentTracker(TypeTracker::end(), cls, obj).flowsTo(result)
+}
+
+// =============================================================================
+// MRO
+// =============================================================================
+/**
+ * Gets a direct superclass of the argument `cls`, if any.
+ *
+ * For `A` with the class definition `class A(B, C)` it will have results `B` and `C`.
+ */
+Class getADirectSuperclass(Class cls) { cls.getABase() = classTracker(result).asExpr() }
+
+/**
+ * Gets a direct subclass of the argument `cls`, if any.
+ *
+ * For `B` with the class definition `class A(B)` it will have result `A`.
+ */
+Class getADirectSubclass(Class cls) { cls = getADirectSuperclass(result) }
+
+/**
+ * Gets a class that, from an approximated MRO calculation, might be the next class used
+ * for member-lookup when `super().attr` is used inside the class `cls`.
+ *
+ * In the example below, with `cls=B`, this predicate will have `A` and `C` as results.
+ * ```py
+ * class A: pass
+ * class B(A): pass
+ * class C(A): pass
+ * class D(B, C): pass
+ * ```
+ *
+ * NOTE: This approximation does not handle all cases correctly, and in the example
+ * below, with `cls=A` will not have any results, although it should include `Y`.
+ *
+ * ```py
+ * class A: pass
+ * class B(A): pass
+ * class X: pass
+ * class Y(X): pass
+ * class Ex(B, Y): pass
+ * ```
+ *
+ * NOTE for debugging the results of this predicate: Since a class can be part of
+ * multiple MROs, results from this predicate might only be valid in some, but not all,
+ * inheritance chains (such as the result `C` for `cls=B` in the first example -- this
+ * might make it difficult to see if the definition of `D` is located in another file).
+ *
+ * For more info on the C3 MRO used in Python see:
+ * - https://docs.python.org/3/glossary.html#term-method-resolution-order
+ * - https://www.python.org/download/releases/2.3/mro/
+ */
+private Class getNextClassInMro(Class cls) {
+ // class A(B, ...):
+ // `B` must be the next class after `A` in the MRO for A.
+ cls.getBase(0) = classTracker(result).asExpr()
+ or
+ // class A(B, C, D):
+ // - `C` could be the next class after `B` in MRO.
+ // - `D` could be the next class after `C` in MRO.
+ exists(Class sub, int i |
+ sub.getBase(i) = classTracker(cls).asExpr() and
+ sub.getBase(i + 1) = classTracker(result).asExpr() and
+ not result = cls
+ )
+ // There are two important properties for MRO computed with C3 in Python:
+ //
+ // 1) monotonicity: if C1 precedes C2 in the MRO of C, then C1 precedes C2 in the MRO
+ // of any subclass of C.
+ // 2) local precedence ordering: if C1 precedes C2 in the list of superclasses for C,
+ // they will keep the same order in the MRO for C (and due to monotonicity, any
+ // subclass).
+}
+
+/**
+ * Gets a potential definition of the function `name` according to our approximation of
+ * MRO for the class `cls` (see `getNextClassInMro` for more information).
+ */
+Function findFunctionAccordingToMro(Class cls, string name) {
+ result = cls.getAMethod() and
+ result.getName() = name
+ or
+ not exists(Function f | f.getName() = name and f = cls.getAMethod()) and
+ result = findFunctionAccordingToMro(getNextClassInMro(cls), name)
+}
+
+/**
+ * Gets a class that, from an approximated MRO calculation, might be the next class
+ * after `cls` in the MRO for `startingClass`.
+ *
+ * Note: this is almost the same as `getNextClassInMro`, except we know the
+ * `startingClass`, which can give slightly more precise results.
+ *
+ * See QLDoc for `getNextClassInMro`.
+ */
+Class getNextClassInMroKnownStartingClass(Class cls, Class startingClass) {
+ cls.getBase(0) = classTracker(result).asExpr() and
+ cls = getADirectSuperclass*(startingClass)
+ or
+ exists(Class sub, int i | sub = getADirectSuperclass*(startingClass) |
+ sub.getBase(i) = classTracker(cls).asExpr() and
+ sub.getBase(i + 1) = classTracker(result).asExpr() and
+ not result = cls
+ )
+}
+
+private Function findFunctionAccordingToMroKnownStartingClass(
+ Class cls, Class startingClass, string name
+) {
+ result = cls.getAMethod() and
+ result.getName() = name and
+ cls = getADirectSuperclass*(startingClass)
+ or
+ not exists(Function f | f.getName() = name and f = cls.getAMethod()) and
+ result =
+ findFunctionAccordingToMroKnownStartingClass(getNextClassInMroKnownStartingClass(cls,
+ startingClass), startingClass, name)
+}
+
+/**
+ * Gets a potential definition of the function `name` according to our approximation of
+ * MRO for the class `startingClass` (see `getNextClassInMroKnownStartingClass` for more information).
+ *
+ * Note: this is almost the same as `findFunctionAccordingToMro`, except we know the
+ * `startingClass`, which can give slightly more precise results.
+ */
+pragma[inline]
+Function findFunctionAccordingToMroKnownStartingClass(Class startingClass, string name) {
+ result = findFunctionAccordingToMroKnownStartingClass(startingClass, startingClass, name)
+}
+
+// =============================================================================
+// attribute trackers
+// =============================================================================
+/** Gets a reference to the attribute read `attr` */
+private TypeTrackingNode attrReadTracker(TypeTracker t, AttrRead attr) {
+ t.start() and
+ result = attr
+ or
+ exists(TypeTracker t2 | result = attrReadTracker(t2, attr).track(t2, t))
+}
+
+/** Gets a reference to the attribute read `attr` */
+Node attrReadTracker(AttrRead attr) { attrReadTracker(TypeTracker::end(), attr).flowsTo(result) }
+
+// =============================================================================
+// call and argument resolution
+// =============================================================================
+newtype TCallType =
+ /** A call to a function that is not part of a class. */
+ CallTypePlainFunction() or
+ /**
+ * A call to a "normal" method on a class instance.
+ * Does not include staticmethods or classmethods.
+ */
+ CallTypeNormalMethod() or
+ /** A call to a staticmethod. */
+ CallTypeStaticMethod() or
+ /** A call to a classmethod. */
+ CallTypeClassMethod() or
+ /**
+ * A call to a method via the class itself rather than through an instance, such as
+ *
+ * ```py
+ * class Foo:
+ * def method(self, arg):
+ * pass
+ *
+ * foo = Foo()
+ * Foo.method(foo, 42)
+ * ```
+ */
+ CallTypeMethodAsPlainFunction() or
+ /** A call to a class. */
+ CallTypeClass()
+
+/** A type of call. */
+class CallType extends TCallType {
+ string toString() {
+ this instanceof CallTypePlainFunction and
+ result = "CallTypePlainFunction"
+ or
+ this instanceof CallTypeNormalMethod and
+ result = "CallTypeNormalMethod"
+ or
+ this instanceof CallTypeStaticMethod and
+ result = "CallTypeStaticMethod"
+ or
+ this instanceof CallTypeClassMethod and
+ result = "CallTypeClassMethod"
+ or
+ this instanceof CallTypeMethodAsPlainFunction and
+ result = "CallTypeMethodAsPlainFunction"
+ or
+ this instanceof CallTypeClass and
+ result = "CallTypeClass"
+ }
+}
+
+// -------------------------------------
+// method call resolution
+// -------------------------------------
+private module MethodCalls {
+ /**
+ * Holds if `call` is a call to a method `target` on an instance or class, where the
+ * instance or class is not derived from an implicit `self`/`cls` argument to a method
+ * -- for that, see `callWithinMethodImplicitSelfOrCls`.
+ *
+ * It is found by making an attribute read `attr` with the name `functionName` on a
+ * reference to the class `cls`, or to an instance of the class `cls`. The reference the
+ * attribute-read is made on is `self`.
+ */
+ pragma[noinline]
+ private predicate directCall(
+ CallNode call, Function target, string functionName, Class cls, AttrRead attr, Node self
+ ) {
+ target = findFunctionAccordingToMroKnownStartingClass(cls, cls, functionName) and
+ directCall_join(call, functionName, cls, attr, self)
+ }
+
+ /** Extracted to give good join order */
+ pragma[noinline]
+ private predicate directCall_join(
+ CallNode call, string functionName, Class cls, AttrRead attr, Node self
+ ) {
+ (
+ call.getFunction() = attrReadTracker(attr).asCfgNode() and
+ attr.accesses(classTracker(cls), functionName)
+ or
+ call.getFunction() = attrReadTracker(attr).asCfgNode() and
+ attr.accesses(classInstanceTracker(cls), functionName)
+ ) and
+ attr.accesses(self, functionName)
+ }
+
+ /**
+ * Holds if `call` is a call to a method `target` derived from an implicit `self`/`cls`
+ * argument to a method within the class `classWithMethod`.
+ *
+ * It is found by making an attribute read `attr` with the name `functionName` on a
+ * reference to an implicit `self`/`cls` argument. The reference the attribute-read is
+ * made on is `self`.
+ */
+ pragma[noinline]
+ private predicate callWithinMethodImplicitSelfOrCls(
+ CallNode call, Function target, string functionName, Class classWithMethod, AttrRead attr,
+ Node self
+ ) {
+ target = findFunctionAccordingToMro(getADirectSubclass*(classWithMethod), functionName) and
+ callWithinMethodImplicitSelfOrCls_join(call, functionName, classWithMethod, attr, self)
+ }
+
+ /** Extracted to give good join order */
+ pragma[noinline]
+ private predicate callWithinMethodImplicitSelfOrCls_join(
+ CallNode call, string functionName, Class classWithMethod, AttrRead attr, Node self
+ ) {
+ (
+ call.getFunction() = attrReadTracker(attr).asCfgNode() and
+ attr.accesses(clsTracker(classWithMethod), functionName)
+ or
+ call.getFunction() = attrReadTracker(attr).asCfgNode() and
+ attr.accesses(selfTracker(classWithMethod), functionName)
+ ) and
+ attr.accesses(self, functionName)
+ }
+
+ /**
+ * Holds if `call` is a call to a method `target`, derived from a use of `super`, either
+ * as:
+ *
+ * (1) `super(SomeClass, obj)`, where the first argument is a reference to the class
+ * `classUsedInSuper`, and the second argument is `self`.
+ *
+ * (2) `super()`. This implicit version can only happen within a method in a class.
+ * The implicit first argument is the class the call happens within `classUsedInSuper`.
+ * The implicit second argument is the `self`/`cls` parameter of the method this happens
+ * within.
+ *
+ * The method call is found by making an attribute read `attr` with the name
+ * `functionName` on the return value from the `super` call.
+ */
+ pragma[noinline]
+ predicate fromSuper(
+ CallNode call, Function target, string functionName, Class classUsedInSuper, AttrRead attr,
+ Node self
+ ) {
+ target = findFunctionAccordingToMro(getNextClassInMro(classUsedInSuper), functionName) and
+ fromSuper_join(call, functionName, classUsedInSuper, attr, self)
+ }
+
+ /** Extracted to give good join order */
+ pragma[noinline]
+ private predicate fromSuper_join(
+ CallNode call, string functionName, Class classUsedInSuper, AttrRead attr, Node self
+ ) {
+ call.getFunction() = attrReadTracker(attr).asCfgNode() and
+ (
+ exists(Function func |
+ attr.accesses(superCallNoArgumentTracker(func), functionName) and
+ // Requiring enclosing scope of function to be a class is a little too
+ // restrictive, since it is possible to use `super()` in a function defined inside
+ // the method, where the first argument to the nested-function will be used as
+ // implicit self argument. In practice I don't expect this to be a problem, and we
+ // did not support this with points-to either.
+ func.getEnclosingScope() = classUsedInSuper and
+ self.(ParameterNode).getParameter() = func.getArg(0)
+ )
+ or
+ attr.accesses(superCallTwoArgumentTracker(classUsedInSuper, self), functionName)
+ )
+ }
+
+ predicate resolveMethodCall(ControlFlowNode call, Function target, CallType type, Node self) {
+ (
+ directCall(call, target, _, _, _, self)
+ or
+ callWithinMethodImplicitSelfOrCls(call, target, _, _, _, self)
+ or
+ fromSuper(call, target, _, _, _, self)
+ ) and
+ (
+ // normal method call
+ type instanceof CallTypeNormalMethod and
+ (
+ self = classInstanceTracker(_)
+ or
+ self = selfTracker(_)
+ ) and
+ not hasStaticmethodDecorator(target) and
+ not hasClassmethodDecorator(target)
+ or
+ // method as plain function call
+ type instanceof CallTypeMethodAsPlainFunction and
+ self = classTracker(_) and
+ not hasStaticmethodDecorator(target) and
+ not hasClassmethodDecorator(target)
+ or
+ // staticmethod call
+ type instanceof CallTypeStaticMethod and
+ hasStaticmethodDecorator(target)
+ or
+ // classmethod call
+ type instanceof CallTypeClassMethod and
+ hasClassmethodDecorator(target)
+ )
+ }
+}
+
+import MethodCalls
+
+// -------------------------------------
+// class call resolution
+// -------------------------------------
+/**
+ * Holds when `call` is a call to the class `cls`.
+ *
+ * NOTE: We have this predicate mostly to be able to compare with the old points-to
+ * call-graph resolution. So it could be removed in the future.
+ */
+predicate resolveClassCall(CallNode call, Class cls) {
+ call.getFunction() = classTracker(cls).asCfgNode()
+}
+
+/**
+ * Gets a function (`__init__`/`__new__`) that will be invoked when `cls` is
+ * constructed -- where the function lookup is based on our MRO calculation.
+ */
+Function invokedFunctionFromClassConstruction(Class cls) {
+ result = findFunctionAccordingToMroKnownStartingClass(cls, "__new__")
+ or
+ // as described in https://docs.python.org/3/reference/datamodel.html#object.__new__
+ // __init__ will only be called when __new__ returns an instance of the class (which
+ // is not a requirement). However, for simplicity, we assume that __init__ will always
+ // be called.
+ result = findFunctionAccordingToMroKnownStartingClass(cls, "__init__")
+}
+
+// -------------------------------------
+// overall call resolution
+// -------------------------------------
+/**
+ * Holds if `call` is a call to the `target`, with call-type `type`.
+ */
+predicate resolveCall(ControlFlowNode call, Function target, CallType type) {
+ type instanceof CallTypePlainFunction and
+ call.(CallNode).getFunction() = functionTracker(target).asCfgNode() and
+ not exists(Class cls | cls.getAMethod() = target)
+ or
+ resolveMethodCall(call, target, type, _)
+ or
+ type instanceof CallTypeClass and
+ exists(Class cls |
+ resolveClassCall(call, cls) and
+ target = invokedFunctionFromClassConstruction(cls)
+ )
+}
+
+// =============================================================================
+// Argument resolution
+// =============================================================================
+/**
+ * Holds if the argument of `call` at position `apos` is `arg`. This is just a helper
+ * predicate that maps ArgumentPositions to the arguments of the underlying `CallNode`.
+ */
+private predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
+ exists(int index |
+ apos.isPositional(index) and
+ arg.asCfgNode() = call.getArg(index)
+ )
+ or
+ exists(string name |
+ apos.isKeyword(name) and
+ arg.asCfgNode() = call.getArgByName(name)
+ )
+}
+
+/**
+ * Gets the argument of `call` at position `apos`, if any, where we can resolve `call`
+ * to `target` with CallType `type`.
+ *
+ * It might seem like it's enough to know the CallType to resolve arguments. The reason
+ * we also need the `target`, is to avoid cross-talk. In the example below, assuming
+ * that `Foo` and `Bar` define their own `meth` methods, we might end up passing _both_
+ * `foo` and `bar` to both `Foo.meth` and `Bar.meth`, which is wrong. Since the
+ * attribute access uses the same name, we need to also distinguish on the resolved
+ * target, to know which of the two objects to pass as the self argument.
+ *
+ *
+ * ```py
+ * foo = Foo()
+ * bar = Bar()
+ * if cond:
+ * func = foo.meth
+ * else:
+ * func = bar.meth
+ * func(42)
+ * ```
+ *
+ * Note: If `Bar.meth` and `Foo.meth` resolve to the same function, we will end up
+ * sending both `self` arguments to that function, which is by definition the right thing to do.
+ */
+predicate getCallArg(
+ ControlFlowNode call, Function target, CallType type, Node arg, ArgumentPosition apos
+) {
+ // normal calls with a real call node
+ resolveCall(call, target, type) and
+ call instanceof CallNode and
+ (
+ type instanceof CallTypePlainFunction and
+ normalCallArg(call, arg, apos)
+ or
+ // self argument for normal method calls
+ type instanceof CallTypeNormalMethod and
+ apos.isSelf() and
+ resolveMethodCall(call, target, type, arg) and
+ // dataflow lib has requirement that arguments and calls are in same enclosing callable.
+ exists(CfgNode cfgNode | cfgNode.getNode() = call |
+ cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
+ )
+ or
+ // cls argument for classmethod calls
+ type instanceof CallTypeClassMethod and
+ apos.isSelf() and
+ resolveMethodCall(call, target, type, arg) and
+ arg = classTracker(_) and
+ // dataflow lib has requirement that arguments and calls are in same enclosing callable.
+ exists(CfgNode cfgNode | cfgNode.getNode() = call |
+ cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
+ )
+ or
+ // normal arguments for method calls
+ (
+ type instanceof CallTypeNormalMethod or
+ type instanceof CallTypeStaticMethod or
+ type instanceof CallTypeClassMethod
+ ) and
+ normalCallArg(call, arg, apos)
+ or
+ // method as plain function call.
+ //
+ // argument index 0 of call has position self (and MUST be given as positional
+ // argument in call). This also means that call-arguments are shifted by 1, such
+ // that argument index 1 of call has argument position 0
+ type instanceof CallTypeMethodAsPlainFunction and
+ (
+ apos.isSelf() and arg.asCfgNode() = call.(CallNode).getArg(0)
+ or
+ not apos.isPositional(_) and normalCallArg(call, arg, apos)
+ or
+ exists(ArgumentPosition normalPos, int index |
+ apos.isPositional(index - 1) and
+ normalPos.isPositional(index) and
+ normalCallArg(call, arg, normalPos)
+ )
+ )
+ or
+ // class call
+ type instanceof CallTypeClass and
+ (
+ apos.isSelf() and
+ arg = TSyntheticPreUpdateNode(call)
+ or
+ normalCallArg(call, arg, apos)
+ )
+ )
+}
+
+// =============================================================================
+// DataFlowCall
+// =============================================================================
newtype TDataFlowCall =
- // TODO(call-graph): implement this!
- MkDataFlowCall() or
+ TNormalCall(CallNode call, Function target, CallType type) { resolveCall(call, target, type) }
+ or
/** A synthesized call inside a summarized callable */
TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
@@ -151,11 +1003,44 @@ abstract class DataFlowCall extends TDataFlowCall {
/** A call found in the program source (as opposed to a synthesised call). */
abstract class ExtractedDataFlowCall extends DataFlowCall {
- ExtractedDataFlowCall() { exists(this.getNode()) }
-
override Location getLocation() { result = this.getNode().getLocation() }
}
+/**
+ * A resolved call in source code with an underlying `CallNode`.
+ *
+ * This is considered normal, compared with special calls such as `obj[0]` calling the
+ * `__getitem__` method on the object.
+ */
+class NormalCall extends ExtractedDataFlowCall, TNormalCall {
+ CallNode call;
+ Function target;
+ CallType type;
+
+ NormalCall() { this = TNormalCall(call, target, type) }
+
+ override string toString() {
+ // note: if we used toString directly on the CallNode we would get
+ // `ControlFlowNode for func()`
+ // but the `ControlFlowNode` part is just clutter, so we go directly to the AST node
+ // instead.
+ result = call.getNode().toString()
+ }
+
+ override ControlFlowNode getNode() { result = call }
+
+ override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
+
+ override DataFlowCallable getCallable() { result.(DataFlowFunction).getScope() = target }
+
+ override ArgumentNode getArgument(ArgumentPosition apos) {
+ getCallArg(call, target, type, result, apos)
+ }
+
+ /** Gets the `CallType` of this call. */
+ CallType getCallType() { result = type }
+}
+
/**
* A call to a summarized callable, a `LibraryCallable`.
*
@@ -239,6 +1124,7 @@ class SummaryCall extends DataFlowCall, TSummaryCall {
* flow graph.
*/
abstract class ParameterNodeImpl extends Node {
+ /** Gets the `Parameter` this `ParameterNode` represents. */
abstract Parameter getParameter();
/**
@@ -316,7 +1202,7 @@ private class SummaryArgumentNode extends SummaryNode, ArgumentNode {
}
}
-private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode {
+private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNodeImpl {
private Node pre;
SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, pre) }
@@ -338,6 +1224,9 @@ DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
)
}
+// =============================================================================
+// Remaining required data-flow things
+// =============================================================================
private newtype TReturnKind = TNormalReturnKind()
/**
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index 33a5558d232..8c4c807d3a8 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -39,157 +39,47 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos)
//--------
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
-/** DEPRECATED: Alias for `SyntheticPreUpdateNode` */
-deprecated module syntheticPreUpdateNode = SyntheticPreUpdateNode;
+class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
+ CallNode node;
-/** A module collecting the different reasons for synthesising a pre-update node. */
-module SyntheticPreUpdateNode {
- class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
- NeedsSyntheticPreUpdateNode post;
+ SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(node) }
- SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(post) }
+ /** Gets the node for which this is a synthetic pre-update node. */
+ CfgNode getPostUpdateNode() { result.getNode() = node }
- /** Gets the node for which this is a synthetic pre-update node. */
- Node getPostUpdateNode() { result = post }
+ override string toString() { result = "[pre] " + node.toString() }
- override string toString() { result = "[pre " + post.label() + "] " + post.toString() }
+ override Scope getScope() { result = node.getScope() }
- override Scope getScope() { result = post.getScope() }
-
- override Location getLocation() { result = post.getLocation() }
- }
-
- /** A data flow node for which we should synthesise an associated pre-update node. */
- class NeedsSyntheticPreUpdateNode extends PostUpdateNode {
- NeedsSyntheticPreUpdateNode() { this = objectCreationNode() }
-
- override Node getPreUpdateNode() { result.(SyntheticPreUpdateNode).getPostUpdateNode() = this }
-
- /**
- * Gets the label for this kind of node. This will figure in the textual representation of the synthesized pre-update node.
- *
- * There is currently only one reason for needing a pre-update node, so we always use that as the label.
- */
- string label() { result = "objCreate" }
- }
-
- /**
- * Calls to constructors are treated as post-update nodes for the synthesized argument
- * that is mapped to the `self` parameter. That way, constructor calls represent the value of the
- * object after the constructor (currently only `__init__`) has run.
- */
- CfgNode objectCreationNode() {
- // TODO(call-graph): implement this!
- none()
- // result.getNode().(CallNode) = any(ClassCall c).getNode()
- }
+ override Location getLocation() { result = node.getLocation() }
}
-import SyntheticPreUpdateNode
-
-/** DEPRECATED: Alias for `SyntheticPostUpdateNode` */
-deprecated module syntheticPostUpdateNode = SyntheticPostUpdateNode;
-
-/** A module collecting the different reasons for synthesising a post-update node. */
-module SyntheticPostUpdateNode {
- /** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
- class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
- NeedsSyntheticPostUpdateNode pre;
-
- SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(pre) }
-
- override Node getPreUpdateNode() { result = pre }
-
- override string toString() { result = "[post " + pre.label() + "] " + pre.toString() }
-
- override Scope getScope() { result = pre.getScope() }
-
- override Location getLocation() { result = pre.getLocation() }
- }
-
- /** A data flow node for which we should synthesise an associated post-update node. */
- class NeedsSyntheticPostUpdateNode extends Node {
- NeedsSyntheticPostUpdateNode() {
- this = argumentPreUpdateNode()
- or
- this = storePreUpdateNode()
- or
- this = readPreUpdateNode()
- }
-
- /**
- * Gets the label for this kind of node. This will figure in the textual representation of the synthesized post-update node.
- * We favour being an arguments as the reason for the post-update node in case multiple reasons apply.
- */
- string label() {
- if this = argumentPreUpdateNode()
- then result = "arg"
- else
- if this = storePreUpdateNode()
- then result = "store"
- else result = "read"
- }
- }
-
- /**
- * Gets the pre-update node for this node.
- *
- * An argument might have its value changed as a result of a call.
- * Certain arguments, such as implicit self arguments are already post-update nodes
- * and should not have an extra node synthesised.
- */
- Node argumentPreUpdateNode() {
- // TODO(call-graph): implement this!
- none()
- // result = any(FunctionCall c).getArg(_)
- // or
- // // Avoid argument 0 of method calls as those have read post-update nodes.
- // exists(MethodCall c, int n | n > 0 | result = c.getArg(n))
- // or
- // result = any(SpecialCall c).getArg(_)
- // or
- // // Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
- // exists(ClassCall c, int n | n > 0 | result = c.getArg(n))
- // or
- // // any argument of any call that we have not been able to resolve
- // exists(CallNode call | not call = any(DataFlowCall c).getNode() |
- // result.(CfgNode).getNode() in [call.getArg(_), call.getArgByName(_)]
- // )
- }
-
- /** Gets the pre-update node associated with a store. This is used for when an object might have its value changed after a store. */
- CfgNode storePreUpdateNode() {
- exists(Attribute a |
- result.getNode() = a.getObject().getAFlowNode() and
- a.getCtx() instanceof Store
- )
- }
-
- /**
- * Gets a node marking the state change of an object after a read.
- *
- * A reverse read happens when the result of a read is modified, e.g. in
- * ```python
- * l = [ mutable ]
- * l[0].mutate()
- * ```
- * we may now have changed the content of `l`. To track this, there must be
- * a postupdate node for `l`.
- */
- CfgNode readPreUpdateNode() {
- exists(Attribute a |
- result.getNode() = a.getObject().getAFlowNode() and
- a.getCtx() instanceof Load
- )
- or
- result.getNode() = any(SubscriptNode s).getObject()
- or
- // The dictionary argument is read from if the callable has parameters matching the keys.
- result.getNode().getNode() = any(Call call).getKwargs()
- }
+abstract class PostUpdateNodeImpl extends Node {
+ /** Gets the node before the state update. */
+ abstract Node getPreUpdateNode();
}
-import SyntheticPostUpdateNode
+class SyntheticPostUpdateNode extends PostUpdateNodeImpl, TSyntheticPostUpdateNode {
+ ControlFlowNode node;
+
+ SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(node) }
+
+ override Node getPreUpdateNode() { result.(CfgNode).getNode() = node }
+
+ override string toString() { result = "[post] " + node.toString() }
+
+ override Scope getScope() { result = node.getScope() }
+
+ override Location getLocation() { result = node.getLocation() }
+}
+
+class NonSyntheticPostUpdateNode extends PostUpdateNodeImpl, CfgNode {
+ SyntheticPreUpdateNode pre;
+
+ NonSyntheticPostUpdateNode() { this = pre.getPostUpdateNode() }
+
+ override Node getPreUpdateNode() { result = pre }
+}
class DataFlowExpr = Expr;
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
index 5eaff0815af..aa48df91c0c 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -31,10 +31,41 @@ newtype TNode =
or
node.getNode() instanceof Pattern
} or
- /** A synthetic node representing the value of an object before a state change */
- TSyntheticPreUpdateNode(NeedsSyntheticPreUpdateNode post) or
- /** A synthetic node representing the value of an object after a state change. */
- TSyntheticPostUpdateNode(NeedsSyntheticPostUpdateNode pre) or
+ /**
+ * A synthetic node representing the value of an object before a state change.
+ *
+ * For class calls we pass a synthetic self argument, so attribute writes in
+ * `__init__` are reflected on the resulting object (we need special logic for this
+ * since there is no `return` in `__init__`)
+ */
+ // NOTE: since we can't rely on the call graph, but we want to have synthetic
+ // pre-update nodes for class calls, we end up getting synthetic pre-update nodes for
+ // ALL calls :|
+ TSyntheticPreUpdateNode(CallNode call) or
+ /**
+ * A synthetic node representing the value of an object after a state change.
+ * See QLDoc for `PostUpdateNode`.
+ */
+ TSyntheticPostUpdateNode(ControlFlowNode node) {
+ exists(CallNode call |
+ node = call.getArg(_)
+ or
+ node = call.getArgByName(_)
+ )
+ or
+ node = any(AttrNode a).getObject()
+ or
+ node = any(SubscriptNode s).getObject()
+ or
+ // self parameter when used implicitly in `super()`
+ exists(Class cls, Function func, ParameterDefinition def |
+ func = cls.getAMethod() and
+ not hasStaticmethodDecorator(func) and
+ // this matches what we do in ParameterNode
+ def.getDefiningNode() = node and
+ def.getParameter() = func.getArg(0)
+ )
+ } or
/** A node representing a global (module-level) variable in a specific module. */
TModuleVariableNode(Module m, GlobalVariable v) {
v.getScope() = m and
@@ -270,13 +301,9 @@ class ExtractedParameterNode extends ParameterNodeImpl, CfgNode {
ExtractedParameterNode() { node = def.getDefiningNode() }
override predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
- // TODO(call-graph): implement this!
- none()
+ this = c.getParameter(ppos)
}
- override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
-
- /** Gets the `Parameter` this `ParameterNode` represents. */
override Parameter getParameter() { result = def.getParameter() }
}
@@ -294,16 +321,16 @@ abstract class ArgumentNode extends Node {
final ExtractedDataFlowCall getCall() { this.argumentOf(result, _) }
}
-/** A data flow node that represents a call argument found in the source code. */
+/**
+ * A data flow node that represents a call argument found in the source code,
+ * where the call can be resolved.
+ */
class ExtractedArgumentNode extends ArgumentNode {
- ExtractedArgumentNode() { this = any(ExtractedDataFlowCall c).getArgument(_) }
+ ExtractedArgumentNode() { getCallArg(_, _, _, this, _) }
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
- this.extractedArgumentOf(call, pos)
- }
-
- predicate extractedArgumentOf(ExtractedDataFlowCall call, ArgumentPosition pos) {
- this = call.getArgument(pos)
+ this = call.getArgument(pos) and
+ call instanceof ExtractedDataFlowCall
}
}
@@ -312,16 +339,17 @@ class ExtractedArgumentNode extends ArgumentNode {
* changed its state.
*
* This can be either the argument to a callable after the callable returns
- * (which might have mutated the argument), or the qualifier of a field after
- * an update to the field.
+ * (which might have mutated the argument), the qualifier of a field after
+ * an update to the field, or a container such as a list/dictionary after an element
+ * update.
*
* Nodes corresponding to AST elements, for example `ExprNode`s, usually refer
- * to the value before the update with the exception of `ObjectCreationNode`s,
+ * to the value before the update with the exception of class calls,
* which represents the value _after_ the constructor has run.
*/
-abstract class PostUpdateNode extends Node {
+class PostUpdateNode extends Node instanceof PostUpdateNodeImpl {
/** Gets the node before the state update. */
- abstract Node getPreUpdateNode();
+ Node getPreUpdateNode() { result = super.getPreUpdateNode() }
}
/**
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
index e9c97d86e8c..e00303d750b 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
@@ -60,22 +60,6 @@ string getPossibleContentName() {
result = any(DataFlowPublic::AttrRef a).getAttributeName()
}
-// /**
-// * Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
-// *
-// * Helper predicate to avoid bad join order experienced in `callStep`.
-// * This happened when `isParameterOf` was joined _before_ `getCallable`.
-// */
-// pragma[nomagic]
-// private DataFlowPrivate::DataFlowCallable getCallableForArgument(
-// DataFlowPublic::ExtractedArgumentNode nodeFrom, int i
-// ) {
-// exists(DataFlowPrivate::ExtractedDataFlowCall call |
-// nodeFrom.extractedArgumentOf(call, i) and
-// result = call.getCallable()
-// )
-// }
-
/**
* Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
*
@@ -83,14 +67,17 @@ string getPossibleContentName() {
* recursion (or, at best, terrible performance), since identifying calls to library
* methods is done using API graphs (which uses type tracking).
*/
-predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPrivate::ParameterNodeImpl nodeTo) {
- // TODO(call-graph): implement this!
- none()
- // // TODO: Support special methods?
- // exists(DataFlowPrivate::DataFlowCallable callable, int i |
- // callable = getCallableForArgument(nodeFrom, i) and
- // nodeTo.isParameterOf(callable, i)
- // )
+predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
+ // TODO: Fix performance problem with pandas
+ exists(
+ DataFlowPrivate::DataFlowCall call, DataFlowPrivate::DataFlowCallable callable,
+ DataFlowPrivate::ArgumentPosition apos, DataFlowPrivate::ParameterPosition ppos
+ |
+ nodeFrom = call.getArgument(apos) and
+ nodeTo = callable.getParameter(ppos) and
+ DataFlowPrivate::parameterMatch(ppos, apos) and
+ callable = call.getCallable()
+ )
}
/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
diff --git a/python/ql/src/meta/analysis-quality/CallGraphQuality.qll b/python/ql/src/meta/analysis-quality/CallGraphQuality.qll
index a5d03063f54..cdb143017db 100644
--- a/python/ql/src/meta/analysis-quality/CallGraphQuality.qll
+++ b/python/ql/src/meta/analysis-quality/CallGraphQuality.qll
@@ -102,15 +102,26 @@ module TypeTrackingBasedCallGraph {
/** A call that can be resolved by type-tracking. */
class ResolvableCall extends RelevantCall {
- TT::DataFlowCallable dataflowTarget;
-
- ResolvableCall() { dataflowTarget = TT::viableCallable(TT::TNormalCall(this)) }
+ ResolvableCall() {
+ exists(TT::TNormalCall(this, _, _))
+ or
+ TT::resolveClassCall(this, _)
+ }
/** Gets a resolved target of this call. */
Target getTarget() {
- result.(TargetFunction).getFunction() = dataflowTarget.(TT::DataFlowFunction).getScope()
- // TODO: class calls
- // result.(TargetClass).getClass()
+ exists(TT::DataFlowCall call, TT::CallType ct, Function targetFunc |
+ call = TT::TNormalCall(this, targetFunc, ct) and
+ not ct instanceof TT::CallTypeClass and
+ targetFunc = result.(TargetFunction).getFunction()
+ )
+ or
+ // a TT::TNormalCall only exists when the call can be resolved to a function.
+ // Points-to just says the call goes directly to the class itself, whereas the
+ // type-tracking based approach wants to resolve this to the constructor, which might
+ // not exist. So to do a proper comparison, we don't require the call to be resolved
+ // to a specific function.
+ TT::resolveClassCall(this, result.(TargetClass).getClass())
}
}
diff --git a/python/ql/src/meta/analysis-quality/TTCallGraphNewAmbiguous.ql b/python/ql/src/meta/analysis-quality/TTCallGraphNewAmbiguous.ql
new file mode 100644
index 00000000000..dc27dcf262c
--- /dev/null
+++ b/python/ql/src/meta/analysis-quality/TTCallGraphNewAmbiguous.ql
@@ -0,0 +1,19 @@
+/**
+ * @name New call graph edge from using type-tracking instead of points-to, that is ambiguous
+ * @kind problem
+ * @problem.severity recommendation
+ * @id py/meta/call-graph-new-ambiguous
+ * @tags meta
+ * @precision very-low
+ */
+
+import python
+import CallGraphQuality
+
+from CallNode call, Target target
+where
+ target.isRelevant() and
+ not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+ call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target and
+ 1 < count(call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget())
+select call, "NEW: $@ to $@", call, "Call", target, target.toString()
diff --git a/python/ql/test/experimental/dataflow/TestUtil/RoutingTest.qll b/python/ql/test/experimental/dataflow/TestUtil/RoutingTest.qll
index e96922bc25e..36b603baa78 100644
--- a/python/ql/test/experimental/dataflow/TestUtil/RoutingTest.qll
+++ b/python/ql/test/experimental/dataflow/TestUtil/RoutingTest.qll
@@ -26,29 +26,30 @@ abstract class RoutingTest extends InlineExpectationsTest {
element = fromNode.toString() and
(
tag = this.flowTag() and
- if "\"" + tag + "\"" = this.fromValue(fromNode)
- then value = ""
- else value = this.fromValue(fromNode)
+ if "\"" + tag + "\"" = fromValue(fromNode) then value = "" else value = fromValue(fromNode)
or
+ // only have result for `func` tag if the function where `arg` is used, is
+ // different from the function name of the call where `arg` was specified as
+ // an argument
tag = "func" and
- value = this.toFunc(toNode) and
- not value = this.fromFunc(fromNode)
+ value = toFunc(toNode) and
+ not value = fromFunc(fromNode)
)
)
}
-
- pragma[inline]
- private string fromValue(DataFlow::Node fromNode) {
- result = "\"" + prettyNode(fromNode).replaceAll("\"", "'") + "\""
- }
-
- pragma[inline]
- private string fromFunc(DataFlow::ArgumentNode fromNode) {
- result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
- }
-
- pragma[inline]
- private string toFunc(DataFlow::Node toNode) {
- result = toNode.getEnclosingCallable().getCallableValue().getScope().getQualifiedName() // TODO: More robust pretty printing?
- }
+}
+
+pragma[inline]
+private string fromValue(DataFlow::Node fromNode) {
+ result = "\"" + prettyNode(fromNode).replaceAll("\"", "'") + "\""
+}
+
+pragma[inline]
+private string fromFunc(DataFlow::ArgumentNode fromNode) {
+ result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
+}
+
+pragma[inline]
+private string toFunc(DataFlow::Node toNode) {
+ result = toNode.getEnclosingCallable().getQualifiedName()
}
diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected b/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
index 01ae02a1ad0..17f3028ae23 100644
--- a/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
+++ b/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
@@ -1,3 +1,2 @@
-| file://:0:0:0:0 | parameter 0 of builtins.reversed |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSources.expected b/python/ql/test/experimental/dataflow/basic/callGraphSources.expected
index 0b4613c42de..4023ba8f3ea 100644
--- a/python/ql/test/experimental/dataflow/basic/callGraphSources.expected
+++ b/python/ql/test/experimental/dataflow/basic/callGraphSources.expected
@@ -1,3 +1,2 @@
-| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:7:19:7:19 | ControlFlowNode for a |
diff --git a/python/ql/test/experimental/dataflow/basic/global.expected b/python/ql/test/experimental/dataflow/basic/global.expected
index 885d61309d4..8894bcc190a 100644
--- a/python/ql/test/experimental/dataflow/basic/global.expected
+++ b/python/ql/test/experimental/dataflow/basic/global.expected
@@ -1,4 +1,3 @@
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
diff --git a/python/ql/test/experimental/dataflow/basic/globalStep.expected b/python/ql/test/experimental/dataflow/basic/globalStep.expected
index 38611776824..9f228998b9c 100644
--- a/python/ql/test/experimental/dataflow/basic/globalStep.expected
+++ b/python/ql/test/experimental/dataflow/basic/globalStep.expected
@@ -1,4 +1,3 @@
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
diff --git a/python/ql/test/experimental/dataflow/basic/local.expected b/python/ql/test/experimental/dataflow/basic/local.expected
index 33636a8e81d..74263f31a52 100644
--- a/python/ql/test/experimental/dataflow/basic/local.expected
+++ b/python/ql/test/experimental/dataflow/basic/local.expected
@@ -1,8 +1,3 @@
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
-| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
-| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
-| file://:0:0:0:0 | parameter 0 of builtins.reversed | file://:0:0:0:0 | parameter 0 of builtins.reversed |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |
@@ -55,5 +50,6 @@
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
+| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
-| test.py:7:19:7:19 | [post arg] ControlFlowNode for a | test.py:7:19:7:19 | [post arg] ControlFlowNode for a |
+| test.py:7:19:7:19 | [post] ControlFlowNode for a | test.py:7:19:7:19 | [post] ControlFlowNode for a |
diff --git a/python/ql/test/experimental/dataflow/basic/localStep.expected b/python/ql/test/experimental/dataflow/basic/localStep.expected
index 900e4ac3900..e147bb9f4fc 100644
--- a/python/ql/test/experimental/dataflow/basic/localStep.expected
+++ b/python/ql/test/experimental/dataflow/basic/localStep.expected
@@ -1,4 +1,3 @@
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |
diff --git a/python/ql/test/experimental/dataflow/basic/sinks.expected b/python/ql/test/experimental/dataflow/basic/sinks.expected
index 3aa3245c465..97d7e313dac 100644
--- a/python/ql/test/experimental/dataflow/basic/sinks.expected
+++ b/python/ql/test/experimental/dataflow/basic/sinks.expected
@@ -1,7 +1,3 @@
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
-| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
-| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
-| file://:0:0:0:0 | parameter 0 of builtins.reversed |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |
@@ -26,5 +22,6 @@
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a |
+| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a |
-| test.py:7:19:7:19 | [post arg] ControlFlowNode for a |
+| test.py:7:19:7:19 | [post] ControlFlowNode for a |
diff --git a/python/ql/test/experimental/dataflow/basic/sources.expected b/python/ql/test/experimental/dataflow/basic/sources.expected
index 3aa3245c465..97d7e313dac 100644
--- a/python/ql/test/experimental/dataflow/basic/sources.expected
+++ b/python/ql/test/experimental/dataflow/basic/sources.expected
@@ -1,7 +1,3 @@
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
-| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
-| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
-| file://:0:0:0:0 | parameter 0 of builtins.reversed |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |
@@ -26,5 +22,6 @@
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a |
+| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a |
-| test.py:7:19:7:19 | [post arg] ControlFlowNode for a |
+| test.py:7:19:7:19 | [post] ControlFlowNode for a |
diff --git a/python/ql/test/experimental/dataflow/callgraph_crosstalk/Arguments.expected b/python/ql/test/experimental/dataflow/callgraph_crosstalk/Arguments.expected
new file mode 100644
index 00000000000..99c2d987d16
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/Arguments.expected
@@ -0,0 +1,13 @@
+| test.py:32:8:32:23 | CrosstalkTestX() | test.py:9:5:9:23 | Function __init__ | test.py:32:8:32:23 | [pre] ControlFlowNode for CrosstalkTestX() | self |
+| test.py:33:8:33:23 | CrosstalkTestY() | test.py:21:5:21:23 | Function __init__ | test.py:33:8:33:23 | [pre] ControlFlowNode for CrosstalkTestY() | self |
+| test.py:43:1:43:8 | func() | test.py:13:5:13:26 | Function setx | test.py:36:12:36:15 | ControlFlowNode for objx | self |
+| test.py:43:1:43:8 | func() | test.py:13:5:13:26 | Function setx | test.py:43:6:43:7 | ControlFlowNode for IntegerLiteral | position 0 |
+| test.py:43:1:43:8 | func() | test.py:25:5:25:26 | Function sety | test.py:38:12:38:15 | ControlFlowNode for objy | self |
+| test.py:43:1:43:8 | func() | test.py:25:5:25:26 | Function sety | test.py:43:6:43:7 | ControlFlowNode for IntegerLiteral | position 0 |
+| test.py:51:1:51:8 | func() | test.py:16:5:16:30 | Function setvalue | test.py:47:12:47:15 | ControlFlowNode for objx | self |
+| test.py:51:1:51:8 | func() | test.py:16:5:16:30 | Function setvalue | test.py:51:6:51:7 | ControlFlowNode for IntegerLiteral | position 0 |
+| test.py:51:1:51:8 | func() | test.py:28:5:28:30 | Function setvalue | test.py:49:12:49:15 | ControlFlowNode for objy | self |
+| test.py:51:1:51:8 | func() | test.py:28:5:28:30 | Function setvalue | test.py:51:6:51:7 | ControlFlowNode for IntegerLiteral | position 0 |
+| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:63:12:63:12 | ControlFlowNode for a | self |
+| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:70:6:70:7 | ControlFlowNode for IntegerLiteral | position 0 |
+| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:70:6:70:7 | ControlFlowNode for IntegerLiteral | self |
diff --git a/python/ql/test/experimental/dataflow/callgraph_crosstalk/Arguments.ql b/python/ql/test/experimental/dataflow/callgraph_crosstalk/Arguments.ql
new file mode 100644
index 00000000000..28699b1f9a7
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/Arguments.ql
@@ -0,0 +1,9 @@
+private import python
+private import semmle.python.dataflow.new.internal.DataFlowPrivate
+private import semmle.python.dataflow.new.internal.DataFlowPublic
+
+from DataFlowCall call, DataFlowCallable callable, ArgumentNode arg, ArgumentPosition apos
+where
+ callable = call.getCallable() and
+ arg = call.getArgument(apos)
+select call, callable, arg, apos
diff --git a/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected
new file mode 100644
index 00000000000..9fedaf9f663
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected
@@ -0,0 +1,19 @@
+uniqueEnclosingCallable
+uniqueType
+uniqueNodeLocation
+missingLocation
+uniqueNodeToString
+missingToString
+parameterCallable
+localFlowIsLocal
+compatibleTypesReflexive
+unreachableNodeCCtx
+localCallNodes
+postIsNotPre
+postHasUniquePre
+uniquePostUpdate
+postIsInSameCallable
+reverseRead
+argHasPostUpdate
+postWithInFlow
+viableImplInCallContextTooLarge
diff --git a/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.ql
new file mode 100644
index 00000000000..6743fa10d27
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.ql
@@ -0,0 +1 @@
+import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
diff --git a/python/ql/test/experimental/dataflow/callgraph_crosstalk/options b/python/ql/test/experimental/dataflow/callgraph_crosstalk/options
new file mode 100644
index 00000000000..efa237f03c4
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/options
@@ -0,0 +1 @@
+semmle-extractor-options: --max-import-depth=0
diff --git a/python/ql/test/experimental/dataflow/callgraph_crosstalk/test.py b/python/ql/test/experimental/dataflow/callgraph_crosstalk/test.py
new file mode 100644
index 00000000000..1b676d30b89
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/test.py
@@ -0,0 +1,70 @@
+import random
+cond = random.randint(0,1) == 1
+
+# ------------------------------------------------------------------------------
+# Calling different bound-methods based on conditional
+# ------------------------------------------------------------------------------
+
+class CrosstalkTestX:
+ def __init__(self):
+ self.x = None
+ self.y = None
+
+ def setx(self, value):
+ self.x = value
+
+ def setvalue(self, value):
+ self.x = value
+
+
+class CrosstalkTestY:
+ def __init__(self):
+ self.x = None
+ self.y = None
+
+ def sety(self ,value):
+ self.y = value
+
+ def setvalue(self, value):
+ self.y = value
+
+
+objx = CrosstalkTestX()
+objy = CrosstalkTestY()
+
+if cond:
+ func = objx.setx
+else:
+ func = objy.sety
+
+# What we're testing for is whether both objects are passed as self to both methods,
+# which is wrong.
+
+func(42)
+
+
+if cond:
+ func = objx.setvalue
+else:
+ func = objy.setvalue
+
+func(43)
+
+# ------------------------------------------------------------------------------
+# Calling methods in different ways
+# ------------------------------------------------------------------------------
+
+class A(object):
+ def foo(self, arg="Default"):
+ print("A.foo", self, arg)
+
+a = A()
+if cond:
+ func = a.foo # `44` is passed as arg
+else:
+ func = A.foo # `44` is passed as self
+
+# What we're testing for is whether a single call ends up having both `a` and `44`
+# passed as self to `A.foo`, which is wrong.
+
+func(44)
diff --git a/python/ql/test/experimental/dataflow/calls/DataFlowCallTest.ql b/python/ql/test/experimental/dataflow/calls/DataFlowCallTest.ql
index 4536e8f40ad..b71e92db337 100644
--- a/python/ql/test/experimental/dataflow/calls/DataFlowCallTest.ql
+++ b/python/ql/test/experimental/dataflow/calls/DataFlowCallTest.ql
@@ -1,6 +1,6 @@
import python
import semmle.python.dataflow.new.DataFlow
-import semmle.python.dataflow.new.internal.DataFlowPrivate
+import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
@@ -8,26 +8,28 @@ class DataFlowCallTest extends InlineExpectationsTest {
DataFlowCallTest() { this = "DataFlowCallTest" }
override string getARelevantTag() {
- result in ["call", "qlclass"]
+ result in ["call", "callType"]
or
- result = "arg_" + [0 .. 10]
+ result = "arg[" + any(DataFlowDispatch::ArgumentPosition pos).toString() + "]"
}
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
- exists(DataFlowCall call |
+ exists(DataFlowDispatch::DataFlowCall call |
location = call.getLocation() and
element = call.toString()
|
value = prettyExpr(call.getNode().getNode()) and
tag = "call"
or
- value = call.getAQlClass() and
- tag = "qlclass"
+ value = call.(DataFlowDispatch::NormalCall).getCallType().toString() and
+ tag = "callType"
or
- exists(int n, DataFlow::Node arg | arg = call.getArg(n) |
+ exists(DataFlowDispatch::ArgumentPosition pos, DataFlow::Node arg |
+ arg = call.getArgument(pos)
+ |
value = prettyNodeForInlineTest(arg) and
- tag = "arg_" + n
+ tag = "arg[" + pos + "]"
)
)
}
diff --git a/python/ql/test/experimental/dataflow/calls/test.py b/python/ql/test/experimental/dataflow/calls/test.py
index 144a6a79fe1..3332d2caa9e 100644
--- a/python/ql/test/experimental/dataflow/calls/test.py
+++ b/python/ql/test/experimental/dataflow/calls/test.py
@@ -14,24 +14,60 @@ class MyClass(object):
def my_method(self, arg):
pass
+ @staticmethod
+ def staticmethod(arg):
+ pass
+
+ @classmethod
+ def classmethod(cls, arg):
+ pass
+
def __getitem__(self, key):
pass
+func(0) # $ call=func(..) arg[position 0]=0 callType=CallTypePlainFunction
-func("foo") # $ call=func(..) qlclass=FunctionCall arg_0="foo"
-x = MyClass(1) # $ call=MyClass(..) qlclass=ClassCall arg_0=[pre]MyClass(..) arg_1=1
-x.my_method(2) # $ call=x.my_method(..) qlclass=MethodCall arg_0=x arg_1=2
+x = MyClass(1) # $ call=MyClass(..) arg[self]=[pre]MyClass(..) arg[position 0]=1 callType=CallTypeClass
+
+x.my_method(2) # $ call=x.my_method(..) arg[self]=x arg[position 0]=2 callType=CallTypeNormalMethod
mm = x.my_method
-mm(2) # $ call=mm(..) qlclass=MethodCall arg_1=2 MISSING: arg_0=x
-x[3] # $ call=x[3] qlclass=SpecialCall arg_0=x arg_1=3
+mm(2) # $ call=mm(..) arg[self]=x arg[position 0]=2 callType=CallTypeNormalMethod
+MyClass.my_method(x, 2) # $ call=MyClass.my_method(..) arg[position 0]=2 arg[self]=x callType=CallTypeMethodAsPlainFunction
+
+x.staticmethod(3) # $ call=x.staticmethod(..) arg[position 0]=3 callType=CallTypeStaticMethod
+MyClass.staticmethod(3) # $ call=MyClass.staticmethod(..) arg[position 0]=3 callType=CallTypeStaticMethod
+
+x.classmethod(4) # $ call=x.classmethod(..) arg[position 0]=4 callType=CallTypeClassMethod
+MyClass.classmethod(4) # $ call=MyClass.classmethod(..) arg[position 0]=4 arg[self]=MyClass callType=CallTypeClassMethod
+
+x[5] # $ MISSING: call=x[5] arg[self]=x arg[position 0]=5
+
+
+class Subclass(MyClass):
+ pass
+
+y = Subclass(1) # $ call=Subclass(..) arg[self]=[pre]Subclass(..) arg[position 0]=1 callType=CallTypeClass
+
+y.my_method(2) # $ call=y.my_method(..) arg[self]=y arg[position 0]=2 callType=CallTypeNormalMethod
+mm = y.my_method
+mm(2) # $ call=mm(..) arg[self]=y arg[position 0]=2 callType=CallTypeNormalMethod
+Subclass.my_method(y, 2) # $ call=Subclass.my_method(..) arg[self]=y arg[position 0]=2 callType=CallTypeMethodAsPlainFunction
+
+y.staticmethod(3) # $ call=y.staticmethod(..) arg[position 0]=3 callType=CallTypeStaticMethod
+Subclass.staticmethod(3) # $ call=Subclass.staticmethod(..) arg[position 0]=3 callType=CallTypeStaticMethod
+
+y.classmethod(4) # $ call=y.classmethod(..) arg[position 0]=4 callType=CallTypeClassMethod
+Subclass.classmethod(4) # $ call=Subclass.classmethod(..) arg[self]=Subclass arg[position 0]=4 callType=CallTypeClassMethod
+
+y[5] # $ MISSING: call=y[5] arg[self]=y arg[position 0]=5
try:
- # These are included to show how we handle absent things with points-to where
- # `mypkg.foo` is a `missing module variable`, but `mypkg.subpkg.bar` is compeltely
- # ignored.
+ # These are included to show whether we have a DataFlowCall for things we can't
+ # resolve. Both are interesting since with points-to we used to have a DataFlowCall
+ # for _one_ but not the other.
import mypkg
- mypkg.foo(42) # $ call=mypkg.foo(..) qlclass=NormalCall
- mypkg.subpkg.bar(43) # $ call=mypkg.subpkg.bar(..) qlclass=LibraryCall arg_0=43
+ mypkg.foo(42)
+ mypkg.subpkg.bar(43)
except:
pass
diff --git a/python/ql/test/experimental/dataflow/consistency/modeling-consistency.expected b/python/ql/test/experimental/dataflow/consistency/modeling-consistency.expected
index 2cbecf5ba66..e69de29bb2d 100644
--- a/python/ql/test/experimental/dataflow/consistency/modeling-consistency.expected
+++ b/python/ql/test/experimental/dataflow/consistency/modeling-consistency.expected
@@ -1 +0,0 @@
-| test.py:239:27:239:27 | Parameter | There is no `ParameterNode` associated with this parameter. |
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index 92b466cd25a..7129cded015 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -64,12 +64,12 @@ def argument_passing(
@expects(7)
def test_argument_passing1():
- argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg7 func=argument_passing MISSING: arg2 arg3="arg3 arg4 arg5 arg6
+ argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 MISSING: arg2 arg3 arg4 arg6 arg7
@expects(7)
def test_argument_passing2():
- argument_passing(arg1, arg2, arg3, f=arg6) #$ arg1 arg2 arg3
+ argument_passing(arg1, arg2, arg3, f=arg6) #$ arg1 arg2 arg3 arg6
def with_pos_only(a, /, b):
@@ -94,8 +94,8 @@ def with_multiple_kw_args(a, b, c):
def test_multiple_kw_args():
with_multiple_kw_args(b=arg2, c=arg3, a=arg1) #$ arg1 arg2 arg3
with_multiple_kw_args(arg1, *(arg2,), arg3) #$ arg1 MISSING: arg2 arg3
- with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 arg3 func=with_multiple_kw_args MISSING:
- with_multiple_kw_args(**{"b": arg2}, **{"c": arg3}, **{"a": arg1}) #$ arg1 arg2 arg3 func=with_multiple_kw_args
+ with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 MISSING: arg3
+ with_multiple_kw_args(**{"b": arg2}, **{"c": arg3}, **{"a": arg1}) #$ MISSING: arg1 arg2 arg3
def with_default_arguments(a=arg1, b=arg2, c=arg3): #$ arg1 arg2 arg3 func=with_default_arguments
@@ -109,7 +109,7 @@ def test_default_arguments():
with_default_arguments()
with_default_arguments(arg1) #$ arg1
with_default_arguments(b=arg2) #$ arg2
- with_default_arguments(**{"c": arg3}) #$ arg3 func=with_default_arguments
+ with_default_arguments(**{"c": arg3}) #$ MISSING: arg3
# Nested constructor pattern
@@ -135,7 +135,7 @@ def grab_baz(baz):
@expects(4)
def test_grab():
- grab_foo_bar_baz(baz=arg3, bar=arg2, foo=arg1) #$ arg1 arg2 arg3 func=grab_bar_baz func=grab_baz
+ grab_foo_bar_baz(baz=arg3, bar=arg2, foo=arg1) #$ arg1 MISSING: arg2 func=grab_bar_baz arg3 func=grab_baz
# All combinations
@@ -158,7 +158,7 @@ def test_pos_star():
if len(a) > 0:
SINK1(a[0])
- with_star(arg1) #$ arg1 func=test_pos_star.with_star
+ with_star(arg1) #$ MISSING: arg1 func=test_pos_star.with_star
def test_pos_kw():
@@ -186,4 +186,4 @@ def test_kw_doublestar():
def with_doublestar(**a):
SINK1(a["a"])
- with_doublestar(a=arg1) #$ arg1 func=test_kw_doublestar.with_doublestar
+ with_doublestar(a=arg1) #$ MISSING: arg1 func=test_kw_doublestar.with_doublestar
diff --git a/python/ql/test/experimental/dataflow/coverage/classes.py b/python/ql/test/experimental/dataflow/coverage/classes.py
index 0636eafcd7b..af8d19354e7 100644
--- a/python/ql/test/experimental/dataflow/coverage/classes.py
+++ b/python/ql/test/experimental/dataflow/coverage/classes.py
@@ -560,9 +560,9 @@ class With_getitem:
def test_getitem():
- with_getitem = With_getitem() #$ arg1="SSA variable with_getitem" func=With_getitem.__getitem__
+ with_getitem = With_getitem() #$ MISSING: arg1="SSA variable with_getitem" func=With_getitem.__getitem__
arg2 = 0
- with_getitem[arg2] #$ arg2 func=With_getitem.__getitem__
+ with_getitem[arg2] #$ MISSING: arg2 func=With_getitem.__getitem__
# object.__setitem__(self, key, value)
@@ -575,10 +575,10 @@ class With_setitem:
def test_setitem():
- with_setitem = With_setitem() #$ arg1="SSA variable with_setitem" func=With_setitem.__setitem__
+ with_setitem = With_setitem() #$ MISSING: arg1="SSA variable with_setitem" func=With_setitem.__setitem__
arg2 = 0
arg3 = ""
- with_setitem[arg2] = arg3 #$ arg2 arg3 func=With_setitem.__setitem__
+ with_setitem[arg2] = arg3 #$ MISSING: arg2 arg3 func=With_setitem.__setitem__
# object.__delitem__(self, key)
@@ -590,9 +590,9 @@ class With_delitem:
def test_delitem():
- with_delitem = With_delitem() #$ arg1="SSA variable with_delitem" func=With_delitem.__delitem__
+ with_delitem = With_delitem() #$ MISSING: arg1="SSA variable with_delitem" func=With_delitem.__delitem__
arg2 = 0
- del with_delitem[arg2] #$ arg2 func=With_delitem.__delitem__
+ del with_delitem[arg2] #$ MISSING: arg2 func=With_delitem.__delitem__
# object.__missing__(self, key)
@@ -662,9 +662,9 @@ class With_add:
def test_add():
- with_add = With_add() #$ arg1="SSA variable with_add" func=With_add.__add__
+ with_add = With_add() #$ MISSING: arg1="SSA variable with_add" func=With_add.__add__
arg2 = with_add
- with_add + arg2 #$ arg2 func=With_add.__add__
+ with_add + arg2 #$ MISSING: arg2 func=With_add.__add__
# object.__sub__(self, other)
@@ -677,9 +677,9 @@ class With_sub:
def test_sub():
- with_sub = With_sub() #$ arg1="SSA variable with_sub" func=With_sub.__sub__
+ with_sub = With_sub() #$ MISSING: arg1="SSA variable with_sub" func=With_sub.__sub__
arg2 = with_sub
- with_sub - arg2 #$ arg2 func=With_sub.__sub__
+ with_sub - arg2 #$ MISSING: arg2 func=With_sub.__sub__
# object.__mul__(self, other)
@@ -692,9 +692,9 @@ class With_mul:
def test_mul():
- with_mul = With_mul() #$ arg1="SSA variable with_mul" func=With_mul.__mul__
+ with_mul = With_mul() #$ MISSING: arg1="SSA variable with_mul" func=With_mul.__mul__
arg2 = with_mul
- with_mul * arg2 #$ arg2 func=With_mul.__mul__
+ with_mul * arg2 #$ MISSING: arg2 func=With_mul.__mul__
# object.__matmul__(self, other)
@@ -707,9 +707,9 @@ class With_matmul:
def test_matmul():
- with_matmul = With_matmul() #$ arg1="SSA variable with_matmul" func=With_matmul.__matmul__
+ with_matmul = With_matmul() #$ MISSING: arg1="SSA variable with_matmul" func=With_matmul.__matmul__
arg2 = with_matmul
- with_matmul @ arg2 #$ arg2 func=With_matmul.__matmul__
+ with_matmul @ arg2 #$ MISSING: arg2 func=With_matmul.__matmul__
# object.__truediv__(self, other)
@@ -722,9 +722,9 @@ class With_truediv:
def test_truediv():
- with_truediv = With_truediv() #$ arg1="SSA variable with_truediv" func=With_truediv.__truediv__
+ with_truediv = With_truediv() #$ MISSING: arg1="SSA variable with_truediv" func=With_truediv.__truediv__
arg2 = with_truediv
- with_truediv / arg2 #$ arg2 func=With_truediv.__truediv__
+ with_truediv / arg2 #$ MISSING: arg2 func=With_truediv.__truediv__
# object.__floordiv__(self, other)
@@ -737,9 +737,9 @@ class With_floordiv:
def test_floordiv():
- with_floordiv = With_floordiv() #$ arg1="SSA variable with_floordiv" func=With_floordiv.__floordiv__
+ with_floordiv = With_floordiv() #$ MISSING: arg1="SSA variable with_floordiv" func=With_floordiv.__floordiv__
arg2 = with_floordiv
- with_floordiv // arg2 #$ arg2 func=With_floordiv.__floordiv__
+ with_floordiv // arg2 #$ MISSING: arg2 func=With_floordiv.__floordiv__
# object.__mod__(self, other)
@@ -752,9 +752,9 @@ class With_mod:
def test_mod():
- with_mod = With_mod() #$ arg1="SSA variable with_mod" func=With_mod.__mod__
+ with_mod = With_mod() #$ MISSING: arg1="SSA variable with_mod" func=With_mod.__mod__
arg2 = with_mod
- with_mod % arg2 #$ arg2 func=With_mod.__mod__
+ with_mod % arg2 #$ MISSING: arg2 func=With_mod.__mod__
# object.__divmod__(self, other)
@@ -788,9 +788,9 @@ def test_pow():
def test_pow_op():
- with_pow = With_pow() #$ arg1="SSA variable with_pow" func=With_pow.__pow__
+ with_pow = With_pow() #$ MISSING: arg1="SSA variable with_pow" func=With_pow.__pow__
arg2 = with_pow
- with_pow ** arg2 #$ arg2 func=With_pow.__pow__
+ with_pow ** arg2 #$ MISSING: arg2 func=With_pow.__pow__
# object.__lshift__(self, other)
@@ -803,9 +803,9 @@ class With_lshift:
def test_lshift():
- with_lshift = With_lshift() #$ arg1="SSA variable with_lshift" func=With_lshift.__lshift__
+ with_lshift = With_lshift() #$ MISSING: arg1="SSA variable with_lshift" func=With_lshift.__lshift__
arg2 = with_lshift
- with_lshift << arg2 #$ arg2 func=With_lshift.__lshift__
+ with_lshift << arg2 #$ MISSING: arg2 func=With_lshift.__lshift__
# object.__rshift__(self, other)
@@ -818,9 +818,9 @@ class With_rshift:
def test_rshift():
- with_rshift = With_rshift() #$ arg1="SSA variable with_rshift" func=With_rshift.__rshift__
+ with_rshift = With_rshift() #$ MISSING: arg1="SSA variable with_rshift" func=With_rshift.__rshift__
arg2 = with_rshift
- with_rshift >> arg2 #$ arg2 func=With_rshift.__rshift__
+ with_rshift >> arg2 #$ MISSING: arg2 func=With_rshift.__rshift__
# object.__and__(self, other)
@@ -833,9 +833,9 @@ class With_and:
def test_and():
- with_and = With_and() #$ arg1="SSA variable with_and" func=With_and.__and__
+ with_and = With_and() #$ MISSING: arg1="SSA variable with_and" func=With_and.__and__
arg2 = with_and
- with_and & arg2 #$ arg2 func=With_and.__and__
+ with_and & arg2 #$ MISSING: arg2 func=With_and.__and__
# object.__xor__(self, other)
@@ -848,9 +848,9 @@ class With_xor:
def test_xor():
- with_xor = With_xor() #$ arg1="SSA variable with_xor" func=With_xor.__xor__
+ with_xor = With_xor() #$ MISSING: arg1="SSA variable with_xor" func=With_xor.__xor__
arg2 = with_xor
- with_xor ^ arg2 #$ arg2 func=With_xor.__xor__
+ with_xor ^ arg2 #$ MISSING: arg2 func=With_xor.__xor__
# object.__or__(self, other)
@@ -863,9 +863,9 @@ class With_or:
def test_or():
- with_or = With_or() #$ arg1="SSA variable with_or" func=With_or.__or__
+ with_or = With_or() #$ MISSING: arg1="SSA variable with_or" func=With_or.__or__
arg2 = with_or
- with_or | arg2 #$ arg2 func=With_or.__or__
+ with_or | arg2 #$ MISSING: arg2 func=With_or.__or__
# object.__radd__(self, other)
diff --git a/python/ql/test/experimental/dataflow/coverage/datamodel.py b/python/ql/test/experimental/dataflow/coverage/datamodel.py
index 364dbb299d7..e30ffea164a 100644
--- a/python/ql/test/experimental/dataflow/coverage/datamodel.py
+++ b/python/ql/test/experimental/dataflow/coverage/datamodel.py
@@ -124,6 +124,40 @@ def test_staticmethod_call():
C.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
+# subclass
+class SC(C):
+ pass
+sc = SC()
+
+@expects(6)
+def test_subclass_method_call():
+ func_obj = sc.method.__func__
+
+ sc.method(arg1, arg2) # $ func=C.method arg1 arg2
+ SC.method(sc, arg1, arg2) # $ func=C.method arg1 arg2
+ func_obj(sc, arg1, arg2) # $ MISSING: func=C.method arg1 arg2
+
+
+@expects(6)
+def test_subclass_classmethod_call():
+ c_func_obj = SC.classmethod.__func__
+
+ sc.classmethod(arg1, arg2) # $ func=C.classmethod arg1 arg2
+ SC.classmethod(arg1, arg2) # $ func=C.classmethod arg1 arg2
+ c_func_obj(SC, arg1, arg2) # $ MISSING: func=C.classmethod arg1 arg2
+
+
+@expects(5)
+def test_subclass_staticmethod_call():
+ try:
+ SC.staticmethod.__func__
+ except AttributeError:
+ print("OK")
+
+ sc.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
+ SC.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
+
+
# Generator functions
# A function or method which uses the yield statement (see section The yield statement) is called a generator function. Such a function, when called, always returns an iterator object which can be used to execute the body of the function: calling the iterator’s iterator.__next__() method will cause the function to execute until it provides a value using the yield statement. When the function executes a return statement or falls off the end, a StopIteration exception is raised and the iterator will have reached the end of the set of values to be returned.
def gen(x, count):
@@ -198,5 +232,16 @@ class Customized:
customized = Customized()
SINK(Customized.a) #$ MISSING:flow="SOURCE, l:-8 -> customized.a"
SINK_F(Customized.b)
-SINK(customized.a) #$ MISSING:flow="SOURCE, l:-10 -> customized.a"
+SINK(customized.a) #$ flow="SOURCE, l:-10 -> customized.a"
SINK(customized.b) #$ flow="SOURCE, l:-7 -> customized.b"
+
+
+class Test2:
+
+ def __init__(self, arg):
+ self.x = SOURCE
+ self.y = arg
+
+t = Test2(SOURCE)
+SINK(t.x) # $ flow="SOURCE, l:-4 -> t.x"
+SINK(t.y) # $ flow="SOURCE, l:-2 -> t.y"
diff --git a/python/ql/test/experimental/dataflow/coverage/localFlow.expected b/python/ql/test/experimental/dataflow/coverage/localFlow.expected
index 30b25979df3..7ca11daba51 100644
--- a/python/ql/test/experimental/dataflow/coverage/localFlow.expected
+++ b/python/ql/test/experimental/dataflow/coverage/localFlow.expected
@@ -8,10 +8,4 @@
| test.py:187:1:187:53 | GSSA Variable SINK | test.py:189:5:189:8 | ControlFlowNode for SINK |
| test.py:187:1:187:53 | GSSA Variable SOURCE | test.py:188:25:188:30 | ControlFlowNode for SOURCE |
| test.py:188:5:188:5 | SSA variable x | test.py:189:10:189:10 | ControlFlowNode for x |
-| test.py:188:9:188:68 | ControlFlowNode for .0 | test.py:188:9:188:68 | SSA variable .0 |
| test.py:188:9:188:68 | ControlFlowNode for ListComp | test.py:188:5:188:5 | SSA variable x |
-| test.py:188:9:188:68 | SSA variable .0 | test.py:188:9:188:68 | ControlFlowNode for .0 |
-| test.py:188:16:188:16 | SSA variable v | test.py:188:45:188:45 | ControlFlowNode for v |
-| test.py:188:40:188:40 | SSA variable u | test.py:188:56:188:56 | ControlFlowNode for u |
-| test.py:188:51:188:51 | SSA variable z | test.py:188:67:188:67 | ControlFlowNode for z |
-| test.py:188:62:188:62 | SSA variable y | test.py:188:10:188:10 | ControlFlowNode for y |
diff --git a/python/ql/test/experimental/dataflow/coverage/localFlow.ql b/python/ql/test/experimental/dataflow/coverage/localFlow.ql
index 6522dcf5c68..8fcbf74c64f 100644
--- a/python/ql/test/experimental/dataflow/coverage/localFlow.ql
+++ b/python/ql/test/experimental/dataflow/coverage/localFlow.ql
@@ -4,5 +4,5 @@ import semmle.python.dataflow.new.DataFlow
from DataFlow::Node nodeFrom, DataFlow::Node nodeTo
where
DataFlow::localFlowStep(nodeFrom, nodeTo) and
- nodeFrom.getEnclosingCallable().getName().matches("%\\_with\\_local\\_flow")
+ nodeFrom.getEnclosingCallable().getQualifiedName().matches("%\\_with\\_local\\_flow")
select nodeFrom, nodeTo
diff --git a/python/ql/test/experimental/dataflow/coverage/test.py b/python/ql/test/experimental/dataflow/coverage/test.py
index b09b2aa9984..0e06a828700 100644
--- a/python/ql/test/experimental/dataflow/coverage/test.py
+++ b/python/ql/test/experimental/dataflow/coverage/test.py
@@ -393,7 +393,7 @@ def test_call_unpack_iterable():
def test_call_unpack_mapping():
- SINK(second(NONSOURCE, **{"b": SOURCE})) #$ flow="SOURCE -> second(..)"
+ SINK(second(NONSOURCE, **{"b": SOURCE})) #$ MISSING: flow="SOURCE -> second(..)"
def f_extra_pos(a, *b):
@@ -401,7 +401,7 @@ def f_extra_pos(a, *b):
def test_call_extra_pos():
- SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ flow="SOURCE -> f_extra_pos(..)"
+ SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ MISSING: flow="SOURCE -> f_extra_pos(..)"
def f_extra_keyword(a, **b):
@@ -409,7 +409,7 @@ def f_extra_keyword(a, **b):
def test_call_extra_keyword():
- SINK(f_extra_keyword(NONSOURCE, b=SOURCE)) #$ flow="SOURCE -> f_extra_keyword(..)"
+ SINK(f_extra_keyword(NONSOURCE, b=SOURCE)) #$ MISSING: flow="SOURCE -> f_extra_keyword(..)"
# return the name of the first extra keyword argument
@@ -509,17 +509,17 @@ def test_lambda_unpack_mapping():
def second(a, b):
return b
- SINK(second(NONSOURCE, **{"b": SOURCE})) #$ flow="SOURCE -> second(..)"
+ SINK(second(NONSOURCE, **{"b": SOURCE})) #$ MISSING: flow="SOURCE -> second(..)"
def test_lambda_extra_pos():
f_extra_pos = lambda a, *b: b[0]
- SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ flow="SOURCE -> f_extra_pos(..)"
+ SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ MISSING: flow="SOURCE -> f_extra_pos(..)"
def test_lambda_extra_keyword():
f_extra_keyword = lambda a, **b: b["b"]
- SINK(f_extra_keyword(NONSOURCE, b=SOURCE)) #$ flow="SOURCE -> f_extra_keyword(..)"
+ SINK(f_extra_keyword(NONSOURCE, b=SOURCE)) #$ MISSING: flow="SOURCE -> f_extra_keyword(..)"
# call the function with our source as the name of the keyword argument
@@ -689,7 +689,7 @@ def test_iterable_star_unpacking_in_for_2():
def iterate_star_args(first, second, *args):
for arg in args:
- SINK(arg) #$ flow="SOURCE, l:+5 -> arg" flow="SOURCE, l:+6 -> arg"
+ SINK(arg) #$ MISSING: flow="SOURCE, l:+5 -> arg" flow="SOURCE, l:+6 -> arg"
# FP reported here: https://github.com/github/codeql-python-team/issues/49
@expects(2)
@@ -697,9 +697,16 @@ def test_overflow_iteration():
s = SOURCE
iterate_star_args(NONSOURCE, NONSOURCE, SOURCE, s)
+@expects(6)
def test_deep_callgraph():
# port of python/ql/test/library-tests/taint/general/deep.py
+ # based on the fact that `test_deep_callgraph_defined_in_module` works, the problem
+ # seems to be that we're defining these functions inside another function and that
+ # the flow of these function definitions DOESN'T flow into the body of the `f`
+ # functions (they DO flow into the body of `test_deep_callgraph`, otherwise the
+ # `f1` call wouldn't work).
+
def f1(arg):
return arg
@@ -720,8 +727,51 @@ def test_deep_callgraph():
x = f6(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
+ x = f5(SOURCE)
+ SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
+ x = f4(SOURCE)
+ SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
+ x = f3(SOURCE)
+ SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
+ x = f2(SOURCE)
+ SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
+ x = f1(SOURCE)
+ SINK(x) #$ flow="SOURCE, l:-1 -> x"
+def wat_f1(arg):
+ return arg
+
+def wat_f2(arg):
+ return wat_f1(arg)
+
+def wat_f3(arg):
+ return wat_f2(arg)
+
+def wat_f4(arg):
+ return wat_f3(arg)
+
+def wat_f5(arg):
+ return wat_f4(arg)
+
+def wat_f6(arg):
+ return wat_f5(arg)
+
+@expects(6)
+def test_deep_callgraph_defined_in_module():
+ x = wat_f6(SOURCE)
+ SINK(x) #$ flow="SOURCE, l:-1 -> x"
+ x = wat_f5(SOURCE)
+ SINK(x) #$ flow="SOURCE, l:-1 -> x"
+ x = wat_f4(SOURCE)
+ SINK(x) #$ flow="SOURCE, l:-1 -> x"
+ x = wat_f3(SOURCE)
+ SINK(x) #$ flow="SOURCE, l:-1 -> x"
+ x = wat_f2(SOURCE)
+ SINK(x) #$ flow="SOURCE, l:-1 -> x"
+ x = wat_f1(SOURCE)
+ SINK(x) #$ flow="SOURCE, l:-1 -> x"
+
@expects(2)
def test_dynamic_tuple_creation_1():
tup = tuple()
diff --git a/python/ql/test/experimental/dataflow/fieldflow/test.py b/python/ql/test/experimental/dataflow/fieldflow/test.py
index 100ab6aac70..68bb71bd278 100644
--- a/python/ql/test/experimental/dataflow/fieldflow/test.py
+++ b/python/ql/test/experimental/dataflow/fieldflow/test.py
@@ -84,10 +84,10 @@ def test_indirect_assign_bound_method():
sf = myobj.setFoo
sf(SOURCE)
- SINK(myobj.foo) # $ MISSING: flow="SOURCE, l:-1 -> myobj.foo"
+ SINK(myobj.foo) # $ flow="SOURCE, l:-1 -> myobj.foo"
sf(NONSOURCE)
- SINK_F(myobj.foo)
+ SINK_F(myobj.foo) # $ SPURIOUS: flow="SOURCE, l:-4 -> myobj.foo"
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
@@ -167,6 +167,17 @@ def fields_with_local_flow(x):
def test_fields():
SINK(fields_with_local_flow(SOURCE)) # $ flow="SOURCE -> fields_with_local_flow(..)"
+
+def call_with_source(func):
+ func(SOURCE)
+
+
+def test_bound_method_passed_as_arg():
+ myobj = MyObj(NONSOURCE)
+ call_with_source(myobj.setFoo)
+ SINK(myobj.foo) # $ MISSING: flow="SOURCE, l:-5 -> foo.x"
+
+
# ------------------------------------------------------------------------------
# Nested Object
# ------------------------------------------------------------------------------
@@ -244,6 +255,9 @@ class CrosstalkTestX:
def setvalue(self, value):
self.x = value
+ def do_nothing(self, value):
+ pass
+
class CrosstalkTestY:
def __init__(self):
@@ -295,10 +309,10 @@ def test_potential_crosstalk_different_name(cond=True):
func(SOURCE)
- SINK(objx.x) # $ MISSING: flow="SOURCE, l:-2 -> objx.x"
+ SINK(objx.x) # $ flow="SOURCE, l:-2 -> objx.x"
SINK_F(objx.y)
SINK_F(objy.x)
- SINK(objy.y, not_present_at_runtime=True) # $ MISSING: flow="SOURCE, l:-5 -> objy.y"
+ SINK(objy.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-5 -> objy.y"
@expects(8) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
@@ -318,10 +332,10 @@ def test_potential_crosstalk_same_name(cond=True):
func(SOURCE)
- SINK(objx.x) # $ MISSING: flow="SOURCE, l:-2 -> objx.x"
+ SINK(objx.x) # $ flow="SOURCE, l:-2 -> objx.x"
SINK_F(objx.y)
SINK_F(objy.x)
- SINK(objy.y, not_present_at_runtime=True) # $ MISSING: flow="SOURCE, l:-5 -> objy.y"
+ SINK(objy.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-5 -> objy.y"
@expects(10) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
@@ -350,6 +364,27 @@ def test_potential_crosstalk_same_name_object_reference(cond=True):
SINK(obj.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-8 -> obj.y"
+@expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
+def test_potential_crosstalk_same_class(cond=True):
+ objx1 = CrosstalkTestX()
+ SINK_F(objx1.x)
+
+ objx2 = CrosstalkTestX()
+ SINK_F(objx2.x)
+
+ if cond:
+ func = objx1.setvalue
+ else:
+ func = objx2.do_nothing
+
+ # We want to ensure that objx2.x does not end up getting tainted, since that would
+ # be cross-talk between the self arguments and their functions.
+ func(SOURCE)
+
+ SINK(objx1.x) # $ flow="SOURCE, l:-2 -> objx1.x"
+ SINK_F(objx2.x)
+
+
# ------------------------------------------------------------------------------
# Global scope
# ------------------------------------------------------------------------------
@@ -400,7 +435,7 @@ SINK(obj2.foo) # $ flow="SOURCE, l:-1 -> obj2.foo"
# apparently these if statements below makes a difference :O
# but one is not enough
-cond = os.urandom(1)[0] > 128
+cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
if cond:
pass
diff --git a/python/ql/test/experimental/dataflow/typetracking/test.py b/python/ql/test/experimental/dataflow/typetracking/test.py
index 5277450c151..8de0a3ded92 100644
--- a/python/ql/test/experimental/dataflow/typetracking/test.py
+++ b/python/ql/test/experimental/dataflow/typetracking/test.py
@@ -91,7 +91,7 @@ def unrelated_func():
return "foo"
def use_funcs_with_decorators():
- x = get_tracked2() # $ MISSING: tracked
+ x = get_tracked2() # $ tracked
y = unrelated_func()
# ------------------------------------------------------------------------------
@@ -117,11 +117,11 @@ class Foo(object):
def meth1(self):
do_stuff(self)
- def meth2(self): # $ MISSING: tracked_self
- do_stuff(self) # $ MISSING: tracked_self
+ def meth2(self): # $ tracked_self
+ do_stuff(self) # $ tracked_self
- def meth3(self): # $ MISSING: tracked_self
- do_stuff(self) # $ MISSING: tracked_self
+ def meth3(self): # $ tracked_self
+ do_stuff(self) # $ tracked_self
class Bar(Foo):
diff --git a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/InlineCallGraphTest.expected
index c847f9a8aa2..d5ed453c51a 100644
--- a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/InlineCallGraphTest.expected
@@ -1,5 +1,4 @@
failures
debug_callableNotUnique
pointsTo_found_typeTracker_notFound
-| example.py:22:1:22:16 | ControlFlowNode for explicit_afunc() | explicit_afunc |
typeTracker_found_pointsTo_notFound
diff --git a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/example.py b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/example.py
index 75ad8a9db11..f14669948bc 100644
--- a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/example.py
+++ b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/example.py
@@ -19,4 +19,4 @@ from foo_explicit.bar.a import explicit_afunc
afunc() # $ MISSING: pt,tt=afunc
-explicit_afunc() # $ pt=explicit_afunc MISSING: tt=explicit_afunc
+explicit_afunc() # $ pt,tt="foo_explicit/bar/a.py:explicit_afunc"
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.expected
new file mode 100644
index 00000000000..2836800d300
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.expected
@@ -0,0 +1,5 @@
+failures
+debug_callableNotUnique
+pointsTo_found_typeTracker_notFound
+| pkg/use.py:10:5:10:10 | ControlFlowNode for func() | func |
+typeTracker_found_pointsTo_notFound
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.qlref b/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.qlref
new file mode 100644
index 00000000000..25117a4582b
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.qlref
@@ -0,0 +1 @@
+../CallGraph/InlineCallGraphTest.ql
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/README.md b/python/ql/test/experimental/library-tests/CallGraph-imports/README.md
new file mode 100644
index 00000000000..4063a2851f9
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/README.md
@@ -0,0 +1,5 @@
+A testcase observed in real code, where mixing `from .this import that` with `from .other import *` (in that order) causes import resolution to not work properly.
+
+This needs to be in a separate folder, since using relative imports requires a valid top-level package. We emulate real extractor behavior using the `-R` extractor option.
+
+From this directory, you can run the code with `python -m pkg.use`.
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/options b/python/ql/test/experimental/library-tests/CallGraph-imports/options
new file mode 100644
index 00000000000..1bbc489d153
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/options
@@ -0,0 +1 @@
+semmle-extractor-options: --max-import-depth=1 -R ./pkg/
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/__init__.py b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_only_direct.py b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_only_direct.py
new file mode 100644
index 00000000000..dc687a4344e
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_only_direct.py
@@ -0,0 +1 @@
+from .func_def import func
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_problem.py b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_problem.py
new file mode 100644
index 00000000000..dd9f6a8f215
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_problem.py
@@ -0,0 +1,2 @@
+from .func_def import func
+from .other import *
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_problem_fixed.py b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_problem_fixed.py
new file mode 100644
index 00000000000..5c57e676d44
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_problem_fixed.py
@@ -0,0 +1,3 @@
+# this ordering makes the problem go away
+from .other import *
+from .func_def import func
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_star.py b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_star.py
new file mode 100644
index 00000000000..c34c7f1df7e
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/alias_star.py
@@ -0,0 +1,2 @@
+from .func_def import *
+from .other import *
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/func_def.py b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/func_def.py
new file mode 100644
index 00000000000..531031c0358
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/func_def.py
@@ -0,0 +1,2 @@
+def func():
+ print("func")
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/other.py b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/other.py
new file mode 100644
index 00000000000..c652349c911
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/other.py
@@ -0,0 +1,2 @@
+def something():
+ pass
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/use.py b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/use.py
new file mode 100644
index 00000000000..861359b5d91
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/use.py
@@ -0,0 +1,33 @@
+def test_direct_import():
+ from .func_def import func
+ func() # $ pt,tt="pkg/func_def.py:func"
+
+test_direct_import() # $ pt,tt=test_direct_import
+
+
+def test_alias_problem():
+ from .alias_problem import func
+ func() # $ pt="pkg/func_def.py:func" MISSING: tt="pkg/func_def.py:func"
+
+test_alias_problem() # $ pt,tt=test_alias_problem
+
+
+def test_alias_problem_fixed():
+ from .alias_problem_fixed import func
+ func() # $ pt,tt="pkg/func_def.py:func"
+
+test_alias_problem_fixed() # $ pt,tt=test_alias_problem_fixed
+
+
+def test_alias_star():
+ from .alias_star import func
+ func() # $ pt,tt="pkg/func_def.py:func"
+
+test_alias_star() # $ pt,tt=test_alias_star
+
+
+def test_alias_only_direct():
+ from .alias_only_direct import func
+ func() # $ pt,tt="pkg/func_def.py:func"
+
+test_alias_only_direct() # $ pt,tt=test_alias_only_direct
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index 02d08ac4c81..10031cecde5 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -1,22 +1,39 @@
failures
debug_callableNotUnique
-| code/class_advanced.py:18:5:18:18 | Function arg | Qualified function name 'B.arg' is not unique. Please fix. |
-| code/class_advanced.py:23:5:23:25 | Function arg | Qualified function name 'B.arg' is not unique. Please fix. |
+| code/class_properties.py:7:5:7:18 | Function arg | Qualified function name 'Prop.arg' is not unique within its file. Please fix. |
+| code/class_properties.py:12:5:12:25 | Function arg | Qualified function name 'Prop.arg' is not unique within its file. Please fix. |
+| code/class_properties.py:17:5:17:18 | Function arg | Qualified function name 'Prop.arg' is not unique within its file. Please fix. |
pointsTo_found_typeTracker_notFound
-| code/class_simple.py:24:1:24:15 | ControlFlowNode for Attribute() | A.some_method |
-| code/class_simple.py:25:1:25:21 | ControlFlowNode for Attribute() | A.some_staticmethod |
-| code/class_simple.py:26:1:26:20 | ControlFlowNode for Attribute() | A.some_classmethod |
-| code/class_simple.py:28:1:28:21 | ControlFlowNode for Attribute() | A.some_staticmethod |
-| code/class_simple.py:29:1:29:20 | ControlFlowNode for Attribute() | A.some_classmethod |
-| code/runtime_decision.py:18:1:18:6 | ControlFlowNode for func() | rd_bar |
-| code/runtime_decision.py:18:1:18:6 | ControlFlowNode for func() | rd_foo |
-| code/runtime_decision.py:26:1:26:7 | ControlFlowNode for func2() | rd_bar |
-| code/runtime_decision.py:26:1:26:7 | ControlFlowNode for func2() | rd_foo |
-| code/simple.py:15:1:15:5 | ControlFlowNode for foo() | foo |
-| code/simple.py:16:1:16:14 | ControlFlowNode for indirect_foo() | foo |
-| code/simple.py:17:1:17:5 | ControlFlowNode for bar() | bar |
-| code/simple.py:18:1:18:5 | ControlFlowNode for lam() | lambda[simple.py:12:7] |
-| code/underscore_prefix_func_name.py:18:5:18:19 | ControlFlowNode for some_function() | some_function |
-| code/underscore_prefix_func_name.py:21:5:21:19 | ControlFlowNode for some_function() | some_function |
-| code/underscore_prefix_func_name.py:24:1:24:21 | ControlFlowNode for _works_since_called() | _works_since_called |
+| code/class_attr_assign.py:10:9:10:27 | ControlFlowNode for Attribute() | my_func |
+| code/class_attr_assign.py:11:9:11:25 | ControlFlowNode for Attribute() | my_func |
+| code/class_attr_assign.py:26:9:26:25 | ControlFlowNode for Attribute() | DummyObject.method |
+| code/class_super.py:50:1:50:6 | ControlFlowNode for Attribute() | outside_def |
+| code/conditional_in_argument.py:18:5:18:11 | ControlFlowNode for Attribute() | X.bar |
+| code/func_defined_outside_class.py:21:1:21:11 | ControlFlowNode for Attribute() | A.foo |
+| code/func_defined_outside_class.py:22:1:22:15 | ControlFlowNode for Attribute() | outside |
+| code/func_defined_outside_class.py:24:1:24:14 | ControlFlowNode for Attribute() | outside_sm |
+| code/func_defined_outside_class.py:25:1:25:14 | ControlFlowNode for Attribute() | outside_cm |
+| code/func_defined_outside_class.py:38:11:38:21 | ControlFlowNode for _gen() | B._gen |
+| code/func_defined_outside_class.py:39:11:39:21 | ControlFlowNode for _gen() | B._gen |
+| code/func_defined_outside_class.py:42:1:42:7 | ControlFlowNode for Attribute() | B._gen.func |
+| code/func_defined_outside_class.py:43:1:43:7 | ControlFlowNode for Attribute() | B._gen.func |
+| code/type_tracking_limitation.py:8:1:8:3 | ControlFlowNode for x() | my_func |
typeTracker_found_pointsTo_notFound
+| code/callable_as_argument.py:29:5:29:12 | ControlFlowNode for Attribute() | test_class.InsideTestFunc.sm |
+| code/class_more_mro2.py:18:9:18:21 | ControlFlowNode for Attribute() | A.foo |
+| code/class_more_mro2.py:21:1:21:8 | ControlFlowNode for Attribute() | A.foo |
+| code/class_more_mro.py:24:9:24:21 | ControlFlowNode for Attribute() | A.foo |
+| code/class_more_mro.py:34:1:34:16 | ControlFlowNode for Attribute() | A.foo |
+| code/class_super.py:43:9:43:21 | ControlFlowNode for Attribute() | A.bar |
+| code/class_super.py:44:9:44:27 | ControlFlowNode for Attribute() | A.bar |
+| code/class_super.py:63:1:63:18 | ControlFlowNode for Attribute() | A.foo |
+| code/class_super.py:78:9:78:28 | ControlFlowNode for Attribute() | A.foo |
+| code/class_super.py:81:1:81:12 | ControlFlowNode for Attribute() | C.foo_on_A |
+| code/class_super.py:92:9:92:21 | ControlFlowNode for Attribute() | X.foo |
+| code/class_super.py:97:9:97:21 | ControlFlowNode for Attribute() | X.foo |
+| code/class_super.py:97:9:97:21 | ControlFlowNode for Attribute() | Y.foo |
+| code/class_super.py:101:1:101:7 | ControlFlowNode for Attribute() | Z.foo |
+| code/class_super.py:108:1:108:8 | ControlFlowNode for Attribute() | Z.foo |
+| code/def_in_function.py:22:5:22:11 | ControlFlowNode for Attribute() | test.A.foo |
+| code/nested_class.py:83:9:83:16 | ControlFlowNode for Attribute() | X.class_def_in_func.Y.meth |
+| code/underscore_prefix_func_name.py:14:5:14:19 | ControlFlowNode for some_function() | some_function |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
index cba9bc6e1d8..d00d0ae1301 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
@@ -4,6 +4,8 @@ private import semmle.python.dataflow.new.internal.DataFlowDispatch as TT
/** Holds when `call` is resolved to `callable` using points-to based call-graph. */
predicate pointsToCallEdge(CallNode call, Function callable) {
+ exists(call.getLocation().getFile().getRelativePath()) and
+ exists(callable.getLocation().getFile().getRelativePath()) and
exists(PythonFunctionValue funcValue |
funcValue.getScope() = callable and
call = funcValue.getACall()
@@ -12,6 +14,8 @@ predicate pointsToCallEdge(CallNode call, Function callable) {
/** Holds when `call` is resolved to `callable` using type-tracking based call-graph. */
predicate typeTrackerCallEdge(CallNode call, Function callable) {
+ exists(call.getLocation().getFile().getRelativePath()) and
+ exists(callable.getLocation().getFile().getRelativePath()) and
exists(TT::DataFlowCallable dfCallable, TT::DataFlowCall dfCall |
dfCallable.getScope() = callable and
dfCall.getNode() = call and
@@ -19,6 +23,16 @@ predicate typeTrackerCallEdge(CallNode call, Function callable) {
)
}
+/** Holds if the call edge is from a class call. */
+predicate typeTrackerClassCall(CallNode call, Function callable) {
+ exists(call.getLocation().getFile().getRelativePath()) and
+ exists(callable.getLocation().getFile().getRelativePath()) and
+ exists(TT::NormalCall cc |
+ cc = TT::TNormalCall(call, _, any(TT::TCallType t | t instanceof TT::CallTypeClass)) and
+ TT::TFunction(callable) = TT::viableCallable(cc)
+ )
+}
+
class CallGraphTest extends InlineExpectationsTest {
CallGraphTest() { this = "CallGraphTest" }
@@ -35,7 +49,20 @@ class CallGraphTest extends InlineExpectationsTest {
|
location = call.getLocation() and
element = call.toString() and
- value = betterQualName(target)
+ if call.getLocation().getFile() = target.getLocation().getFile()
+ then value = betterQualName(target)
+ else
+ exists(string fixedRelativePath |
+ fixedRelativePath =
+ target
+ .getLocation()
+ .getFile()
+ .getRelativePath()
+ .regexpCapture(".*/CallGraph[^/]*/(.*)", 1)
+ |
+ // the value needs to be enclosed in quotes to allow special characters
+ value = "\"" + fixedRelativePath + ":" + betterQualName(target) + "\""
+ )
)
}
}
@@ -53,9 +80,15 @@ string betterQualName(Function func) {
}
query predicate debug_callableNotUnique(Function callable, string message) {
- exists(Function f | f != callable and f.getQualifiedName() = callable.getQualifiedName()) and
+ exists(callable.getLocation().getFile().getRelativePath()) and
+ exists(Function f |
+ f != callable and
+ f.getQualifiedName() = callable.getQualifiedName() and
+ f.getLocation().getFile() = callable.getLocation().getFile()
+ ) and
message =
- "Qualified function name '" + callable.getQualifiedName() + "' is not unique. Please fix."
+ "Qualified function name '" + callable.getQualifiedName() +
+ "' is not unique within its file. Please fix."
}
query predicate pointsTo_found_typeTracker_notFound(CallNode call, string qualname) {
@@ -70,6 +103,10 @@ query predicate typeTracker_found_pointsTo_notFound(CallNode call, string qualna
exists(Function target |
not pointsToCallEdge(call, target) and
typeTrackerCallEdge(call, target) and
- qualname = betterQualName(target)
+ qualname = betterQualName(target) and
+ // We filter out result differences for points-to and type-tracking for class calls,
+ // since otherwise it gives too much noise (these are just handled differently
+ // between the two).
+ not typeTrackerClassCall(call, target)
)
}
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/aliased_import.py b/python/ql/test/experimental/library-tests/CallGraph/code/aliased_import.py
new file mode 100644
index 00000000000..ac0c7184881
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/aliased_import.py
@@ -0,0 +1 @@
+from .simple import foo
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/bound_method_arg.py b/python/ql/test/experimental/library-tests/CallGraph/code/bound_method_arg.py
new file mode 100644
index 00000000000..a59510ec681
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/bound_method_arg.py
@@ -0,0 +1,16 @@
+class Foo(object):
+ def meth(self, arg):
+ print("Foo.meth", arg)
+
+ @classmethod
+ def cm(cls, arg):
+ print("Foo.cm", arg)
+
+
+def call_func(func):
+ func(42) # $ pt,tt=Foo.meth pt,tt=Foo.cm
+
+
+foo = Foo()
+call_func(foo.meth) # $ pt,tt=call_func
+call_func(Foo.cm) # $ pt,tt=call_func
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/callable_as_argument.py b/python/ql/test/experimental/library-tests/CallGraph/code/callable_as_argument.py
new file mode 100644
index 00000000000..ab8e0cf6ba9
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/callable_as_argument.py
@@ -0,0 +1,55 @@
+# ==============================================================================
+# function
+# ==============================================================================
+
+def call_func(f):
+ f() # $ pt,tt=my_func pt,tt=test_func.inside_test_func
+
+
+def my_func():
+ print("my_func")
+
+call_func(my_func) # $ pt,tt=call_func
+
+
+def test_func():
+ def inside_test_func():
+ print("inside_test_func")
+
+ call_func(inside_test_func) # $ pt,tt=call_func
+
+test_func() # $ pt,tt=test_func
+
+
+# ==============================================================================
+# class
+# ==============================================================================
+
+def class_func(cls):
+ cls.sm() # $ pt,tt=MyClass.sm tt=test_class.InsideTestFunc.sm
+ cls(42) # $ tt=MyClass.__init__ tt=test_class.InsideTestFunc.__init__
+
+
+class MyClass(object):
+ def __init__(self, arg):
+ print(self, arg)
+
+ @staticmethod
+ def sm():
+ print("MyClass.staticmethod")
+
+class_func(MyClass) # $ pt,tt=class_func
+
+
+def test_class():
+ class InsideTestFunc(object):
+ def __init__(self, arg):
+ print(self, arg)
+
+ @staticmethod
+ def sm():
+ print("InsideTestFunc.staticmethod")
+
+ class_func(InsideTestFunc) # $ pt,tt=class_func
+
+test_class() # $ pt,tt=test_class
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_advanced.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_advanced.py
deleted file mode 100644
index 94667621f34..00000000000
--- a/python/ql/test/experimental/library-tests/CallGraph/code/class_advanced.py
+++ /dev/null
@@ -1,40 +0,0 @@
-class B(object):
-
- def __init__(self, arg):
- print('B.__init__', arg)
- self._arg = arg
-
- def __str__(self):
- print('B.__str__')
- return 'B (arg={})'.format(self.arg)
-
- def __add__(self, other):
- print('B.__add__')
- if isinstance(other, B):
- return B(self.arg + other.arg)
- return B(self.arg + other)
-
- @property
- def arg(self):
- print('B.arg getter')
- return self._arg
-
- @arg.setter
- def arg(self, value):
- print('B.arg setter')
- self._arg = value
-
-
-b1 = B(1)
-b2 = B(2)
-b3 = b1 + b2
-
-print('value printing:', str(b1))
-print('value printing:', str(b2))
-print('value printing:', str(b3))
-
-b3.arg = 42
-b4 = b3 + 100
-
-# this calls `str(b4)` inside
-print('value printing:', b4)
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py
new file mode 100644
index 00000000000..605375925f7
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py
@@ -0,0 +1,30 @@
+def my_func():
+ print("my_func")
+
+class Foo(object):
+ def __init__(self, func):
+ self.indirect_ref = func
+ self.direct_ref = my_func
+
+ def later(self):
+ self.indirect_ref() # $ pt=my_func MISSING: tt=my_func
+ self.direct_ref() # $ pt=my_func MISSING: tt=my_func
+
+foo = Foo(my_func) # $ tt=Foo.__init__
+foo.later() # $ pt,tt=Foo.later
+
+
+class DummyObject(object):
+ def method(self):
+ print("DummyObject.method")
+
+class Bar(object):
+ def __init__(self):
+ self.obj = DummyObject()
+
+ def later(self):
+ self.obj.method() # $ pt=DummyObject.method MISSING: tt=DummyObject.method
+
+
+bar = Bar(my_func) # $ tt=Bar.__init__
+bar.later() # $ pt,tt=Bar.later
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
new file mode 100644
index 00000000000..ce348fee15f
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
@@ -0,0 +1,66 @@
+class X(object):
+ def __init__(self, arg):
+ print("X.__init__", arg)
+
+X(42) # $ tt=X.__init__
+print()
+
+
+class Y(X):
+ def __init__(self, arg):
+ print("Y.__init__", arg)
+ super().__init__(-arg) # $ pt,tt=X.__init__
+
+Y(43) # $ tt=Y.__init__
+print()
+
+# ---
+
+class WithNew(object):
+ def __new__(cls, arg):
+ print("WithNew.__new__", arg)
+ inst = super().__new__(cls)
+ assert isinstance(inst, cls)
+ inst.some_method() # $ MISSING: pt,tt=WithNew.some_method
+ return inst
+
+ def __init__(self, arg):
+ print("WithNew.__init__", arg)
+
+ def some_method(self):
+ print("WithNew.__init__")
+
+WithNew(44) # $ tt=WithNew.__new__ tt=WithNew.__init__
+print()
+
+
+class ExtraCallToInit(object):
+ def __new__(cls, arg):
+ print("ExtraCallToInit.__new__", arg)
+ inst = super().__new__(cls)
+ assert isinstance(inst, cls)
+ # you're not supposed to do this, since it will cause the __init__ method to be run twice.
+ inst.__init__(1001) # $ MISSING: pt,tt=ExtraCallToInit.__init__
+ return inst
+
+ def __init__(self, arg):
+ print("ExtraCallToInit.__init__", arg, self)
+
+ExtraCallToInit(1000) # $ tt=ExtraCallToInit.__new__ tt=ExtraCallToInit.__init__
+print()
+
+
+class InitNotCalled(object):
+ """as described in https://docs.python.org/3/reference/datamodel.html#object.__new__
+ __init__ will only be called when the returned object from __new__ is an instance of
+ the `cls` parameter...
+ """
+ def __new__(cls, arg):
+ print("InitNotCalled.__new__", arg)
+ return False
+
+ def __init__(self, arg):
+ print("InitNotCalled.__init__", arg)
+
+InitNotCalled(2000) # $ tt=InitNotCalled.__new__ SPURIOUS: tt=InitNotCalled.__init__
+print()
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_decorator.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_decorator.py
new file mode 100644
index 00000000000..910e24d2519
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_decorator.py
@@ -0,0 +1,34 @@
+# decorated class
+
+def my_class_decorator(cls):
+ print("dummy decorator")
+ return cls
+
+@my_class_decorator # $ pt=my_class_decorator tt=my_class_decorator
+class A(object):
+ def foo(self):
+ pass
+
+a = A()
+a.foo() # $ pt,tt=A.foo
+
+class B(A):
+ def bar(self):
+ self.foo() # $ pt,tt=A.foo
+
+
+# decorated class, unknown decorator
+
+from some_unknown_module import unknown_class_decorator
+
+@unknown_class_decorator
+class X(object):
+ def foo(self):
+ pass
+
+x = X()
+x.foo() # $ pt,tt=X.foo
+
+class Y(X):
+ def bar(self):
+ self.foo() # $ pt,tt=X.foo
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_more_mro.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_more_mro.py
new file mode 100644
index 00000000000..d8eae002cb4
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_more_mro.py
@@ -0,0 +1,35 @@
+class Base(object):
+ def foo(self):
+ print("Base.foo")
+
+
+class A(Base):
+ def foo(self):
+ print("A.foo")
+ super().foo() # $ pt,tt=Base.foo
+
+class ASub(A):
+ pass
+
+class B(Base):
+ def foo(self):
+ print("B.foo")
+ # NOTE: If this missing result is fixed, please update the QLDoc for
+ # `getNextClassInMro` as well
+ super().foo() # $ pt,tt=Base.foo MISSING: pt,tt=A.foo
+
+class BSub(B):
+ def bar(self):
+ print("BSub.bar")
+ super().foo() # $ pt,tt=B.foo SPURIOUS: tt=A.foo
+
+bs = BSub()
+bs.foo() # $ pt,tt=B.foo
+bs.bar() # $ pt,tt=BSub.bar
+
+print("! Indirect")
+class Indirect(BSub, ASub):
+ pass
+
+Indirect().foo() # $ pt,tt=B.foo SPURIOUS: tt=A.foo
+Indirect().bar() # $ pt,tt=BSub.bar
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_more_mro2.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_more_mro2.py
new file mode 100644
index 00000000000..6a64f905412
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_more_mro2.py
@@ -0,0 +1,22 @@
+class A(object):
+ def foo(self):
+ print("A.foo")
+
+class B(A):
+ pass
+
+b = B()
+b.foo() # $ pt,tt=A.foo
+
+class C(A):
+ def foo(self):
+ print("C.foo")
+
+class BC(B, C):
+ def bar(self):
+ print("BC.bar")
+ super().foo() # $ pt,tt=C.foo SPURIOUS: tt=A.foo
+
+bc = BC()
+bc.foo() # $ pt,tt=C.foo SPURIOUS: tt=A.foo
+bc.bar() # $ pt,tt=BC.bar
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_properties.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_properties.py
new file mode 100644
index 00000000000..06e4f3f3bd2
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_properties.py
@@ -0,0 +1,43 @@
+class Prop(object):
+ def __init__(self, arg):
+ self._arg = arg
+ self._arg2 = arg
+
+ @property
+ def arg(self):
+ print('Prop.arg getter')
+ return self._arg
+
+ @arg.setter
+ def arg(self, value):
+ print('Prop.arg setter')
+ self._arg = value
+
+ @arg.deleter
+ def arg(self):
+ print('Prop.arg deleter')
+ # haha, you cannot delete me!
+
+ def _arg2_getter(self):
+ print('Prop.arg2 getter')
+ return self._arg2
+
+ def _arg2_setter(self, value):
+ print('Prop.arg2 setter')
+ self._arg2 = value
+
+ def _arg2_deleter(self):
+ print('Prop.arg2 deleter')
+ # haha, you cannot delete me!
+
+ arg2 = property(_arg2_getter, _arg2_setter, _arg2_deleter)
+
+prop = Prop(42) # $ tt=Prop.__init__
+
+prop.arg
+prop.arg = 43
+del prop.arg
+
+prop.arg2
+prop.arg2 = 43
+del prop.arg2
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_simple.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_simple.py
deleted file mode 100644
index f201e648e3a..00000000000
--- a/python/ql/test/experimental/library-tests/CallGraph/code/class_simple.py
+++ /dev/null
@@ -1,29 +0,0 @@
-class A(object):
-
- def __init__(self, arg):
- print('A.__init__', arg)
- self.arg = arg
-
- def some_method(self):
- print('A.some_method', self)
-
- @staticmethod
- def some_staticmethod():
- print('A.some_staticmethod')
-
- @classmethod
- def some_classmethod(cls):
- print('A.some_classmethod', cls)
-
-
-# TODO: Figure out how to annotate class instantiation (and add one here).
-# Current points-to says it's a call to the class (instead of __init__/__new__/metaclass-something).
-# However, current test setup uses "callable" for naming, and expects things to be Function.
-a = A(42)
-
-a.some_method() # $ pt=A.some_method
-a.some_staticmethod() # $ pt=A.some_staticmethod
-a.some_classmethod() # $ pt=A.some_classmethod
-
-A.some_staticmethod() # $ pt=A.some_staticmethod
-A.some_classmethod() # $ pt=A.some_classmethod
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py
new file mode 100644
index 00000000000..7b8df9c4139
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py
@@ -0,0 +1,29 @@
+class B(object):
+
+ def __init__(self, arg):
+ print('B.__init__', arg)
+ self.arg = arg
+
+ def __str__(self):
+ print('B.__str__')
+ return 'B (arg={})'.format(self.arg)
+
+ def __add__(self, other):
+ print('B.__add__')
+ if isinstance(other, B):
+ return B(self.arg + other.arg) # $ tt=B.__init__
+ return B(self.arg + other) # $ tt=B.__init__
+
+b = B(1) # $ tt=B.__init__
+
+print(str(b))
+# this calls `str(b)` inside
+print(b)
+
+
+
+b2 = B(2) # $ tt=B.__init__
+
+# __add__ is called
+b + b2
+b + 100
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_subclass.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_subclass.py
new file mode 100644
index 00000000000..cd2ee42fa03
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_subclass.py
@@ -0,0 +1,178 @@
+class A(object):
+
+ def __init__(self, arg):
+ print('A.__init__', arg)
+ self.arg = arg
+
+ def some_method(self):
+ print('A.some_method', self)
+
+ @staticmethod
+ def some_staticmethod():
+ print('A.some_staticmethod')
+
+ @classmethod
+ def some_classmethod(cls):
+ print('A.some_classmethod', cls)
+
+
+a = A(42) # $ tt=A.__init__
+
+a.some_method() # $ pt,tt=A.some_method
+a.some_staticmethod() # $ pt,tt=A.some_staticmethod
+a.some_classmethod() # $ pt,tt=A.some_classmethod
+
+A.some_method(a) # $ pt,tt=A.some_method
+A.some_staticmethod() # $ pt,tt=A.some_staticmethod
+A.some_classmethod() # $ pt,tt=A.some_classmethod
+
+print("- type()")
+type(a).some_method(a) # $ pt,tt=A.some_method
+type(a).some_staticmethod() # $ pt,tt=A.some_staticmethod
+type(a).some_classmethod() # $ pt,tt=A.some_classmethod
+
+# Subclass test
+print("\n! B")
+class B(A):
+ pass
+
+b = B(42) # $ tt=A.__init__
+
+b.some_method() # $ pt,tt=A.some_method
+b.some_staticmethod() # $ pt,tt=A.some_staticmethod
+b.some_classmethod() # $ pt,tt=A.some_classmethod
+
+B.some_method(b) # $ pt,tt=A.some_method
+B.some_staticmethod() # $ pt,tt=A.some_staticmethod
+B.some_classmethod() # $ pt,tt=A.some_classmethod
+
+print("- type()")
+type(b).some_method(b) # $ pt,tt=A.some_method
+type(b).some_staticmethod() # $ pt,tt=A.some_staticmethod
+type(b).some_classmethod() # $ pt,tt=A.some_classmethod
+
+# Subclass with method override
+print("\n! Subclass with method override")
+class C(A):
+ def some_method(self):
+ print('C.some_method', self)
+
+c = C(42) # $ tt=A.__init__
+c.some_method() # $ pt,tt=C.some_method
+
+
+class D(object):
+ def some_method(self):
+ print('D.some_method', self)
+
+class E(C, D):
+ pass
+
+e = E(42) # $ tt=A.__init__
+e.some_method() # $ pt,tt=C.some_method
+
+class F(D, C):
+ pass
+
+f = F(42) # $ tt=A.__init__
+f.some_method() # $ pt,tt=D.some_method
+
+# ------------------------------------------------------------------------------
+# self/cls in methods
+# ------------------------------------------------------------------------------
+
+class Base(object):
+ def foo(self):
+ print('Base.foo')
+
+ def bar(self):
+ print('Base.bar')
+
+ def call_stuff(self):
+ self.foo() # $ pt,tt=Base.foo pt,tt=Sub.foo pt,tt=Mixin.foo
+ self.bar() # $ pt,tt=Base.bar
+
+ self.sm() # $ pt,tt=Base.sm
+ self.cm() # $ pt,tt=Base.cm
+
+ self.sm2() # $ pt,tt=Base.sm2 pt,tt=Sub.sm2
+ self.cm2() # $ pt,tt=Base.cm2 pt,tt=Sub.cm2
+
+ type(self).sm2() # $ pt,tt=Base.sm2 pt,tt=Sub.sm2
+ type(self).cm2() # $ pt,tt=Base.cm2 pt,tt=Sub.cm2
+
+ @staticmethod
+ def sm():
+ print("Base.sm")
+
+ @classmethod
+ def cm(cls):
+ print("Base.cm")
+
+ @staticmethod
+ def sm2():
+ print("Base.sm2")
+
+ @classmethod
+ def cm2(cls):
+ print("Base.cm2")
+
+ @classmethod
+ def call_from_cm(cls):
+ cls.sm() # $ pt,tt=Base.sm
+ cls.cm() # $ pt,tt=Base.cm
+
+ cls.sm2() # $ pt,tt=Base.sm2 pt,tt=Sub.sm2
+ cls.cm2() # $ pt,tt=Base.cm2 pt,tt=Sub.cm2
+
+base = Base()
+print("! base.call_stuff()")
+base.call_stuff() # $ pt,tt=Base.call_stuff
+print("! Base.call_from_cm()")
+Base.call_from_cm() # $ pt,tt=Base.call_from_cm
+
+class Sub(Base):
+ def foo(self):
+ print("Sub.foo")
+
+ def foo_on_super(self):
+ sup = super()
+ sup.foo() # $ pt,tt=Base.foo
+
+ def also_call_stuff(self):
+ self.sm() # $ pt,tt=Base.sm
+ self.cm() # $ pt,tt=Base.cm
+
+ self.sm2() # $ pt,tt=Sub.sm2
+ self.cm2() # $ pt,tt=Sub.cm2
+
+ @staticmethod
+ def sm2():
+ print("Sub.sm2")
+
+ @classmethod
+ def cm2(cls):
+ print("Sub.cm2")
+
+sub = Sub()
+print("! sub.foo_on_super()")
+sub.foo_on_super() # $ pt,tt=Sub.foo_on_super
+print("! sub.call_stuff()")
+sub.call_stuff() # $ pt,tt=Base.call_stuff
+print("! sub.also_call_stuff()")
+sub.also_call_stuff() # $ pt,tt=Sub.also_call_stuff
+print("! Sub.call_from_cm()")
+Sub.call_from_cm() # $ pt,tt=Base.call_from_cm
+
+
+class Mixin(object):
+ def foo(self):
+ print("Mixin.foo")
+
+class SubWithMixin(Mixin, Base):
+ # the ordering here means that in Base.call_stuff, the call to self.foo will go to Mixin.foo
+ pass
+
+swm = SubWithMixin()
+print("! swm.call_stuff()")
+swm.call_stuff() # $ pt,tt=Base.call_stuff
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_subclass2.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_subclass2.py
new file mode 100644
index 00000000000..43f5c2d81f8
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_subclass2.py
@@ -0,0 +1,38 @@
+class Base(object):
+ def foo(self):
+ print("Base.foo")
+
+ def call_stuff(self):
+ print("Base.call_stuff")
+ self.foo() # $ pt,tt=Base.foo pt,tt=X.foo
+
+class X(object):
+ def __init__(self):
+ print("X.__init__")
+
+ def foo(self):
+ print("X.foo")
+
+class Y(object):
+ def __init__(self):
+ print("Y.__init__")
+
+ def foo(self):
+ print("Y.foo")
+
+class Contrived(X, Y, Base):
+ pass
+
+contrived = Contrived() # $ tt=X.__init__
+contrived.foo() # $ pt,tt=X.foo
+contrived.call_stuff() # $ pt,tt=Base.call_stuff
+
+# Ensure that we don't mix up __init__ resolution for Contrived() due to MRO
+# approximation
+
+class HasInit(object):
+ def __init__(self):
+ pass
+
+class TryingToTrickYou(Contrived, HasInit):
+ pass
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_super.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_super.py
new file mode 100644
index 00000000000..dc3a58fb36c
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_super.py
@@ -0,0 +1,108 @@
+def outside_def(self):
+ print("outside_def")
+ try:
+ super().foo()
+ except RuntimeError:
+ pass
+
+
+class A(object):
+ def foo(self):
+ print("A.foo")
+
+ @classmethod
+ def bar(cls):
+ print("A.bar")
+
+class B(A):
+ def foo(self):
+ print("B.foo")
+
+ def foo_on_super(self):
+ print("B.foo_on_super")
+ super().foo() # $ pt,tt=A.foo
+ super(B, self).foo() # $ pt,tt=A.foo
+
+ od = outside_def
+
+ @staticmethod
+ def sm():
+ try:
+ super().foo()
+ except RuntimeError:
+ print("B.sm")
+ pass
+
+ @classmethod
+ def bar(cls):
+ print("B.bar")
+
+ @classmethod
+ def bar_on_super(cls):
+ print("B.bar_on_super")
+ super().bar() # $ tt=A.bar
+ super(B, cls).bar() # $ tt=A.bar
+
+
+b = B()
+b.foo() # $ pt,tt=B.foo
+b.foo_on_super() # $ pt,tt=B.foo_on_super
+b.od() # $ pt=outside_def
+b.sm() # $ pt,tt=B.sm
+
+print("="*10, "static method")
+B.bar() # $ pt,tt=B.bar
+B.bar_on_super() # $ pt,tt=B.bar_on_super
+
+
+print("="*10, "Manual calls to super")
+
+super(B, b).foo() # $ pt,tt=A.foo
+
+assert A.foo == super(B, B).foo
+super(B, B).foo(b) # $ tt=A.foo
+
+try:
+ super(B, 42).foo()
+except TypeError:
+ pass
+
+# For some reason, points-to isn't able to resolve any calls from here on. I've tried to
+# comment out both try-except blocks, but that did not solve the problem :|
+
+print("="*10, "C")
+
+class C(B):
+ def foo_on_A(self):
+ print('C.foo_on_A')
+ super(B, self).foo() # $ tt=A.foo
+
+c = C()
+c.foo_on_A() # $ tt=C.foo_on_A
+
+print("="*10, "Diamon hierachy")
+
+class X(object):
+ def foo(self):
+ print('X.foo')
+
+class Y(X):
+ def foo(self):
+ print('Y.foo')
+ super().foo() # $ tt=X.foo
+
+class Z(X):
+ def foo(self):
+ print('Z.foo')
+ super().foo() # $ tt=X.foo tt=Y.foo
+
+print("! z.foo()")
+z = Z()
+z.foo() # $ tt=Z.foo
+
+class ZY(Z, Y):
+ pass
+
+print("! zy.foo()")
+zy = ZY()
+zy.foo() # $ tt=Z.foo
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/conditional_in_argument.py b/python/ql/test/experimental/library-tests/CallGraph/code/conditional_in_argument.py
new file mode 100644
index 00000000000..885393fae90
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/conditional_in_argument.py
@@ -0,0 +1,36 @@
+class X(object):
+ def foo(self, *args):
+ print("X.foo", args)
+
+ def bar(self, *args):
+ print("X.bar", args)
+
+
+def func(cond=True):
+ x = X()
+
+ # ok
+ x.foo() # $ pt,tt=X.foo
+ x.bar() # $ pt,tt=X.bar
+
+ # the conditional in the argument makes us stop tracking the class instance :|
+ x.foo(1 if cond else 0) # $ pt,tt=X.foo
+ x.bar() # $ pt=X.bar MISSING: tt=X.bar
+
+
+func() # $ pt,tt=func
+
+def func2(cond=True):
+ y = X()
+
+ # ok
+ y.foo() # $ pt,tt=X.foo
+ y.bar() # $ pt,tt=X.bar
+
+ if cond:
+ arg = 1
+ else:
+ arg = 0
+
+ y.foo(arg) # $ pt,tt=X.foo
+ y.bar() # $ pt,tt=X.bar
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/def_in_function.py b/python/ql/test/experimental/library-tests/CallGraph/code/def_in_function.py
new file mode 100644
index 00000000000..8b7e0dcfb89
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/def_in_function.py
@@ -0,0 +1,24 @@
+def test():
+ def foo():
+ print("foo")
+
+ foo() # $ pt,tt=test.foo
+
+ def bar():
+ print("bar")
+ def baz():
+ print("baz")
+ baz() # $ pt,tt=test.bar.baz
+ return baz
+
+ baz_ref = bar() # $ pt,tt=test.bar
+ baz_ref() # $ pt,tt=test.bar.baz
+
+ class A(object):
+ def foo(self):
+ print("A.foo")
+
+ a = A()
+ a.foo() # $ tt=test.A.foo
+
+test() # $ pt,tt=test
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/func_defined_outside_class.py b/python/ql/test/experimental/library-tests/CallGraph/code/func_defined_outside_class.py
new file mode 100644
index 00000000000..c0ff09d4987
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/func_defined_outside_class.py
@@ -0,0 +1,43 @@
+def outside(self):
+ print("outside", self)
+
+def outside_sm():
+ print("outside_sm")
+
+def outside_cm(cls):
+ print("outside_cm", cls)
+
+class A(object):
+ def foo(self):
+ print("A.foo")
+
+ foo_ref = foo
+
+ outside_ref = outside
+ outside_sm = staticmethod(outside_sm)
+ outside_cm = classmethod(outside_cm)
+
+a = A()
+a.foo_ref() # $ pt=A.foo
+a.outside_ref() # $ pt=outside
+
+a.outside_sm() # $ pt=outside_sm
+a.outside_cm() # $ pt=outside_cm
+
+# ===
+
+print("\n! B")
+
+# this pattern was seen in django
+class B(object):
+ def _gen(value):
+ def func(self):
+ print("B._gen.func", value)
+ return func
+
+ foo = _gen("foo") # $ pt=B._gen
+ bar = _gen("bar") # $ pt=B._gen
+
+b = B()
+b.foo() # $ pt=B._gen.func
+b.bar() # $ pt=B._gen.func
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/nested_class.py b/python/ql/test/experimental/library-tests/CallGraph/code/nested_class.py
new file mode 100644
index 00000000000..1248dbdb426
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/nested_class.py
@@ -0,0 +1,87 @@
+class A(object):
+ class B(object):
+ @staticmethod
+ def foo():
+ print("A.B.foo")
+
+ @staticmethod
+ def bar():
+ print("A.B.bar")
+ A.B.foo() # $ pt,tt=A.B.foo
+
+
+A.B.bar() # $ pt,tt=A.B.bar
+
+
+ab = A.B()
+ab.bar() # $ pt,tt=A.B.bar
+
+# ==============================================================================
+
+class OuterBase(object):
+ def foo(self):
+ print("OuterBase.foo")
+
+class InnerBase(object):
+ def foo(self):
+ print("InnerBase.foo")
+
+class Outer(OuterBase):
+ def foo(self):
+ print("Outer.foo")
+ super().foo() # $ pt,tt=OuterBase.foo
+
+ class Inner(InnerBase):
+ def foo(self):
+ print("Inner.foo")
+ super().foo() # $ pt,tt=InnerBase.foo
+
+outer = Outer()
+outer.foo() # $ pt,tt=Outer.foo
+
+inner = Outer.Inner()
+inner.foo() # $ pt,tt=Outer.Inner.foo
+
+# ==============================================================================
+
+class Base(object):
+ def foo(self):
+ print("Base.foo")
+
+class Base2(object):
+ def foo(self):
+ print("Base2.foo")
+
+class X(Base):
+ def meth(self):
+ print("X.meth")
+ super().foo() # $ pt,tt=Base.foo
+
+ def inner_func():
+ print("inner_func")
+ try:
+ super().foo()
+ except RuntimeError:
+ print("RuntimeError, as expected")
+
+ inner_func() # $ pt,tt=X.meth.inner_func
+
+ def inner_func2(this_works):
+ print("inner_func2")
+ super().foo() # $ MISSING: tt=Base.foo
+
+ inner_func2(self) # $ pt,tt=X.meth.inner_func2
+
+ def class_def_in_func(self):
+ print("X.class_def_in_func")
+ class Y(Base2):
+ def meth(self):
+ print("Y.meth")
+ super().foo() # $ pt,tt=Base2.foo
+
+ y = Y()
+ y.meth() # $ tt=X.class_def_in_func.Y.meth
+
+x = X()
+x.meth() # $ pt,tt=X.meth
+x.class_def_in_func() # $ pt=X.class_def_in_func tt=X.class_def_in_func
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/relative_import.py b/python/ql/test/experimental/library-tests/CallGraph/code/relative_import.py
new file mode 100644
index 00000000000..06191fed7e9
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/relative_import.py
@@ -0,0 +1,7 @@
+def test_relative_import():
+ from .simple import foo
+ foo() # $ pt,tt="code/simple.py:foo"
+
+def test_aliased_relative_import():
+ from .aliased_import import foo
+ foo() # $ pt,tt="code/simple.py:foo"
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision.py b/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision.py
index 3c4ebbb73e1..3901a770188 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision.py
@@ -15,7 +15,7 @@ if len(sys.argv) >= 2 and not sys.argv[1] in ['0', 'False', 'false']:
else:
func = rd_bar
-func() # $ pt=rd_foo pt=rd_bar
+func() # $ pt,tt=rd_foo pt,tt=rd_bar
# Random doesn't work with points-to :O
if random.random() < 0.5:
@@ -23,4 +23,4 @@ if random.random() < 0.5:
else:
func2 = rd_bar
-func2() # $ pt=rd_foo pt=rd_bar
+func2() # $ pt,tt=rd_foo pt,tt=rd_bar
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/shadowing.py b/python/ql/test/experimental/library-tests/CallGraph/code/shadowing.py
new file mode 100644
index 00000000000..fdbf554084e
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/shadowing.py
@@ -0,0 +1,22 @@
+def foo(n=0):
+ print("foo", n)
+ if n > 0:
+ foo(n-1) # $ pt,tt=foo
+
+foo(1) # $ pt,tt=foo
+
+
+def test():
+ def foo():
+ print("test.foo")
+
+ foo() # $ pt,tt=test.foo
+
+
+class A(object):
+ def foo(self):
+ print("A.foo")
+ foo() # $ pt=foo MISSING: tt=foo
+
+a = A()
+a.foo() # $ pt,tt=A.foo
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/simple.py b/python/ql/test/experimental/library-tests/CallGraph/code/simple.py
index ac07ace93b2..7d7d4865049 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/simple.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/simple.py
@@ -12,9 +12,9 @@ def bar():
lam = lambda: print("lambda called")
-foo() # $ pt=foo
-indirect_foo() # $ pt=foo
-bar() # $ pt=bar
-lam() # $ pt=lambda[simple.py:12:7]
+foo() # $ pt,tt=foo
+indirect_foo() # $ pt,tt=foo
+bar() # $ pt,tt=bar
+lam() # $ pt,tt=lambda[simple.py:12:7]
# python -m trace --trackcalls simple.py
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/through_content.py b/python/ql/test/experimental/library-tests/CallGraph/code/through_content.py
new file mode 100644
index 00000000000..f449c4bd6da
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/through_content.py
@@ -0,0 +1,6 @@
+def my_func():
+ print("my_func")
+
+funcs = [my_func]
+for f in funcs:
+ f() # $ MISSING: tt=my_func
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/type_tracking_limitation.py b/python/ql/test/experimental/library-tests/CallGraph/code/type_tracking_limitation.py
new file mode 100644
index 00000000000..4831cf74291
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/type_tracking_limitation.py
@@ -0,0 +1,8 @@
+def return_arg(arg):
+ return arg
+
+def my_func():
+ print("my_func")
+
+x = return_arg(my_func) # $ pt,tt=return_arg
+x() # $ pt=my_func MISSING: tt=my_func
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/underscore_prefix_func_name.py b/python/ql/test/experimental/library-tests/CallGraph/code/underscore_prefix_func_name.py
index fb3f5fc45a8..0331dbb30c4 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/underscore_prefix_func_name.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/underscore_prefix_func_name.py
@@ -11,14 +11,14 @@ def some_function():
def _ignored():
print('_ignored')
- some_function()
+ some_function() # $ tt=some_function
def _works_since_called():
print('_works_since_called')
- some_function() # $ pt=some_function
+ some_function() # $ pt,tt=some_function
def works_even_though_not_called():
- some_function() # $ pt=some_function
+ some_function() # $ pt,tt=some_function
globals()['_ignored']()
-_works_since_called() # $ pt=_works_since_called
+_works_since_called() # $ pt,tt=_works_since_called
diff --git a/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.expected b/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.expected
new file mode 100644
index 00000000000..9fedaf9f663
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.expected
@@ -0,0 +1,19 @@
+uniqueEnclosingCallable
+uniqueType
+uniqueNodeLocation
+missingLocation
+uniqueNodeToString
+missingToString
+parameterCallable
+localFlowIsLocal
+compatibleTypesReflexive
+unreachableNodeCCtx
+localCallNodes
+postIsNotPre
+postHasUniquePre
+uniquePostUpdate
+postIsInSameCallable
+reverseRead
+argHasPostUpdate
+postWithInFlow
+viableImplInCallContextTooLarge
diff --git a/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.ql b/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.ql
new file mode 100644
index 00000000000..6743fa10d27
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.ql
@@ -0,0 +1 @@
+import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
diff --git a/python/ql/test/library-tests/PointsTo/new/ImpliesDataflow.expected b/python/ql/test/library-tests/PointsTo/new/ImpliesDataflow.expected
index 5dcf739c068..0c2bd1b4ce0 100644
--- a/python/ql/test/library-tests/PointsTo/new/ImpliesDataflow.expected
+++ b/python/ql/test/library-tests/PointsTo/new/ImpliesDataflow.expected
@@ -5,11 +5,6 @@
| code/l_calls.py:12:1:12:20 | ControlFlowNode for ClassExpr | code/l_calls.py:25:16:25:16 | ControlFlowNode for a |
| code/l_calls.py:33:5:33:23 | ControlFlowNode for FunctionExpr | code/l_calls.py:39:1:39:3 | ControlFlowNode for Attribute |
| code/l_calls.py:48:5:48:30 | ControlFlowNode for FunctionExpr | code/l_calls.py:53:1:53:3 | ControlFlowNode for Attribute |
-| code/q_super.py:10:18:10:21 | ControlFlowNode for self | code/q_super.py:4:22:4:25 | ControlFlowNode for self |
-| code/q_super.py:26:14:26:17 | ControlFlowNode for self | code/q_super.py:22:32:22:35 | ControlFlowNode for self |
-| code/q_super.py:31:14:31:17 | ControlFlowNode for self | code/q_super.py:22:32:22:35 | ControlFlowNode for self |
-| code/q_super.py:37:14:37:17 | ControlFlowNode for self | code/q_super.py:22:32:22:35 | ControlFlowNode for self |
-| code/q_super.py:37:14:37:17 | ControlFlowNode for self | code/q_super.py:27:32:27:35 | ControlFlowNode for self |
| code/q_super.py:48:5:48:17 | ControlFlowNode for ClassExpr | code/q_super.py:51:25:51:29 | ControlFlowNode for Attribute |
| code/q_super.py:63:5:63:17 | ControlFlowNode for ClassExpr | code/q_super.py:66:19:66:23 | ControlFlowNode for Attribute |
| code/t_type.py:3:1:3:16 | ControlFlowNode for ClassExpr | code/t_type.py:6:1:6:9 | ControlFlowNode for type() |
diff --git a/python/ql/test/library-tests/frameworks/django-orm/ReflectedXss.expected b/python/ql/test/library-tests/frameworks/django-orm/ReflectedXss.expected
index dc055e4a08f..c56e9c8a3f6 100644
--- a/python/ql/test/library-tests/frameworks/django-orm/ReflectedXss.expected
+++ b/python/ql/test/library-tests/frameworks/django-orm/ReflectedXss.expected
@@ -5,13 +5,13 @@ edges
| testapp/orm_security_tests.py:15:1:15:27 | [orm-model] Class Person [Attribute name] | testapp/orm_security_tests.py:47:14:47:53 | ControlFlowNode for Attribute() [Attribute name] |
| testapp/orm_security_tests.py:19:12:19:18 | ControlFlowNode for request | testapp/orm_security_tests.py:22:23:22:34 | ControlFlowNode for Attribute |
| testapp/orm_security_tests.py:19:12:19:18 | ControlFlowNode for request | testapp/orm_security_tests.py:23:22:23:33 | ControlFlowNode for Attribute |
-| testapp/orm_security_tests.py:22:9:22:14 | [post store] ControlFlowNode for person [Attribute name] | testapp/orm_security_tests.py:23:9:23:14 | ControlFlowNode for person [Attribute name] |
+| testapp/orm_security_tests.py:22:9:22:14 | [post] ControlFlowNode for person [Attribute name] | testapp/orm_security_tests.py:23:9:23:14 | ControlFlowNode for person [Attribute name] |
| testapp/orm_security_tests.py:22:23:22:34 | ControlFlowNode for Attribute | testapp/orm_security_tests.py:22:23:22:42 | ControlFlowNode for Subscript |
-| testapp/orm_security_tests.py:22:23:22:42 | ControlFlowNode for Subscript | testapp/orm_security_tests.py:22:9:22:14 | [post store] ControlFlowNode for person [Attribute name] |
+| testapp/orm_security_tests.py:22:23:22:42 | ControlFlowNode for Subscript | testapp/orm_security_tests.py:22:9:22:14 | [post] ControlFlowNode for person [Attribute name] |
| testapp/orm_security_tests.py:23:9:23:14 | ControlFlowNode for person [Attribute name] | testapp/orm_security_tests.py:28:9:28:14 | ControlFlowNode for person [Attribute name] |
-| testapp/orm_security_tests.py:23:9:23:14 | [post store] ControlFlowNode for person [Attribute age] | testapp/orm_security_tests.py:28:9:28:14 | ControlFlowNode for person [Attribute age] |
+| testapp/orm_security_tests.py:23:9:23:14 | [post] ControlFlowNode for person [Attribute age] | testapp/orm_security_tests.py:28:9:28:14 | ControlFlowNode for person [Attribute age] |
| testapp/orm_security_tests.py:23:22:23:33 | ControlFlowNode for Attribute | testapp/orm_security_tests.py:23:22:23:40 | ControlFlowNode for Subscript |
-| testapp/orm_security_tests.py:23:22:23:40 | ControlFlowNode for Subscript | testapp/orm_security_tests.py:23:9:23:14 | [post store] ControlFlowNode for person [Attribute age] |
+| testapp/orm_security_tests.py:23:22:23:40 | ControlFlowNode for Subscript | testapp/orm_security_tests.py:23:9:23:14 | [post] ControlFlowNode for person [Attribute age] |
| testapp/orm_security_tests.py:28:9:28:14 | ControlFlowNode for person [Attribute age] | testapp/orm_security_tests.py:15:1:15:27 | [orm-model] Class Person [Attribute age] |
| testapp/orm_security_tests.py:28:9:28:14 | ControlFlowNode for person [Attribute name] | testapp/orm_security_tests.py:15:1:15:27 | [orm-model] Class Person [Attribute name] |
| testapp/orm_security_tests.py:42:13:42:18 | SSA variable person [Attribute age] | testapp/orm_security_tests.py:43:62:43:67 | ControlFlowNode for person [Attribute age] |
@@ -48,11 +48,11 @@ nodes
| testapp/orm_security_tests.py:15:1:15:27 | [orm-model] Class Person [Attribute age] | semmle.label | [orm-model] Class Person [Attribute age] |
| testapp/orm_security_tests.py:15:1:15:27 | [orm-model] Class Person [Attribute name] | semmle.label | [orm-model] Class Person [Attribute name] |
| testapp/orm_security_tests.py:19:12:19:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| testapp/orm_security_tests.py:22:9:22:14 | [post store] ControlFlowNode for person [Attribute name] | semmle.label | [post store] ControlFlowNode for person [Attribute name] |
+| testapp/orm_security_tests.py:22:9:22:14 | [post] ControlFlowNode for person [Attribute name] | semmle.label | [post] ControlFlowNode for person [Attribute name] |
| testapp/orm_security_tests.py:22:23:22:34 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| testapp/orm_security_tests.py:22:23:22:42 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| testapp/orm_security_tests.py:23:9:23:14 | ControlFlowNode for person [Attribute name] | semmle.label | ControlFlowNode for person [Attribute name] |
-| testapp/orm_security_tests.py:23:9:23:14 | [post store] ControlFlowNode for person [Attribute age] | semmle.label | [post store] ControlFlowNode for person [Attribute age] |
+| testapp/orm_security_tests.py:23:9:23:14 | [post] ControlFlowNode for person [Attribute age] | semmle.label | [post] ControlFlowNode for person [Attribute age] |
| testapp/orm_security_tests.py:23:22:23:33 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| testapp/orm_security_tests.py:23:22:23:40 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| testapp/orm_security_tests.py:28:9:28:14 | ControlFlowNode for person [Attribute age] | semmle.label | ControlFlowNode for person [Attribute age] |
diff --git a/python/ql/test/library-tests/fuck/options b/python/ql/test/library-tests/fuck/options
new file mode 100644
index 00000000000..efa237f03c4
--- /dev/null
+++ b/python/ql/test/library-tests/fuck/options
@@ -0,0 +1 @@
+semmle-extractor-options: --max-import-depth=0
diff --git a/python/ql/test/library-tests/fuck/test.py b/python/ql/test/library-tests/fuck/test.py
new file mode 100644
index 00000000000..3029c8be234
--- /dev/null
+++ b/python/ql/test/library-tests/fuck/test.py
@@ -0,0 +1,17 @@
+def my_func(arg):
+ print("my_func", arg)
+
+class Foo:
+ def foo(self, arg=42):
+ print("Foo.foo", self, arg)
+
+
+my_func(43)
+
+import random
+if random.choice([True, False]):
+ func = my_func
+else:
+ func = Foo.foo
+
+func(44)
diff --git a/python/ql/test/library-tests/fuck/wat.expected b/python/ql/test/library-tests/fuck/wat.expected
new file mode 100644
index 00000000000..2a4f078a25f
--- /dev/null
+++ b/python/ql/test/library-tests/fuck/wat.expected
@@ -0,0 +1 @@
+| 1 |
From ed70e118a967701a9ecc9a15214e8a10b2219b78 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 16 Aug 2022 09:02:42 +0200
Subject: [PATCH 008/415] Python: Make test/Filter query more robust
Since if you had tornado installed, we would follow imports and have
results from those files as well :|
---
python/ql/test/library-tests/filters/tests/Filter.ql | 1 +
1 file changed, 1 insertion(+)
diff --git a/python/ql/test/library-tests/filters/tests/Filter.ql b/python/ql/test/library-tests/filters/tests/Filter.ql
index 0528a318f77..967ed8d12f7 100644
--- a/python/ql/test/library-tests/filters/tests/Filter.ql
+++ b/python/ql/test/library-tests/filters/tests/Filter.ql
@@ -2,4 +2,5 @@ import python
import semmle.python.filters.Tests
from TestScope t
+where exists(t.getLocation().getFile().getRelativePath())
select t
From 2e2cee06c36e7c689ece97d47562889f9efd9638 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 16 Aug 2022 09:12:21 +0200
Subject: [PATCH 009/415] Python: Adjust InsecureRandomnessCustomizations.qll
---
.../python/security/InsecureRandomnessCustomizations.qll | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/python/ql/src/experimental/semmle/python/security/InsecureRandomnessCustomizations.qll b/python/ql/src/experimental/semmle/python/security/InsecureRandomnessCustomizations.qll
index 29230af4634..cc99b286f8a 100644
--- a/python/ql/src/experimental/semmle/python/security/InsecureRandomnessCustomizations.qll
+++ b/python/ql/src/experimental/semmle/python/security/InsecureRandomnessCustomizations.qll
@@ -59,12 +59,11 @@ module InsecureRandomness {
*/
class RandomFnSink extends Sink {
RandomFnSink() {
- exists(DataFlowCallable randomFn |
- randomFn
- .getName()
+ exists(Function func |
+ func.getName()
.regexpMatch("(?i).*(gen(erate)?|make|mk|create).*(nonce|salt|pepper|Password).*")
|
- this.getEnclosingCallable() = randomFn
+ this.asExpr().getScope() = func
)
}
}
From 7648462f989b17d5b437e9daddf7970ee468f095 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 16 Aug 2022 14:56:19 +0200
Subject: [PATCH 010/415] Python: Fix ExternalAPIs queries
The output might end up being slightly more noisy since we don't
collapse positional and keyword arguments when the external target
function is included in the database, but this aligns with our long-term
goal of not doing that anymore, so I think it's fine.
---
.../CWE-020-ExternalAPIs/ExternalAPIs.qll | 263 +++++++++---------
...ExternalAPIsUsedWithUntrustedData.expected | 5 +-
.../UntrustedDataToExternalAPI.expected | 24 +-
.../Security/CWE-020-ExternalAPIs/test.py | 8 +-
4 files changed, 159 insertions(+), 141 deletions(-)
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
index fc2e64fc786..94494f3ca9b 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
@@ -1,48 +1,36 @@
/**
* Definitions for reasoning about untrusted data used in APIs defined outside the
- * database.
+ * user-written code.
*/
-import python
+private import python
import semmle.python.dataflow.new.DataFlow
-import semmle.python.dataflow.new.TaintTracking
-import semmle.python.Concepts
-import semmle.python.dataflow.new.RemoteFlowSources
+private import semmle.python.dataflow.new.TaintTracking
+private import semmle.python.dataflow.new.RemoteFlowSources
+private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
-private import semmle.python.types.Builtins
-private import semmle.python.objects.ObjectInternal
-// IMPLEMENTATION NOTES:
-//
-// This query uses *both* the new data-flow library, and points-to. Why? To get this
-// finished quickly, so it can provide value for our field team and ourselves.
-//
-// In the long run, it should not need to use points-to for anything. Possibly this can
-// even be helpful in figuring out what we need from TypeTrackers and the new data-flow
-// library to be fully operational.
-//
-// At least it will allow us to provide a baseline comparison against a solution that
-// doesn't use points-to at all
-//
-// There is a few dirty things we do here:
-// 1. DataFlowPrivate: since `DataFlowCall` and `DataFlowCallable` are not exposed
-// publicly, but we really want access to them.
-// 2. points-to: we kinda need to do this since this is what powers `DataFlowCall` and
-// `DataFlowCallable`
-// 3. ObjectInternal: to provide better names for built-in functions and methods. If we
-// really wanted to polish our points-to implementation, we could move this
-// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
-// probably require some more work: for this query, it's totally ok to use
-// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
-// figure out if that is desirable in general. I simply skipped a corner here!
-// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
/**
- * A callable that is considered a "safe" external API from a security perspective.
+ * An external API that is considered "safe" from a security perspective.
*/
class SafeExternalApi extends Unit {
- /** Gets a callable that is considered a "safe" external API from a security perspective. */
- abstract DataFlowPrivate::DataFlowCallable getSafeCallable();
+ /**
+ * Gets a call that is considered "safe" from a security perspective. You can use API
+ * graphs to find calls to functions you know are safe.
+ *
+ * This works even when the external library isn't extracted.
+ */
+ abstract DataFlow::CallCfgNode getSafeCall();
+
+ /**
+ * Gets a callable that is considered a "safe" external API from a security
+ * perspective.
+ *
+ * You probably want to define this as `none()` and use `getSafeCall` instead, since
+ * that can handle the external library not being extracted.
+ */
+ DataFlowPrivate::DataFlowCallable getSafeCallable() { none() }
}
/** DEPRECATED: Alias for SafeExternalApi */
@@ -50,57 +38,112 @@ deprecated class SafeExternalAPI = SafeExternalApi;
/** The default set of "safe" external APIs. */
private class DefaultSafeExternalApi extends SafeExternalApi {
- override DataFlowPrivate::DataFlowCallable getSafeCallable() {
- exists(CallableValue cv | cv = result.getCallableValue() |
- cv = Value::named(["len", "isinstance", "getattr", "hasattr"])
- or
- exists(ClassValue cls, string attr |
- cls = Value::named("dict") and attr in ["__getitem__", "__setitem__"]
- |
- cls.lookup(attr) = cv
- )
+ override DataFlow::CallCfgNode getSafeCall() {
+ result = API::builtin(["len", "isinstance", "getattr", "hasattr"]).getACall()
+ }
+}
+
+/** Gets a human readable representation of `node`. */
+string apiNodeToStringRepr(API::Node node) {
+ node = API::builtin(result)
+ or
+ node = API::moduleImport(result)
+ or
+ exists(API::Node base, string basename |
+ base.getDepth() < node.getDepth() and
+ basename = apiNodeToStringRepr(base)
+ |
+ exists(string m | node = base.getMember(m) | result = basename + "." + m)
+ or
+ node = base.getReturn() and
+ result = basename + "()"
+ or
+ node = base.getAwaited() and
+ result = basename
+ )
+}
+
+newtype TInterestingExternalApiCall =
+ TUnresolvedCall(DataFlow::CallCfgNode call) {
+ exists(call.getLocation().getFile().getRelativePath()) and
+ not exists(DataFlowPrivate::DataFlowCall dfCall | dfCall.getNode() = call.getNode()) and
+ not call = any(SafeExternalApi safe).getSafeCall()
+ } or
+ TResolvedCall(DataFlowPrivate::DataFlowCall call) {
+ exists(call.getLocation().getFile().getRelativePath()) and
+ not call.getCallable() = any(SafeExternalApi safe).getSafeCallable() and
+ not exists(call.getCallable().getLocation().getFile().getRelativePath())
+ }
+
+abstract class InterestingExternalApiCall extends TInterestingExternalApiCall {
+ /** Gets the argument at position `apos`, if any */
+ abstract DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos);
+
+ /** Gets a textual representation of this element. */
+ abstract string toString();
+
+ /**
+ * Gets a human-readable name for the external API.
+ */
+ abstract string getApiName();
+}
+
+class ResolvedCall extends InterestingExternalApiCall, TResolvedCall {
+ DataFlowPrivate::DataFlowCall dfCall;
+
+ ResolvedCall() { this = TResolvedCall(dfCall) }
+
+ override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
+ result = dfCall.getArgument(apos)
+ }
+
+ override string toString() { result = "ExternalAPI:ResolvedCall" }
+
+ override string getApiName() {
+ exists(DataFlow::CallCfgNode call, API::Node apiNode | dfCall.getNode() = call.getNode() |
+ result = apiNodeToStringRepr(apiNode) and
+ apiNode.getACall() = call
+ )
+ }
+}
+
+class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
+ DataFlow::CallCfgNode call;
+
+ UnresolvedCall() { this = TUnresolvedCall(call) }
+
+ override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
+ exists(int i | apos.isPositional(i) | result = call.getArg(i))
+ or
+ exists(string name | apos.isKeyword(name) | result = call.getArgByName(name))
+ }
+
+ override string toString() { result = "ExternalAPI:UnresolvedCall" }
+
+ override string getApiName() {
+ exists(API::Node apiNode |
+ result = apiNodeToStringRepr(apiNode) and
+ apiNode.getACall() = call
)
}
}
/** A node representing data being passed to an external API through a call. */
class ExternalApiDataNode extends DataFlow::Node {
- DataFlowPrivate::DataFlowCallable callable;
- int i;
+ InterestingExternalApiCall call;
+ DataFlowPrivate::ArgumentPosition apos;
ExternalApiDataNode() {
- exists(DataFlowPrivate::DataFlowCall call |
- exists(call.getLocation().getFile().getRelativePath())
- |
- callable = call.getCallable() and
- // TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
- this = call.getArg(i)
- ) and
- not any(SafeExternalApi safe).getSafeCallable() = callable and
- exists(Value cv | cv = callable.getCallableValue() |
- cv.isAbsent()
- or
- cv.isBuiltin()
- or
- cv.(CallableValue).getScope().getLocation().getFile().inStdlib()
- or
- not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
- ) and
+ this = call.getArgument(apos) and
// Not already modeled as a taint step
not exists(DataFlow::Node next | TaintTrackingPrivate::defaultAdditionalTaintStep(this, next)) and
// for `list.append(x)`, we have a additional taint step from x -> [post] list.
// Since we have modeled this explicitly, I don't see any cases where we would want to report this.
- not exists(DataFlow::Node prev, DataFlow::PostUpdateNode post |
+ not exists(DataFlow::PostUpdateNode post |
post.getPreUpdateNode() = this and
- TaintTrackingPrivate::defaultAdditionalTaintStep(prev, post)
+ TaintTrackingPrivate::defaultAdditionalTaintStep(_, post)
)
}
-
- /** Gets the index for the parameter that will receive this untrusted data */
- int getIndex() { result = i }
-
- /** Gets the callable to which this argument is passed. */
- DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
}
/** DEPRECATED: Alias for ExternalApiDataNode */
@@ -133,19 +176,26 @@ deprecated class UntrustedExternalAPIDataNode = UntrustedExternalApiDataNode;
/** An external API which is used with untrusted data. */
private newtype TExternalApi =
- /** An untrusted API method `m` where untrusted data is passed at `index`. */
- TExternalApiParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
- exists(UntrustedExternalApiDataNode n |
- callable = n.getCallable() and
- index = n.getIndex()
+ MkExternalApi(string repr, DataFlowPrivate::ArgumentPosition apos) {
+ exists(UntrustedExternalApiDataNode ex, InterestingExternalApiCall call |
+ ex = call.getArgument(apos) and
+ repr = call.getApiName()
)
}
-/** An external API which is used with untrusted data. */
-class ExternalApiUsedWithUntrustedData extends TExternalApi {
+/** An argument of an external API which is used with untrusted data. */
+class ExternalApiUsedWithUntrustedData extends MkExternalApi {
+ string repr;
+ DataFlowPrivate::ArgumentPosition apos;
+
+ ExternalApiUsedWithUntrustedData() { this = MkExternalApi(repr, apos) }
+
/** Gets a possibly untrusted use of this external API. */
UntrustedExternalApiDataNode getUntrustedDataNode() {
- this = TExternalApiParameter(result.getCallable(), result.getIndex())
+ exists(InterestingExternalApiCall call |
+ result = call.getArgument(apos) and
+ call.getApiName() = repr
+ )
}
/** Gets the number of untrusted sources used with this external API. */
@@ -154,63 +204,8 @@ class ExternalApiUsedWithUntrustedData extends TExternalApi {
}
/** Gets a textual representation of this element. */
- string toString() {
- exists(
- DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
- string indexString
- |
- this = TExternalApiParameter(callable, index) and
- indexString = "param " + index and
- exists(CallableValue cv | cv = callable.getCallableValue() |
- callableString =
- cv.getScope().getEnclosingModule().getName() + "." + cv.getScope().getQualifiedName()
- or
- not exists(cv.getScope()) and
- (
- cv instanceof BuiltinFunctionValue and
- callableString = pretty_builtin_function_value(cv)
- or
- cv instanceof BuiltinMethodValue and
- callableString = pretty_builtin_method_value(cv)
- or
- not cv instanceof BuiltinFunctionValue and
- not cv instanceof BuiltinMethodValue and
- callableString = cv.toString()
- )
- ) and
- result = callableString + " [" + indexString + "]"
- )
- }
+ string toString() { result = repr + " [" + apos + "]" }
}
/** DEPRECATED: Alias for ExternalApiUsedWithUntrustedData */
deprecated class ExternalAPIUsedWithUntrustedData = ExternalApiUsedWithUntrustedData;
-
-/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
-private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
- exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |
- result = prefix_with_module_if_found(b)
- )
-}
-
-/** Gets the fully qualified name for the `BuiltinMethodValue` bmv. */
-private string pretty_builtin_method_value(BuiltinMethodValue bmv) {
- exists(Builtin b | b = bmv.(BuiltinMethodObjectInternal).getBuiltin() |
- exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b |
- result = prefix_with_module_if_found(cls) + "." + b.getName()
- )
- or
- not exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b) and
- result = b.getName()
- )
-}
-
-/** Helper predicate that tries to adds module qualifier to `b`. Will succeed even if module not found. */
-private string prefix_with_module_if_found(Builtin b) {
- exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b |
- result = mod.getName() + "." + b.getName()
- )
- or
- not exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b) and
- result = b.getName()
-}
diff --git a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.expected b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.expected
index 7438c415858..a346aef9d22 100644
--- a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.expected
+++ b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.expected
@@ -1 +1,4 @@
-| hmac.new [param 1] | 2 | 1 |
+| hmac.new [keyword msg] | 1 | 1 |
+| hmac.new [position 1] | 1 | 1 |
+| unknown.lib.func [keyword kw] | 2 | 1 |
+| unknown.lib.func [position 0] | 2 | 1 |
diff --git a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.expected b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.expected
index e024ef20cba..ead0ff7b093 100644
--- a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.expected
+++ b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.expected
@@ -1,12 +1,20 @@
edges
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:13:16:13:22 | ControlFlowNode for request |
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:23:16:23:22 | ControlFlowNode for request |
+| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:34:12:34:18 | ControlFlowNode for request |
+| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:42:12:42:18 | ControlFlowNode for request |
| test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:5:26:5:32 | GSSA Variable request |
| test.py:5:26:5:32 | GSSA Variable request | test.py:0:0:0:0 | ModuleVariableNode for test.request |
| test.py:13:16:13:22 | ControlFlowNode for request | test.py:13:16:13:27 | ControlFlowNode for Attribute |
| test.py:13:16:13:27 | ControlFlowNode for Attribute | test.py:15:36:15:39 | ControlFlowNode for data |
| test.py:23:16:23:22 | ControlFlowNode for request | test.py:23:16:23:27 | ControlFlowNode for Attribute |
| test.py:23:16:23:27 | ControlFlowNode for Attribute | test.py:25:44:25:47 | ControlFlowNode for data |
+| test.py:34:12:34:18 | ControlFlowNode for request | test.py:34:12:34:23 | ControlFlowNode for Attribute |
+| test.py:34:12:34:23 | ControlFlowNode for Attribute | test.py:35:10:35:13 | ControlFlowNode for data |
+| test.py:34:12:34:23 | ControlFlowNode for Attribute | test.py:36:13:36:16 | ControlFlowNode for data |
+| test.py:42:12:42:18 | ControlFlowNode for request | test.py:42:12:42:23 | ControlFlowNode for Attribute |
+| test.py:42:12:42:23 | ControlFlowNode for Attribute | test.py:43:22:43:25 | ControlFlowNode for data |
+| test.py:42:12:42:23 | ControlFlowNode for Attribute | test.py:44:25:44:28 | ControlFlowNode for data |
nodes
| test.py:0:0:0:0 | ModuleVariableNode for test.request | semmle.label | ModuleVariableNode for test.request |
| test.py:5:26:5:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
@@ -17,7 +25,19 @@ nodes
| test.py:23:16:23:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:23:16:23:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:25:44:25:47 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
+| test.py:34:12:34:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| test.py:34:12:34:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| test.py:35:10:35:13 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
+| test.py:36:13:36:16 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
+| test.py:42:12:42:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| test.py:42:12:42:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| test.py:43:22:43:25 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
+| test.py:44:25:44:28 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
subpaths
#select
-| test.py:15:36:15:39 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:15:36:15:39 | ControlFlowNode for data | Call to hmac.new [param 1] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
-| test.py:25:44:25:47 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:25:44:25:47 | ControlFlowNode for data | Call to hmac.new [param 1] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
+| test.py:15:36:15:39 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:15:36:15:39 | ControlFlowNode for data | Call to hmac.new [position 1] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
+| test.py:25:44:25:47 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:25:44:25:47 | ControlFlowNode for data | Call to hmac.new [keyword msg] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
+| test.py:35:10:35:13 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:35:10:35:13 | ControlFlowNode for data | Call to unknown.lib.func [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
+| test.py:36:13:36:16 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:36:13:36:16 | ControlFlowNode for data | Call to unknown.lib.func [keyword kw] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
+| test.py:43:22:43:25 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:43:22:43:25 | ControlFlowNode for data | Call to unknown.lib.func [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
+| test.py:44:25:44:28 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:44:25:44:28 | ControlFlowNode for data | Call to unknown.lib.func [keyword kw] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
diff --git a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/test.py b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/test.py
index ca4191ded85..dd4edcbcf64 100644
--- a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/test.py
+++ b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/test.py
@@ -32,16 +32,16 @@ def hmac_example2():
def unknown_lib_1():
from unknown.lib import func
data = request.args.get("data")
- func(data) # TODO: currently not recognized
- func(kw=data) # TODO: currently not recognized
+ func(data)
+ func(kw=data)
@app.route("/unknown-lib-2")
def unknown_lib_2():
import unknown.lib
data = request.args.get("data")
- unknown.lib.func(data) # TODO: currently not recognized
- unknown.lib.func(kw=data) # TODO: currently not recognized
+ unknown.lib.func(data)
+ unknown.lib.func(kw=data)
if __name__ == "__main__":
From 0bdc808a7a3e34c1551df10c2a961c18cd13f109 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 16 Aug 2022 15:13:15 +0200
Subject: [PATCH 011/415] Python: Add ExternalAPI test `None.json.dumps`
---
.../ExternalAPIsUsedWithUntrustedData.expected | 2 ++
.../UntrustedDataToExternalAPI.expected | 12 ++++++++++++
.../Security/CWE-020-ExternalAPIs/test.py | 10 ++++++++++
3 files changed, 24 insertions(+)
diff --git a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.expected b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.expected
index a346aef9d22..30220ea651d 100644
--- a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.expected
+++ b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.expected
@@ -1,3 +1,5 @@
+| None.json.dumps [position 0] | 1 | 1 |
+| builtins.None.json.dumps [position 0] | 1 | 1 |
| hmac.new [keyword msg] | 1 | 1 |
| hmac.new [position 1] | 1 | 1 |
| unknown.lib.func [keyword kw] | 2 | 1 |
diff --git a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.expected b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.expected
index ead0ff7b093..708054a8dab 100644
--- a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.expected
+++ b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.expected
@@ -3,6 +3,7 @@ edges
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:23:16:23:22 | ControlFlowNode for request |
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:34:12:34:18 | ControlFlowNode for request |
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:42:12:42:18 | ControlFlowNode for request |
+| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:54:12:54:18 | ControlFlowNode for request |
| test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:5:26:5:32 | GSSA Variable request |
| test.py:5:26:5:32 | GSSA Variable request | test.py:0:0:0:0 | ModuleVariableNode for test.request |
| test.py:13:16:13:22 | ControlFlowNode for request | test.py:13:16:13:27 | ControlFlowNode for Attribute |
@@ -15,6 +16,10 @@ edges
| test.py:42:12:42:18 | ControlFlowNode for request | test.py:42:12:42:23 | ControlFlowNode for Attribute |
| test.py:42:12:42:23 | ControlFlowNode for Attribute | test.py:43:22:43:25 | ControlFlowNode for data |
| test.py:42:12:42:23 | ControlFlowNode for Attribute | test.py:44:25:44:28 | ControlFlowNode for data |
+| test.py:47:17:47:19 | ControlFlowNode for arg | test.py:50:32:50:34 | ControlFlowNode for arg |
+| test.py:54:12:54:18 | ControlFlowNode for request | test.py:54:12:54:23 | ControlFlowNode for Attribute |
+| test.py:54:12:54:23 | ControlFlowNode for Attribute | test.py:55:17:55:20 | ControlFlowNode for data |
+| test.py:55:17:55:20 | ControlFlowNode for data | test.py:47:17:47:19 | ControlFlowNode for arg |
nodes
| test.py:0:0:0:0 | ModuleVariableNode for test.request | semmle.label | ModuleVariableNode for test.request |
| test.py:5:26:5:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
@@ -33,6 +38,11 @@ nodes
| test.py:42:12:42:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:43:22:43:25 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| test.py:44:25:44:28 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
+| test.py:47:17:47:19 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
+| test.py:50:32:50:34 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
+| test.py:54:12:54:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| test.py:54:12:54:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| test.py:55:17:55:20 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
subpaths
#select
| test.py:15:36:15:39 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:15:36:15:39 | ControlFlowNode for data | Call to hmac.new [position 1] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
@@ -41,3 +51,5 @@ subpaths
| test.py:36:13:36:16 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:36:13:36:16 | ControlFlowNode for data | Call to unknown.lib.func [keyword kw] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:43:22:43:25 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:43:22:43:25 | ControlFlowNode for data | Call to unknown.lib.func [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:44:25:44:28 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:44:25:44:28 | ControlFlowNode for data | Call to unknown.lib.func [keyword kw] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
+| test.py:50:32:50:34 | ControlFlowNode for arg | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:50:32:50:34 | ControlFlowNode for arg | Call to None.json.dumps [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
+| test.py:50:32:50:34 | ControlFlowNode for arg | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:50:32:50:34 | ControlFlowNode for arg | Call to builtins.None.json.dumps [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
diff --git a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/test.py b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/test.py
index dd4edcbcf64..18b46298d8a 100644
--- a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/test.py
+++ b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/test.py
@@ -44,6 +44,16 @@ def unknown_lib_2():
unknown.lib.func(kw=data)
+def handle_this(arg, application = None):
+ if application:
+ # since application could be None, we could end up reporting `None.json.dumps`
+ application.json.dumps(arg)
+
+@app.route("/optional-arg")
+def optional_arg():
+ data = request.args.get("data")
+ handle_this(data)
+
if __name__ == "__main__":
# http://127.0.0.1:5000/hmac-example?data=aGVsbG8gd29ybGQh
app.run(debug=True)
From 70cc986d5f794e433e94faa2173d252cefbf2d7d Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 16 Aug 2022 15:16:59 +0200
Subject: [PATCH 012/415] Python: Suppress `None.json.dumps` from ExternalAPI
queries
---
.../src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll | 10 ++++++++--
.../ExternalAPIsUsedWithUntrustedData.expected | 2 --
.../UntrustedDataToExternalAPI.expected | 2 --
3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
index 94494f3ca9b..ae2c9c273f5 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
@@ -43,7 +43,12 @@ private class DefaultSafeExternalApi extends SafeExternalApi {
}
}
-/** Gets a human readable representation of `node`. */
+/**
+ * Gets a human readable representation of `node`.
+ *
+ * Note that this is only defined for API nodes that are allowed as external APIs,
+ * so there is, for example, no result for `None.json.dumps`.
+ */
string apiNodeToStringRepr(API::Node node) {
node = API::builtin(result)
or
@@ -51,7 +56,8 @@ string apiNodeToStringRepr(API::Node node) {
or
exists(API::Node base, string basename |
base.getDepth() < node.getDepth() and
- basename = apiNodeToStringRepr(base)
+ basename = apiNodeToStringRepr(base) and
+ not base = API::builtin("None")
|
exists(string m | node = base.getMember(m) | result = basename + "." + m)
or
diff --git a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.expected b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.expected
index 30220ea651d..a346aef9d22 100644
--- a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.expected
+++ b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.expected
@@ -1,5 +1,3 @@
-| None.json.dumps [position 0] | 1 | 1 |
-| builtins.None.json.dumps [position 0] | 1 | 1 |
| hmac.new [keyword msg] | 1 | 1 |
| hmac.new [position 1] | 1 | 1 |
| unknown.lib.func [keyword kw] | 2 | 1 |
diff --git a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.expected b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.expected
index 708054a8dab..bb6ffaab366 100644
--- a/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.expected
+++ b/python/ql/test/query-tests/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.expected
@@ -51,5 +51,3 @@ subpaths
| test.py:36:13:36:16 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:36:13:36:16 | ControlFlowNode for data | Call to unknown.lib.func [keyword kw] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:43:22:43:25 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:43:22:43:25 | ControlFlowNode for data | Call to unknown.lib.func [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:44:25:44:28 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:44:25:44:28 | ControlFlowNode for data | Call to unknown.lib.func [keyword kw] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
-| test.py:50:32:50:34 | ControlFlowNode for arg | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:50:32:50:34 | ControlFlowNode for arg | Call to None.json.dumps [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
-| test.py:50:32:50:34 | ControlFlowNode for arg | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:50:32:50:34 | ControlFlowNode for arg | Call to builtins.None.json.dumps [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
From af9be6ad7ea6699d3b3c7709d57339664ab20ba6 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 16 Aug 2022 15:45:02 +0200
Subject: [PATCH 013/415] Python: Suppress more spurious alerts from
ExternalAPI queries
---
.../CWE-020-ExternalAPIs/ExternalAPIs.qll | 34 +++++++++++++++----
1 file changed, 27 insertions(+), 7 deletions(-)
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
index ae2c9c273f5..76aa68c5162 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
@@ -39,7 +39,11 @@ deprecated class SafeExternalAPI = SafeExternalApi;
/** The default set of "safe" external APIs. */
private class DefaultSafeExternalApi extends SafeExternalApi {
override DataFlow::CallCfgNode getSafeCall() {
- result = API::builtin(["len", "isinstance", "getattr", "hasattr"]).getACall()
+ result =
+ API::builtin([
+ "len", "enumerate", "isinstance", "getattr", "hasattr", "bool", "float", "int", "repr",
+ "str", "type"
+ ]).getACall()
}
}
@@ -57,28 +61,40 @@ string apiNodeToStringRepr(API::Node node) {
exists(API::Node base, string basename |
base.getDepth() < node.getDepth() and
basename = apiNodeToStringRepr(base) and
- not base = API::builtin("None")
+ not base = API::builtin(["None", "True", "False"])
|
exists(string m | node = base.getMember(m) | result = basename + "." + m)
or
node = base.getReturn() and
- result = basename + "()"
+ result = basename + "()" and
+ not base.getACall() = any(SafeExternalApi safe).getSafeCall()
or
node = base.getAwaited() and
result = basename
)
}
+predicate resolvedCall(CallNode call) {
+ DataFlowPrivate::resolveCall(call, _, _) or
+ DataFlowPrivate::resolveClassCall(call, _)
+}
+
newtype TInterestingExternalApiCall =
TUnresolvedCall(DataFlow::CallCfgNode call) {
exists(call.getLocation().getFile().getRelativePath()) and
- not exists(DataFlowPrivate::DataFlowCall dfCall | dfCall.getNode() = call.getNode()) and
+ not resolvedCall(call.getNode()) and
not call = any(SafeExternalApi safe).getSafeCall()
} or
TResolvedCall(DataFlowPrivate::DataFlowCall call) {
exists(call.getLocation().getFile().getRelativePath()) and
not call.getCallable() = any(SafeExternalApi safe).getSafeCallable() and
- not exists(call.getCallable().getLocation().getFile().getRelativePath())
+ // Ignore calls whose target is inside the codebase, and calls that are marked as safe.
+ // This is only needed as long as we extract dependencies. When we stop doing that,
+ // all targets of resolved calls will be in user-written code.
+ not exists(call.getCallable().getLocation().getFile().getRelativePath()) and
+ not exists(DataFlow::CallCfgNode callCfgNode | callCfgNode.getNode() = call.getNode() |
+ any(SafeExternalApi safe).getSafeCall() = callCfgNode
+ )
}
abstract class InterestingExternalApiCall extends TInterestingExternalApiCall {
@@ -103,7 +119,9 @@ class ResolvedCall extends InterestingExternalApiCall, TResolvedCall {
result = dfCall.getArgument(apos)
}
- override string toString() { result = "ExternalAPI:ResolvedCall" }
+ override string toString() {
+ result = "ExternalAPI:ResolvedCall: " + dfCall.getNode().getNode().toString()
+ }
override string getApiName() {
exists(DataFlow::CallCfgNode call, API::Node apiNode | dfCall.getNode() = call.getNode() |
@@ -124,7 +142,9 @@ class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
exists(string name | apos.isKeyword(name) | result = call.getArgByName(name))
}
- override string toString() { result = "ExternalAPI:UnresolvedCall" }
+ override string toString() {
+ result = "ExternalAPI:UnresolvedCall: " + call.getNode().getNode().toString()
+ }
override string getApiName() {
exists(API::Node apiNode |
From 7c1320ed4bc13e41d91f4019767bd4c72c21ec37 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 16 Aug 2022 15:49:51 +0200
Subject: [PATCH 014/415] Python: Adjust ExternalAPI qhelp files
---
.../ExternalAPIsUsedWithUntrustedData.qhelp | 10 +++-------
.../UntrustedDataToExternalAPI.qhelp | 10 +++-------
2 files changed, 6 insertions(+), 14 deletions(-)
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.qhelp b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.qhelp
index 0627615ca64..e0692ffeae0 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.qhelp
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.qhelp
@@ -11,11 +11,9 @@ relevant for security analysis of this application.
An external API is defined as a call to a method that is not defined in the source
code, and is not modeled as a taint step in the default taint library. External APIs may
-be from the Python standard library or dependencies. The query will report the fully qualified name,
-along with [param x], where x indicates the position of
-the parameter receiving the untrusted data. Note that for methods and
-classmethods, parameter 0 represents the class instance or class itself
-respectively.
+be from the Python standard library or dependencies. The query will report the fully
+qualified name, along with [position index] or [keyword name],
+to indicate which argument carries the untrusted data.
Note that an excepted sink might not be included in the results, if it also defines a
taint step. This is the case for pickle.loads which is a sink for the
@@ -24,8 +22,6 @@ Unsafe Deserialization query, but is also a taint step for other queries.
Note: Compared to the Java version of this query, we currently do not give special
care to methods that are overridden in the source code.
-Note: Currently this query will only report results for external packages that are extracted.
-
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.qhelp b/python/ql/src/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.qhelp
index fc7f1a18da9..2b8c31d37b7 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.qhelp
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.qhelp
@@ -11,11 +11,9 @@ be modeled as either taint steps, or sinks for specific problems.
An external API is defined as a call to a method that is not defined in the source
code, and is not modeled as a taint step in the default taint library. External APIs may
-be from the Python standard library or dependencies. The query will report the fully qualified name,
-along with [param x], where x indicates the position of
-the parameter receiving the untrusted data. Note that for methods and
-classmethods, parameter 0 represents the class instance or class itself
-respectively.
+be from the Python standard library or dependencies. The query will report the fully
+qualified name, along with [position index] or [keyword name],
+to indicate which argument carries the untrusted data.
Note that an excepted sink might not be included in the results, if it also defines a
taint step. This is the case for pickle.loads which is a sink for the
@@ -24,8 +22,6 @@ Unsafe Deserialization query, but is also a taint step for other queries.
Note: Compared to the Java version of this query, we currently do not give special
care to methods that are overridden in the source code.
-Note: Currently this query will only report results for external packages that are extracted.
-
From f2e92bf96322562e879c31392862f30471ed14b1 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 24 Aug 2022 10:45:08 +0200
Subject: [PATCH 015/415] Python: Port `py/meta/call-graph`
---
python/ql/src/meta/analysis-quality/CallGraph.ql | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/python/ql/src/meta/analysis-quality/CallGraph.ql b/python/ql/src/meta/analysis-quality/CallGraph.ql
index d23ee43014c..71ca77e87f8 100644
--- a/python/ql/src/meta/analysis-quality/CallGraph.ql
+++ b/python/ql/src/meta/analysis-quality/CallGraph.ql
@@ -1,9 +1,9 @@
/**
* @name Call graph
- * @description An edge in the points-to call graph.
+ * @description An edge in the call graph.
* @kind problem
* @problem.severity recommendation
- * @id py/meta/points-to-call-graph
+ * @id py/meta/call-graph
* @tags meta
* @precision very-low
*/
@@ -11,6 +11,6 @@
import python
import semmle.python.dataflow.new.internal.DataFlowPrivate
-from DataFlowCall c, DataFlowCallableValue f
-where c.getCallable() = f
-select c, "Call to $@", f.getScope(), f.toString()
+from DataFlowCall call, DataFlowCallable target
+where target = viableCallable(call)
+select call, "Call to $@", target.getScope(), target.toString()
From f3ac81a013d90479624c8ffba67c2d64829ccb73 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 6 Sep 2022 13:54:17 +0200
Subject: [PATCH 016/415] Python: Expand tests for special method calls
---
.../CallGraph/code/class_special_methods.py | 57 +++++++++++++++----
1 file changed, 47 insertions(+), 10 deletions(-)
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py
index 7b8df9c4139..454eb5207a1 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py
@@ -1,29 +1,66 @@
-class B(object):
+class Base(object):
def __init__(self, arg):
- print('B.__init__', arg)
+ print("Base.__init__", arg)
self.arg = arg
def __str__(self):
- print('B.__str__')
- return 'B (arg={})'.format(self.arg)
+ print("Base.__str__")
+ return 'Base STR (arg={})'.format(self.arg)
def __add__(self, other):
- print('B.__add__')
- if isinstance(other, B):
- return B(self.arg + other.arg) # $ tt=B.__init__
- return B(self.arg + other) # $ tt=B.__init__
+ print("Base.__add__")
+ if isinstance(other, Base):
+ return Base(self.arg + other.arg) # $ tt=Base.__init__
+ return Base(self.arg + other) # $ tt=Base.__init__
-b = B(1) # $ tt=B.__init__
+ def __call__(self, val):
+ print("Base.__call__", val)
+
+ def wat(self):
+ print("Base.wat")
+ self(43) # $ MISSING: tt=Base.__call__ tt=Sub.__call__
+
+
+b = Base(1) # $ tt=Base.__init__
print(str(b))
# this calls `str(b)` inside
print(b)
+print("\n! calls")
+b(42) # $ MISSING: tt=Base.__call__
+b.wat() # $ pt,tt=Base.wat
-b2 = B(2) # $ tt=B.__init__
+b.__call__(44) # $ pt,tt=Base.__call__
+
+print("\n! b2")
+b2 = Base(2) # $ tt=Base.__init__
# __add__ is called
b + b2
b + 100
+
+
+# ========
+print("\n! Sub")
+
+class Sub(Base):
+ def __add__(self, other):
+ print("Sub.__add__")
+
+ def __call__(self, arg):
+ print("Sub.__call__", arg)
+
+sub = Sub(10) # $ tt=Base.__init__
+sub + 42
+
+sub(55) # $ MISSING: tt=Sub.__call__
+sub.wat() # $ pt,tt=Base.wat
+
+# using `+` on a `super()` proxy is expected to raise TypeError, so `__add__` can't be reached this way
+try:
+ super(Sub, sub) + 143
+except TypeError:
+ print("TypeError as expected")
From b5e8bf7882b0599c69f4b73eeca001b18ba01ef7 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 6 Sep 2022 14:24:30 +0200
Subject: [PATCH 017/415] Python: Add note about trying
`DataFlowDispatchPointsTo` for `InlineCallGraphTest`
Since I was very confused about getting no results for `__call__`, I
checked whether I had cheated by making the comparison unfair, but that
did not seem to be the case.
---
.../library-tests/CallGraph/InlineCallGraphTest.ql | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
index d00d0ae1301..327621fb7f2 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
@@ -6,6 +6,10 @@ private import semmle.python.dataflow.new.internal.DataFlowDispatch as TT
predicate pointsToCallEdge(CallNode call, Function callable) {
exists(call.getLocation().getFile().getRelativePath()) and
exists(callable.getLocation().getFile().getRelativePath()) and
+ // Note: using `viableCallable` from `DataFlowDispatchPointsTo` (from a temporary copy
+ // of `dataflow.new.internal` that still uses points-to) instead of calling
+ // `getACall()` directly on a Value was tried, but it only added results for
+ // `__init__` methods, not for anything else.
exists(PythonFunctionValue funcValue |
funcValue.getScope() = callable and
call = funcValue.getACall()
From a5c3e850f115c07d5a7b43870af464aa2f887aad Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 6 Sep 2022 14:34:30 +0200
Subject: [PATCH 018/415] Python: Handle `__call__`
---
.../new/internal/DataFlowDispatch.qll | 42 +++++++++++++++++--
.../dataflow/new/internal/DataFlowPublic.qll | 3 ++
.../dataflow/basic/local.expected | 1 +
.../dataflow/basic/sinks.expected | 1 +
.../dataflow/basic/sources.expected | 1 +
.../CallGraph/InlineCallGraphTest.expected | 4 ++
.../CallGraph/code/class_special_methods.py | 6 +--
7 files changed, 52 insertions(+), 6 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 37841765030..3a6b431de6e 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -622,7 +622,9 @@ newtype TCallType =
*/
CallTypeMethodAsPlainFunction() or
/** A call to a class. */
- CallTypeClass()
+ CallTypeClass() or
+ /** A call on a class instance, which goes to the `__call__` method of the class. */
+ CallTypeClassInstanceCall()
/** A type of call. */
class CallType extends TCallType {
@@ -644,6 +646,9 @@ class CallType extends TCallType {
or
this instanceof CallTypeClass and
result = "CallTypeClass"
+ or
+ this instanceof CallTypeClassInstanceCall and
+ result = "CallTypeClassInstanceCall"
}
}
@@ -664,7 +669,7 @@ private module MethodCalls {
private predicate directCall(
CallNode call, Function target, string functionName, Class cls, AttrRead attr, Node self
) {
- target = findFunctionAccordingToMroKnownStartingClass(cls, cls, functionName) and
+ target = findFunctionAccordingToMroKnownStartingClass(cls, functionName) and
directCall_join(call, functionName, cls, attr, self)
}
@@ -826,6 +831,24 @@ Function invokedFunctionFromClassConstruction(Class cls) {
result = findFunctionAccordingToMroKnownStartingClass(cls, "__init__")
}
+/**
+ * Holds when `call` is a call on a class instance, that goes to the `__call__` method
+ * of the class.
+ *
+ * See https://docs.python.org/3/reference/datamodel.html#object.__call__
+ */
+predicate resolveClassInstanceCall(CallNode call, Function target, Node self) {
+ exists(Class cls |
+ call.getFunction() = classInstanceTracker(cls).asCfgNode() and
+ target = findFunctionAccordingToMroKnownStartingClass(cls, "__call__")
+ or
+ call.getFunction() = selfTracker(cls).asCfgNode() and
+ target = findFunctionAccordingToMro(getADirectSubclass*(cls), "__call__")
+ |
+ self.asCfgNode() = call.getFunction()
+ )
+}
+
// -------------------------------------
// overall call resolution
// -------------------------------------
@@ -844,6 +867,9 @@ predicate resolveCall(ControlFlowNode call, Function target, CallType type) {
resolveClassCall(call, cls) and
target = invokedFunctionFromClassConstruction(cls)
)
+ or
+ type instanceof CallTypeClassInstanceCall and
+ resolveClassInstanceCall(call, target, _)
}
// =============================================================================
@@ -953,6 +979,15 @@ predicate getCallArg(
or
normalCallArg(call, arg, apos)
)
+ or
+ // call on class instance, which goes to `__call__` method
+ type instanceof CallTypeClassInstanceCall and
+ (
+ apos.isSelf() and
+ resolveClassInstanceCall(call, target, arg)
+ or
+ normalCallArg(call, arg, apos)
+ )
)
}
@@ -1010,7 +1045,8 @@ abstract class ExtractedDataFlowCall extends DataFlowCall {
* A resolved call in source code with an underlying `CallNode`.
*
* This is considered normal, compared with special calls such as `obj[0]` calling the
- * `__getitem__` method on the object.
+ * `__getitem__` method on the object. However, this also includes calls that go to the
+ * `__call__` special method.
*/
class NormalCall extends ExtractedDataFlowCall, TNormalCall {
CallNode call;
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
index aa48df91c0c..d184dc4117c 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -51,6 +51,9 @@ newtype TNode =
node = call.getArg(_)
or
node = call.getArgByName(_)
+ or
+ // `self` argument when handling class instance calls (`__call__` special method))
+ node = call.getFunction()
)
or
node = any(AttrNode a).getObject()
diff --git a/python/ql/test/experimental/dataflow/basic/local.expected b/python/ql/test/experimental/dataflow/basic/local.expected
index 74263f31a52..133f740596c 100644
--- a/python/ql/test/experimental/dataflow/basic/local.expected
+++ b/python/ql/test/experimental/dataflow/basic/local.expected
@@ -47,6 +47,7 @@
| test.py:7:1:7:1 | ControlFlowNode for b | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:1:7:1 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
diff --git a/python/ql/test/experimental/dataflow/basic/sinks.expected b/python/ql/test/experimental/dataflow/basic/sinks.expected
index 97d7e313dac..cfd8effd77b 100644
--- a/python/ql/test/experimental/dataflow/basic/sinks.expected
+++ b/python/ql/test/experimental/dataflow/basic/sinks.expected
@@ -20,6 +20,7 @@
| test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
diff --git a/python/ql/test/experimental/dataflow/basic/sources.expected b/python/ql/test/experimental/dataflow/basic/sources.expected
index 97d7e313dac..cfd8effd77b 100644
--- a/python/ql/test/experimental/dataflow/basic/sources.expected
+++ b/python/ql/test/experimental/dataflow/basic/sources.expected
@@ -20,6 +20,7 @@
| test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index 10031cecde5..e748746b01a 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -24,6 +24,10 @@ typeTracker_found_pointsTo_notFound
| code/class_more_mro2.py:21:1:21:8 | ControlFlowNode for Attribute() | A.foo |
| code/class_more_mro.py:24:9:24:21 | ControlFlowNode for Attribute() | A.foo |
| code/class_more_mro.py:34:1:34:16 | ControlFlowNode for Attribute() | A.foo |
+| code/class_special_methods.py:22:9:22:16 | ControlFlowNode for self() | Base.__call__ |
+| code/class_special_methods.py:22:9:22:16 | ControlFlowNode for self() | Sub.__call__ |
+| code/class_special_methods.py:33:1:33:5 | ControlFlowNode for b() | Base.__call__ |
+| code/class_special_methods.py:59:1:59:7 | ControlFlowNode for sub() | Sub.__call__ |
| code/class_super.py:43:9:43:21 | ControlFlowNode for Attribute() | A.bar |
| code/class_super.py:44:9:44:27 | ControlFlowNode for Attribute() | A.bar |
| code/class_super.py:63:1:63:18 | ControlFlowNode for Attribute() | A.foo |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py
index 454eb5207a1..e765f155f3c 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py
@@ -19,7 +19,7 @@ class Base(object):
def wat(self):
print("Base.wat")
- self(43) # $ MISSING: tt=Base.__call__ tt=Sub.__call__
+ self(43) # $ tt=Base.__call__ tt=Sub.__call__
b = Base(1) # $ tt=Base.__init__
@@ -30,7 +30,7 @@ print(b)
print("\n! calls")
-b(42) # $ MISSING: tt=Base.__call__
+b(42) # $ tt=Base.__call__
b.wat() # $ pt,tt=Base.wat
b.__call__(44) # $ pt,tt=Base.__call__
@@ -56,7 +56,7 @@ class Sub(Base):
sub = Sub(10) # $ tt=Base.__init__
sub + 42
-sub(55) # $ MISSING: tt=Sub.__call__
+sub(55) # $ tt=Sub.__call__
sub.wat() # $ pt,tt=Base.wat
# not possible to indirectly access addition of subclass
From 61410191e7d85122f5f6d33eb33be5f91eee9fe3 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 1 Nov 2022 15:43:16 +0100
Subject: [PATCH 019/415] Python: Fix bug in `argumentRoutingTest.ql`
Since `DataFlowPrivate::DataFlowCall` only exists for calls resolved to
a function, we didn't have any results before... but allowing any call
helps things!
---
.../experimental/dataflow/coverage/argumentRoutingTest.ql | 4 ++--
python/ql/test/experimental/dataflow/coverage/classes.py | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentRoutingTest.ql b/python/ql/test/experimental/dataflow/coverage/argumentRoutingTest.ql
index 1a58715fc1c..6a4db891996 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentRoutingTest.ql
+++ b/python/ql/test/experimental/dataflow/coverage/argumentRoutingTest.ql
@@ -22,10 +22,10 @@ class Argument1RoutingConfig extends DataFlow::Configuration {
override predicate isSource(DataFlow::Node node) {
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg1"
or
- exists(AssignmentDefinition def, DataFlowPrivate::DataFlowCall call |
+ exists(AssignmentDefinition def, DataFlow::CallCfgNode call |
def.getVariable() = node.(DataFlow::EssaNode).getVar() and
def.getValue() = call.getNode() and
- call.getNode().(CallNode).getFunction().(NameNode).getId().matches("With\\_%")
+ call.getFunction().asCfgNode().(NameNode).getId().matches("With\\_%")
) and
node.(DataFlow::EssaNode).getVar().getName().matches("with\\_%")
}
diff --git a/python/ql/test/experimental/dataflow/coverage/classes.py b/python/ql/test/experimental/dataflow/coverage/classes.py
index af8d19354e7..bd60a93f243 100644
--- a/python/ql/test/experimental/dataflow/coverage/classes.py
+++ b/python/ql/test/experimental/dataflow/coverage/classes.py
@@ -506,7 +506,7 @@ class With_call:
def test_call():
- with_call = With_call() #$ MISSING: arg1="SSA variable with_call" func=With_call.__call__
+ with_call = With_call() #$ arg1="SSA variable with_call" func=With_call.__call__
with_call()
From 7014be204707ec5e0facc9727a0716a205294bad Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 8 Sep 2022 17:13:57 +0200
Subject: [PATCH 020/415] Python: Reduce size of `attrReadTracker`
On pallets/flask, this reduced the number of tuples in `attrReadTracker`
from 100866 to 33060.
---
.../python/dataflow/new/internal/DataFlowDispatch.qll | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 3a6b431de6e..83680b04247 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -585,7 +585,11 @@ Function findFunctionAccordingToMroKnownStartingClass(Class startingClass, strin
/** Gets a reference to the attribute read `attr` */
private TypeTrackingNode attrReadTracker(TypeTracker t, AttrRead attr) {
t.start() and
- result = attr
+ result = attr and
+ attr.getObject() in [
+ classTracker(_), classInstanceTracker(_), selfTracker(_), clsTracker(_),
+ superCallNoArgumentTracker(_), superCallTwoArgumentTracker(_, _)
+ ]
or
exists(TypeTracker t2 | result = attrReadTracker(t2, attr).track(t2, t))
}
From e7a337991af0505ee361631fe333f1bcae23df8d Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 8 Sep 2022 17:17:04 +0200
Subject: [PATCH 021/415] Python: Accept fix from extractor change
namely the variable access mentioned in
https://github.com/github/codeql/pull/10171
---
.../test/experimental/library-tests/CallGraph/code/shadowing.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/shadowing.py b/python/ql/test/experimental/library-tests/CallGraph/code/shadowing.py
index fdbf554084e..ad91712c481 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/shadowing.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/shadowing.py
@@ -16,7 +16,7 @@ def test():
class A(object):
def foo(self):
print("A.foo")
- foo() # $ pt=foo MISSING: tt=foo
+ foo() # $ pt,tt=foo
a = A()
a.foo() # $ pt,tt=A.foo
From 5a976cfb1460a37e51c128ae5f7edf71644d6030 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 8 Sep 2022 21:11:29 +0200
Subject: [PATCH 022/415] Python: Add more `**kwargs` arg passing tests
---
.../dataflow/coverage/argumentPassing.py | 71 +++++++++++--------
1 file changed, 42 insertions(+), 29 deletions(-)
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index 7129cded015..68657c2ae05 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -38,6 +38,14 @@ SINK5 = functools.partial(SINK, expected=arg5)
SINK6 = functools.partial(SINK, expected=arg6)
SINK7 = functools.partial(SINK, expected=arg7)
+SINK1_F = functools.partial(SINK_F, unexpected=arg1)
+SINK2_F = functools.partial(SINK_F, unexpected=arg2)
+SINK3_F = functools.partial(SINK_F, unexpected=arg3)
+SINK4_F = functools.partial(SINK_F, unexpected=arg4)
+SINK5_F = functools.partial(SINK_F, unexpected=arg5)
+SINK6_F = functools.partial(SINK_F, unexpected=arg6)
+SINK7_F = functools.partial(SINK_F, unexpected=arg7)
+
def argument_passing(
a,
@@ -64,7 +72,7 @@ def argument_passing(
@expects(7)
def test_argument_passing1():
- argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 MISSING: arg2 arg3 arg4 arg6 arg7
+ argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 MISSING: arg2 arg3 arg4 arg6 arg7
@expects(7)
@@ -112,32 +120,6 @@ def test_default_arguments():
with_default_arguments(**{"c": arg3}) #$ MISSING: arg3
-# Nested constructor pattern
-def grab_foo_bar_baz(foo, **kwargs):
- SINK1(foo)
- grab_bar_baz(**kwargs)
-
-
-# It is not possible to pass `bar` into `kwargs`,
-# since `bar` is a valid keyword argument.
-def grab_bar_baz(bar, **kwargs):
- SINK2(bar)
- try:
- SINK2_F(kwargs["bar"])
- except:
- print("OK")
- grab_baz(**kwargs)
-
-
-def grab_baz(baz):
- SINK3(baz)
-
-
-@expects(4)
-def test_grab():
- grab_foo_bar_baz(baz=arg3, bar=arg2, foo=arg1) #$ arg1 MISSING: arg2 func=grab_bar_baz arg3 func=grab_baz
-
-
# All combinations
def test_pos_pos():
def with_pos(a):
@@ -183,7 +165,38 @@ def test_kw_kw():
def test_kw_doublestar():
- def with_doublestar(**a):
- SINK1(a["a"])
+ def with_doublestar(**kwargs):
+ SINK1(kwargs["a"])
with_doublestar(a=arg1) #$ MISSING: arg1 func=test_kw_doublestar.with_doublestar
+
+
+def only_kwargs(**kwargs):
+ SINK1(kwargs["a"])
+ SINK2(kwargs["b"])
+ SINK3_F(kwargs["c"])
+
+@expects(3)
+def test_kwargs():
+ args = {"a": arg1, "b": arg2, "c": "safe"} # $ MISSING: arg1 arg2 func=only_kwargs
+ only_kwargs(**args)
+
+
+def mixed(a, **kwargs):
+ SINK1(a)
+ try:
+ SINK1_F(kwargs["a"]) # since 'a' is a keyword argument, it cannot be part of **kwargs
+ except KeyError:
+ print("OK")
+ SINK2(kwargs["b"])
+ SINK3_F(kwargs["c"])
+
+@expects(4*3)
+def test_mixed():
+ mixed(a=arg1, b=arg2, c="safe") # $ arg1 MISSING: arg2
+
+ args = {"b": arg2, "c": "safe"} # $ MISSING: arg2 func=mixed
+ mixed(a=arg1, **args) # $ arg1
+
+ args = {"a": arg1, "b": arg2, "c": "safe"} # $ MISSING: arg2 func=mixed MISSING: arg1
+ mixed(**args)
From 9b2663034d30d9e129491f8e9aa3e6892f800ada Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 9 Sep 2022 10:16:25 +0200
Subject: [PATCH 023/415] Python: Change test .ql to also show bad argument
flow
---
.../coverage/argumentPassing_bad_flow_test.py | 63 +++++++++
.../dataflow/coverage/argumentRoutingTest.ql | 129 ++++++++++++------
2 files changed, 149 insertions(+), 43 deletions(-)
create mode 100644 python/ql/test/experimental/dataflow/coverage/argumentPassing_bad_flow_test.py
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing_bad_flow_test.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing_bad_flow_test.py
new file mode 100644
index 00000000000..44451801a9e
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing_bad_flow_test.py
@@ -0,0 +1,63 @@
+import sys
+import os
+import functools
+
+sys.path.append(os.path.dirname(os.path.dirname((__file__))))
+from testlib import expects
+
+arg = "source"
+arg1 = "source1"
+arg2 = "source2"
+arg3 = "source3"
+arg4 = "source4"
+arg5 = "source5"
+arg6 = "source6"
+arg7 = "source7"
+
+
+def SINK_TEST(x, test):
+ if test(x):
+ print("OK")
+ else:
+ print("Unexpected flow", x)
+
+
+def SINK(x, expected=arg):
+ SINK_TEST(x, test=lambda x: x == expected)
+
+
+def SINK_F(x, unexpected=arg):
+ SINK_TEST(x, test=lambda x: x != unexpected)
+
+
+SINK1 = functools.partial(SINK, expected=arg1)
+SINK2 = functools.partial(SINK, expected=arg2)
+SINK3 = functools.partial(SINK, expected=arg3)
+SINK4 = functools.partial(SINK, expected=arg4)
+SINK5 = functools.partial(SINK, expected=arg5)
+SINK6 = functools.partial(SINK, expected=arg6)
+SINK7 = functools.partial(SINK, expected=arg7)
+
+SINK1_F = functools.partial(SINK_F, unexpected=arg1)
+SINK2_F = functools.partial(SINK_F, unexpected=arg2)
+SINK3_F = functools.partial(SINK_F, unexpected=arg3)
+SINK4_F = functools.partial(SINK_F, unexpected=arg4)
+SINK5_F = functools.partial(SINK_F, unexpected=arg5)
+SINK6_F = functools.partial(SINK_F, unexpected=arg6)
+SINK7_F = functools.partial(SINK_F, unexpected=arg7)
+
+
+def bad_argument_flow_func(arg):
+ SINK1_F(arg)
+
+def bad_argument_flow_func2(arg):
+ SINK2(arg)
+
+def test_bad_argument_flow():
+ # this is just a test to show that the testing setup works
+
+ # in the first one, we pretend we expected no flow for arg1
+ bad_argument_flow_func(arg1) # $ bad1="arg1"
+
+ # in the second one, we pretend we wanted flow for arg2 instead
+ bad_argument_flow_func2(arg1) # $ bad2="arg1"
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentRoutingTest.ql b/python/ql/test/experimental/dataflow/coverage/argumentRoutingTest.ql
index 6a4db891996..2adbd635090 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentRoutingTest.ql
+++ b/python/ql/test/experimental/dataflow/coverage/argumentRoutingTest.ql
@@ -9,19 +9,60 @@ class Argument1RoutingTest extends RoutingTest {
override string flowTag() { result = "arg1" }
override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
- exists(Argument1RoutingConfig cfg | cfg.hasFlow(source, sink))
+ exists(Argument1ExtraRoutingConfig cfg | cfg.hasFlow(source, sink))
+ or
+ exists(ArgumentRoutingConfig cfg |
+ cfg.hasFlow(source, sink) and
+ cfg.isArgSource(source, 1) and
+ cfg.isGoodSink(sink, 1)
+ )
}
}
-/**
- * A configuration to check routing of arguments through magic methods.
- */
-class Argument1RoutingConfig extends DataFlow::Configuration {
- Argument1RoutingConfig() { this = "Argument1RoutingConfig" }
+class ArgNumber extends int {
+ ArgNumber() { this in [1 .. 7] }
+}
+
+class ArgumentRoutingConfig extends DataFlow::Configuration {
+ ArgumentRoutingConfig() { this = "ArgumentRoutingConfig" }
+
+ predicate isArgSource(DataFlow::Node node, ArgNumber argNumber) {
+ node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg" + argNumber
+ }
+
+ override predicate isSource(DataFlow::Node node) { this.isArgSource(node, _) }
+
+ predicate isGoodSink(DataFlow::Node node, ArgNumber argNumber) {
+ exists(CallNode call |
+ call.getFunction().(NameNode).getId() = "SINK" + argNumber and
+ node.(DataFlow::CfgNode).getNode() = call.getAnArg()
+ )
+ }
+
+ predicate isBadSink(DataFlow::Node node, ArgNumber argNumber) {
+ exists(CallNode call |
+ call.getFunction().(NameNode).getId() = "SINK" + argNumber + "_F" and
+ node.(DataFlow::CfgNode).getNode() = call.getAnArg()
+ )
+ }
+
+ override predicate isSink(DataFlow::Node node) {
+ this.isGoodSink(node, _) or this.isBadSink(node, _)
+ }
+
+ /**
+ * We want to be able to use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
+ * Use-use flow lets the argument to the first call reach the sink inside the second call,
+ * making it seem like we handle all cases even if we only handle the last one.
+ * We make the test honest by preventing flow into source nodes.
+ */
+ override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
+}
+
+class Argument1ExtraRoutingConfig extends DataFlow::Configuration {
+ Argument1ExtraRoutingConfig() { this = "Argument1ExtraRoutingConfig" }
override predicate isSource(DataFlow::Node node) {
- node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg1"
- or
exists(AssignmentDefinition def, DataFlow::CallCfgNode call |
def.getVariable() = node.(DataFlow::EssaNode).getVar() and
def.getValue() = call.getNode() and
@@ -46,57 +87,59 @@ class Argument1RoutingConfig extends DataFlow::Configuration {
override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
}
-// for argument 2 and up, we use a generic approach. Change `maxNumArgs` below if we
-// need to increase the maximum number of arguments.
-private int maxNumArgs() { result = 7 }
-
class RestArgumentRoutingTest extends RoutingTest {
- int argNumber;
+ ArgNumber argNumber;
RestArgumentRoutingTest() {
- argNumber in [2 .. maxNumArgs()] and
+ argNumber > 1 and
this = "Argument" + argNumber + "RoutingTest"
}
override string flowTag() { result = "arg" + argNumber }
override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
- exists(RestArgumentRoutingConfig cfg | cfg.getArgNumber() = argNumber |
- cfg.hasFlow(source, sink)
+ exists(ArgumentRoutingConfig cfg |
+ cfg.hasFlow(source, sink) and
+ cfg.isArgSource(source, argNumber) and
+ cfg.isGoodSink(sink, argNumber)
)
}
}
-/**
- * A configuration to check routing of arguments through magic methods.
- */
-class RestArgumentRoutingConfig extends DataFlow::Configuration {
- int argNumber;
+/** Bad flow from `arg<n>` to `SINK<n>_F` */
+class BadArgumentRoutingTestSinkF extends RoutingTest {
+ ArgNumber argNumber;
- RestArgumentRoutingConfig() {
- argNumber in [2 .. maxNumArgs()] and
- this = "Argument" + argNumber + "RoutingConfig"
- }
+ BadArgumentRoutingTestSinkF() { this = "BadArgumentRoutingTestSinkF" + argNumber }
- /** Gets the argument number this configuration is for. */
- int getArgNumber() { result = argNumber }
+ override string flowTag() { result = "bad" + argNumber }
- override predicate isSource(DataFlow::Node node) {
- node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg" + argNumber
- }
-
- override predicate isSink(DataFlow::Node node) {
- exists(CallNode call |
- call.getFunction().(NameNode).getId() = "SINK" + argNumber and
- node.(DataFlow::CfgNode).getNode() = call.getAnArg()
+ override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
+ exists(ArgumentRoutingConfig cfg |
+ cfg.hasFlow(source, sink) and
+ cfg.isArgSource(source, argNumber) and
+ cfg.isBadSink(sink, argNumber)
+ )
+ }
+}
+
+/** Bad flow from `arg<n>` to `SINK<m>` or `SINK<m>_F`, where `n != m`. */
+class BadArgumentRoutingTestWrongSink extends RoutingTest {
+ ArgNumber argNumber;
+
+ BadArgumentRoutingTestWrongSink() { this = "BadArgumentRoutingTestWrongSink" + argNumber }
+
+ override string flowTag() { result = "bad" + argNumber }
+
+ override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
+ exists(ArgumentRoutingConfig cfg |
+ cfg.hasFlow(source, sink) and
+ cfg.isArgSource(source, any(ArgNumber i | not i = argNumber)) and
+ (
+ cfg.isGoodSink(sink, argNumber)
+ or
+ cfg.isBadSink(sink, argNumber)
+ )
)
}
-
- /**
- * We want to be able to use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
- * Use-use flow lets the argument to the first call reach the sink inside the second call,
- * making it seem like we handle all cases even if we only handle the last one.
- * We make the test honest by preventing flow into source nodes.
- */
- override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
}
From 5722d231bdae159f391945c762b3834d013733ef Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 6 Sep 2022 16:21:13 +0200
Subject: [PATCH 024/415] Python: Add basic support for `**kwargs`
For now this is JUST from `**kwargs` in arguments, to `**kwargs`
parameters, and this part is based on field-flow.
Note that the dataflow library complains about missing post-update nodes
for these. This needs to be ignored, since post-update nodes for
`**kwargs` arguments don't make sense: it's not possible for the called
method to alter the caller's dictionary.
---
python/ql/lib/semmle/python/Flow.qll | 6 +++++
.../new/internal/DataFlowDispatch.qll | 22 +++++++++++++++++--
.../dataflow/coverage/argumentPassing.py | 8 +++----
.../coverage/dataflow-consistency.expected | 15 +++++++++++++
4 files changed, 45 insertions(+), 6 deletions(-)
diff --git a/python/ql/lib/semmle/python/Flow.qll b/python/ql/lib/semmle/python/Flow.qll
index e736749bba5..bd491d527cc 100644
--- a/python/ql/lib/semmle/python/Flow.qll
+++ b/python/ql/lib/semmle/python/Flow.qll
@@ -411,6 +411,12 @@ class CallNode extends ControlFlowNode {
result.getNode() = this.getNode().getStarArg() and
result.getBasicBlock().dominates(this.getBasicBlock())
}
+
+ /** Gets a dictionary (**) argument of this call, if any. */
+ ControlFlowNode getKwargs() {
+ result.getNode() = this.getNode().getKwargs() and
+ result.getBasicBlock().dominates(this.getBasicBlock())
+ }
}
/** A control flow corresponding to an attribute expression, such as `value.attr` */
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 83680b04247..bcf1ae97940 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -41,7 +41,8 @@ newtype TParameterPosition =
/** Used for `self` in methods, and `cls` in classmethods. */
TSelfParameterPosition() or
TPositionalParameterPosition(int pos) { pos = any(Parameter p).getPosition() } or
- TKeywordParameterPosition(string name) { name = any(Parameter p).getName() }
+ TKeywordParameterPosition(string name) { name = any(Parameter p).getName() } or
+ TDictSplatParameterPosition()
/** A parameter position. */
class ParameterPosition extends TParameterPosition {
@@ -54,6 +55,9 @@ class ParameterPosition extends TParameterPosition {
/** Holds if this position represents a keyword parameter named `name`. */
predicate isKeyword(string name) { this = TKeywordParameterPosition(name) }
+ /** Holds if this position represents a `**kwargs` parameter. */
+ predicate isDictSplat() { this = TDictSplatParameterPosition() }
+
/** Gets a textual representation of this element. */
string toString() {
this.isSelf() and result = "self"
@@ -61,6 +65,8 @@ class ParameterPosition extends TParameterPosition {
exists(int index | this.isPositional(index) and result = "position " + index)
or
exists(string name | this.isKeyword(name) and result = "keyword " + name)
+ or
+ this.isDictSplat() and result = "**"
}
}
@@ -68,7 +74,8 @@ newtype TArgumentPosition =
/** Used for `self` in methods, and `cls` in classmethods. */
TSelfArgumentPosition() or
TPositionalArgumentPosition(int pos) { exists(any(CallNode c).getArg(pos)) } or
- TKeywordArgumentPosition(string name) { exists(any(CallNode c).getArgByName(name)) }
+ TKeywordArgumentPosition(string name) { exists(any(CallNode c).getArgByName(name)) } or
+ TDictSplatArgumentPosition()
/** An argument position. */
class ArgumentPosition extends TArgumentPosition {
@@ -81,6 +88,9 @@ class ArgumentPosition extends TArgumentPosition {
/** Holds if this position represents a keyword argument named `name`. */
predicate isKeyword(string name) { this = TKeywordArgumentPosition(name) }
+ /** Holds if this position represents a `**kwargs` argument. */
+ predicate isDictSplat() { this = TDictSplatArgumentPosition() }
+
/** Gets a textual representation of this element. */
string toString() {
this.isSelf() and result = "self"
@@ -88,6 +98,8 @@ class ArgumentPosition extends TArgumentPosition {
exists(int pos | this.isPositional(pos) and result = "position " + pos)
or
exists(string name | this.isKeyword(name) and result = "keyword " + name)
+ or
+ this.isDictSplat() and result = "**"
}
}
@@ -99,6 +111,8 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
exists(int index | ppos.isPositional(index) and apos.isPositional(index))
or
exists(string name | ppos.isKeyword(name) and apos.isKeyword(name))
+ or
+ ppos.isDictSplat() and apos.isDictSplat()
}
// =============================================================================
@@ -183,6 +197,8 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
)
or
exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
+ or
+ ppos.isDictSplat() and result.getParameter() = func.getKwarg()
}
}
@@ -893,6 +909,8 @@ private predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos)
apos.isKeyword(name) and
arg.asCfgNode() = call.getArgByName(name)
)
+ or
+ apos.isDictSplat() and arg.asCfgNode() = call.getKwargs()
}
/**
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index 68657c2ae05..b2bd64ec268 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -72,7 +72,7 @@ def argument_passing(
@expects(7)
def test_argument_passing1():
- argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 MISSING: arg2 arg3 arg4 arg6 arg7
+ argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 arg7 func=argument_passing MISSING: arg2 arg3 arg4 arg6
@expects(7)
@@ -178,7 +178,7 @@ def only_kwargs(**kwargs):
@expects(3)
def test_kwargs():
- args = {"a": arg1, "b": arg2, "c": "safe"} # $ MISSING: arg1 arg2 func=only_kwargs
+ args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=only_kwargs
only_kwargs(**args)
@@ -195,8 +195,8 @@ def mixed(a, **kwargs):
def test_mixed():
mixed(a=arg1, b=arg2, c="safe") # $ arg1 MISSING: arg2
- args = {"b": arg2, "c": "safe"} # $ MISSING: arg2 func=mixed
+ args = {"b": arg2, "c": "safe"} # $ arg2 func=mixed
mixed(a=arg1, **args) # $ arg1
- args = {"a": arg1, "b": arg2, "c": "safe"} # $ MISSING: arg2 func=mixed MISSING: arg1
+ args = {"a": arg1, "b": arg2, "c": "safe"} # $ bad1="arg1" arg2 func=mixed
mixed(**args)
diff --git a/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected
index 8f4dbd04742..ec828310226 100644
--- a/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected
+++ b/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected
@@ -17,5 +17,20 @@ uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
+| argumentPassing.py:75:59:75:80 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
+| argumentPassing.py:105:35:105:45 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
+| argumentPassing.py:106:29:106:39 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
+| argumentPassing.py:106:44:106:54 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
+| argumentPassing.py:106:59:106:69 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
+| argumentPassing.py:120:30:120:40 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
+| argumentPassing.py:182:19:182:22 | ControlFlowNode for args | ArgumentNode is missing PostUpdateNode. |
+| argumentPassing.py:196:21:196:24 | ControlFlowNode for args | ArgumentNode is missing PostUpdateNode. |
+| argumentPassing.py:199:13:199:16 | ControlFlowNode for args | ArgumentNode is missing PostUpdateNode. |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/functools.py:400:58:400:70 | ControlFlowNode for Attribute | ArgumentNode is missing PostUpdateNode. |
+| test.py:396:30:396:42 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
+| test.py:422:33:422:46 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
+| test.py:512:30:512:42 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
+| test.py:529:33:529:46 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
+| test.py:838:17:838:18 | ControlFlowNode for dd | ArgumentNode is missing PostUpdateNode. |
postWithInFlow
viableImplInCallContextTooLarge
From eb600f07b74e6dd10f360248945992c40d63314f Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 8 Sep 2022 21:32:36 +0200
Subject: [PATCH 025/415] Python: Use config for `dataflow-consistency.ql`
And ignore post-update nodes for `**kwargs` arguments
---
.../dataflow/TestUtil/DataFlowConsistency.qll | 11 +++++++++++
.../dataflow/basic/dataflow-consistency.ql | 3 ++-
.../callgraph_crosstalk/dataflow-consistency.ql | 3 ++-
.../dataflow/calls/dataflow-consistency.ql | 3 ++-
.../dataflow/consistency/dataflow-consistency.ql | 3 ++-
.../coverage/dataflow-consistency.expected | 15 ---------------
.../dataflow/coverage/dataflow-consistency.ql | 3 ++-
.../dataflow/fieldflow/dataflow-consistency.ql | 3 ++-
.../dataflow/global-flow/dataflow-consistency.ql | 3 ++-
.../dataflow/match/dataflow-consistency.ql | 3 ++-
.../dataflow/pep_328/dataflow-consistency.ql | 3 ++-
.../dataflow/regression/dataflow-consistency.ql | 3 ++-
.../strange-essaflow/dataflow-consistency.ql | 3 ++-
.../tainttracking/basic/dataflow-consistency.ql | 3 ++-
.../commonSanitizer/dataflow-consistency.ql | 3 ++-
.../customSanitizer/dataflow-consistency.ql | 3 ++-
.../dataflow-consistency.ql | 3 ++-
.../dataflow-consistency.ql | 3 ++-
.../unwanted-global-flow/dataflow-consistency.ql | 3 ++-
.../dataflow/typetracking/dataflow-consistency.ql | 3 ++-
.../variable-capture/dataflow-consistency.ql | 3 ++-
.../CallGraph/dataflow-consistency.ql | 3 ++-
.../ApiGraphs/py3/dataflow-consistency.ql | 3 ++-
.../frameworks/django-orm/dataflow-consistency.ql | 3 ++-
24 files changed, 55 insertions(+), 37 deletions(-)
create mode 100644 python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
diff --git a/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
new file mode 100644
index 00000000000..b11c3ecd838
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
@@ -0,0 +1,11 @@
+import semmle.python.dataflow.new.DataFlow::DataFlow
+import semmle.python.dataflow.new.internal.DataFlowPrivate
+import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+
+// TODO: this should be promoted to be a REAL consistency query by being placed in
+// `python/ql/consistency-queries`. For now it resides here.
+private class MyConsistencyConfiguration extends ConsistencyConfiguration {
+ override predicate argHasPostUpdateExclude(ArgumentNode n) {
+ exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
+ }
+}
diff --git a/python/ql/test/experimental/dataflow/basic/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/basic/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/basic/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/basic/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/calls/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/calls/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/calls/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/calls/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected
index ec828310226..8f4dbd04742 100644
--- a/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected
+++ b/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected
@@ -17,20 +17,5 @@ uniquePostUpdate
postIsInSameCallable
reverseRead
argHasPostUpdate
-| argumentPassing.py:75:59:75:80 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
-| argumentPassing.py:105:35:105:45 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
-| argumentPassing.py:106:29:106:39 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
-| argumentPassing.py:106:44:106:54 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
-| argumentPassing.py:106:59:106:69 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
-| argumentPassing.py:120:30:120:40 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
-| argumentPassing.py:182:19:182:22 | ControlFlowNode for args | ArgumentNode is missing PostUpdateNode. |
-| argumentPassing.py:196:21:196:24 | ControlFlowNode for args | ArgumentNode is missing PostUpdateNode. |
-| argumentPassing.py:199:13:199:16 | ControlFlowNode for args | ArgumentNode is missing PostUpdateNode. |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/functools.py:400:58:400:70 | ControlFlowNode for Attribute | ArgumentNode is missing PostUpdateNode. |
-| test.py:396:30:396:42 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
-| test.py:422:33:422:46 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
-| test.py:512:30:512:42 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
-| test.py:529:33:529:46 | ControlFlowNode for Dict | ArgumentNode is missing PostUpdateNode. |
-| test.py:838:17:838:18 | ControlFlowNode for dd | ArgumentNode is missing PostUpdateNode. |
postWithInFlow
viableImplInCallContextTooLarge
diff --git a/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/fieldflow/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/fieldflow/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/fieldflow/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/fieldflow/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/global-flow/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/global-flow/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/global-flow/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/global-flow/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/match/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/match/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/match/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/match/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/pep_328/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/pep_328/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/pep_328/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/pep_328/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/regression/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/regression/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/regression/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/regression/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/strange-essaflow/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/strange-essaflow/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/strange-essaflow/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/strange-essaflow/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/tainttracking/basic/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/tainttracking/basic/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/tainttracking/basic/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/tainttracking/basic/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/tainttracking/commonSanitizer/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/tainttracking/commonSanitizer/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/tainttracking/commonSanitizer/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/tainttracking/commonSanitizer/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/tainttracking/customSanitizer/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/tainttracking/customSanitizer/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/tainttracking/customSanitizer/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/tainttracking/customSanitizer/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/tainttracking/unwanted-global-flow/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/tainttracking/unwanted-global-flow/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/tainttracking/unwanted-global-flow/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/tainttracking/unwanted-global-flow/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/typetracking/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/typetracking/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/typetracking/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/typetracking/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/dataflow/variable-capture/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/variable-capture/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/variable-capture/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/variable-capture/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.ql b/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.ql
+++ b/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/library-tests/ApiGraphs/py3/dataflow-consistency.ql b/python/ql/test/library-tests/ApiGraphs/py3/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/library-tests/ApiGraphs/py3/dataflow-consistency.ql
+++ b/python/ql/test/library-tests/ApiGraphs/py3/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
diff --git a/python/ql/test/library-tests/frameworks/django-orm/dataflow-consistency.ql b/python/ql/test/library-tests/frameworks/django-orm/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/library-tests/frameworks/django-orm/dataflow-consistency.ql
+++ b/python/ql/test/library-tests/frameworks/django-orm/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
From 503ad544e95ba051d5adf1935ef7c45bb497b27c Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 9 Sep 2022 13:53:01 +0200
Subject: [PATCH 026/415] Python: Remove impossible flow for `**kwargs` params
---
.../dataflow/new/internal/DataFlowPrivate.qll | 24 +++++++++++++++++++
.../dataflow/coverage/argumentPassing.py | 2 +-
2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index 8c4c807d3a8..cab77c83290 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -54,6 +54,28 @@ class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
override Location getLocation() { result = node.getLocation() }
}
+/**
+ * Ensures that the a `**kwargs` parameter will not contain elements with names of
+ * keyword parameters.
+ *
+ * For example, for the function below, it's not possible that the `kwargs` dictionary
+ * can contain an element with the name `a`, since that parameter can be given as a
+ * keyword argument.
+ *
+ * ```py
+ * def func(a, **kwargs):
+ * ...
+ * ```
+ */
+private predicate dictSplatParameterNodeClearStep(ParameterNode n, DictionaryElementContent c) {
+ exists(DataFlowCallable callable, ParameterPosition dictSplatPos, ParameterPosition keywordPos |
+ dictSplatPos.isDictSplat() and
+ n = callable.getParameter(dictSplatPos) and
+ exists(callable.getParameter(keywordPos)) and
+ keywordPos.isKeyword(c.getKey())
+ )
+}
+
abstract class PostUpdateNodeImpl extends Node {
/** Gets the node before the state update. */
abstract Node getPreUpdateNode();
@@ -673,6 +695,8 @@ predicate clearsContent(Node n, Content c) {
attributeClearStep(n, c)
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
+ or
+ dictSplatParameterNodeClearStep(n, c)
}
/**
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index b2bd64ec268..eca8ec9c1f5 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -198,5 +198,5 @@ def test_mixed():
args = {"b": arg2, "c": "safe"} # $ arg2 func=mixed
mixed(a=arg1, **args) # $ arg1
- args = {"a": arg1, "b": arg2, "c": "safe"} # $ bad1="arg1" arg2 func=mixed
+ args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg2 func=mixed MISSING: arg1
mixed(**args)
From 215a03d94811468101f4c7b776c1bb2d94b7843e Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 9 Sep 2022 13:59:54 +0200
Subject: [PATCH 027/415] Python: Support flow to `**kwargs` param from keyword
arg
---
.../new/internal/DataFlowDispatch.qll | 7 ++++-
.../dataflow/new/internal/DataFlowPrivate.qll | 31 +++++++++++++++++++
.../dataflow/new/internal/DataFlowPublic.qll | 3 +-
.../dataflow/coverage/argumentPassing.py | 4 +--
.../experimental/dataflow/coverage/test.py | 4 +--
5 files changed, 43 insertions(+), 6 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index bcf1ae97940..1b9e2ded82f 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -910,7 +910,12 @@ private predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos)
arg.asCfgNode() = call.getArgByName(name)
)
or
- apos.isDictSplat() and arg.asCfgNode() = call.getKwargs()
+ apos.isDictSplat() and
+ (
+ arg.asCfgNode() = call.getKwargs()
+ or
+ arg = TSynthDictSplatArgumentNode(call)
+ )
}
/**
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index cab77c83290..d6abd40e721 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -54,6 +54,33 @@ class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
override Location getLocation() { result = node.getLocation() }
}
+/**
+ * A (synthetic) data-flow node that represents all keyword arguments, as if they had
+ * been passed in a `**kwargs` argument.
+ */
+class SynthDictSplatArgumentNode extends Node, TSynthDictSplatArgumentNode {
+ CallNode node;
+
+ SynthDictSplatArgumentNode() { this = TSynthDictSplatArgumentNode(node) }
+
+ override string toString() { result = "SynthDictSplatArgumentNode" }
+
+ override Scope getScope() { result = node.getScope() }
+
+ override Location getLocation() { result = node.getLocation() }
+}
+
+private predicate synthDictSplatArgumentNodeStoreStep(
+ ArgumentNode nodeFrom, DictionaryElementContent c, SynthDictSplatArgumentNode nodeTo
+) {
+ exists(string name, CallNode call, ArgumentPosition keywordPos |
+ nodeTo = TSynthDictSplatArgumentNode(call) and
+ getCallArg(call, _, _, nodeFrom, keywordPos) and
+ keywordPos.isKeyword(name) and
+ c.getKey() = name
+ )
+}
+
/**
* Ensures that the a `**kwargs` parameter will not contain elements with names of
* keyword parameters.
@@ -426,6 +453,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom, c, nodeTo)
+ or
+ synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
}
/**
@@ -752,6 +781,8 @@ predicate nodeIsHidden(Node n) {
n instanceof SummaryNode
or
n instanceof SummaryParameterNode
+ or
+ n instanceof SynthDictSplatArgumentNode
}
class LambdaCallKind = Unit;
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
index d184dc4117c..761202ecb94 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -112,7 +112,8 @@ newtype TNode =
} or
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
- }
+ } or
+ TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) }
class TParameterNode = TCfgNode or TSummaryParameterNode;
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index eca8ec9c1f5..4d87e750572 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -168,7 +168,7 @@ def test_kw_doublestar():
def with_doublestar(**kwargs):
SINK1(kwargs["a"])
- with_doublestar(a=arg1) #$ MISSING: arg1 func=test_kw_doublestar.with_doublestar
+ with_doublestar(a=arg1) #$ arg1 func=test_kw_doublestar.with_doublestar
def only_kwargs(**kwargs):
@@ -193,7 +193,7 @@ def mixed(a, **kwargs):
@expects(4*3)
def test_mixed():
- mixed(a=arg1, b=arg2, c="safe") # $ arg1 MISSING: arg2
+ mixed(a=arg1, b=arg2, c="safe") # $ arg1 arg2
args = {"b": arg2, "c": "safe"} # $ arg2 func=mixed
mixed(a=arg1, **args) # $ arg1
diff --git a/python/ql/test/experimental/dataflow/coverage/test.py b/python/ql/test/experimental/dataflow/coverage/test.py
index 0e06a828700..f4aadf433b2 100644
--- a/python/ql/test/experimental/dataflow/coverage/test.py
+++ b/python/ql/test/experimental/dataflow/coverage/test.py
@@ -409,7 +409,7 @@ def f_extra_keyword(a, **b):
def test_call_extra_keyword():
- SINK(f_extra_keyword(NONSOURCE, b=SOURCE)) #$ MISSING: flow="SOURCE -> f_extra_keyword(..)"
+ SINK(f_extra_keyword(NONSOURCE, b=SOURCE)) #$ flow="SOURCE -> f_extra_keyword(..)"
# return the name of the first extra keyword argument
@@ -519,7 +519,7 @@ def test_lambda_extra_pos():
def test_lambda_extra_keyword():
f_extra_keyword = lambda a, **b: b["b"]
- SINK(f_extra_keyword(NONSOURCE, b=SOURCE)) #$ MISSING: flow="SOURCE -> f_extra_keyword(..)"
+ SINK(f_extra_keyword(NONSOURCE, b=SOURCE)) #$ flow="SOURCE -> f_extra_keyword(..)"
# call the function with our source as the name of the keyword argument
From c687df4ddc862ae8737f916e771de5938db87909 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 9 Sep 2022 17:05:08 +0200
Subject: [PATCH 028/415] Python: Support flow to keyword param from `**kwargs`
arg
When resolving the merge conflict after flow-summaries was merged, this is
the original commit where I introduced ParameterNodeImpl, so this is the
commit where differences in that implementation were committed...
I removed TParameterNode, since I could not see that we gain anything from
having it.
---
.../new/internal/DataFlowDispatch.qll | 6 +-
.../dataflow/new/internal/DataFlowPrivate.qll | 69 +++++++++++++++++++
.../dataflow/new/internal/DataFlowPublic.qll | 17 +++--
.../dataflow/TestUtil/DataFlowConsistency.qll | 8 +++
.../dataflow/basic/callGraphSinks.expected | 1 +
.../dataflow/basic/local.expected | 1 +
.../dataflow/basic/sinks.expected | 1 +
.../dataflow/basic/sources.expected | 1 +
.../dataflow/coverage/argumentPassing.py | 10 +--
.../experimental/dataflow/coverage/test.py | 4 +-
10 files changed, 101 insertions(+), 17 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 1b9e2ded82f..ffca8390d86 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -199,6 +199,8 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
or
ppos.isDictSplat() and result.getParameter() = func.getKwarg()
+ or
+ ppos.isDictSplat() and result = TSynthDictSplatParameterNode(this)
}
}
@@ -1194,7 +1196,9 @@ abstract class ParameterNodeImpl extends Node {
* Holds if this node is the parameter of callable `c` at the
* position `ppos`.
*/
- abstract predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos);
+ predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
+ this = c.getParameter(ppos)
+ }
}
/** A parameter for a library callable with a flow summary. */
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index d6abd40e721..7b2a1d81580 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -39,6 +39,9 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos)
//--------
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
+// =============================================================================
+// SyntheticPreUpdateNode
+// =============================================================================
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
CallNode node;
@@ -54,6 +57,9 @@ class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
override Location getLocation() { result = node.getLocation() }
}
+// =============================================================================
+// **kwargs (DictSplat) related
+// =============================================================================
/**
* A (synthetic) data-flow node that represents all keyword arguments, as if they had
* been passed in a `**kwargs` argument.
@@ -98,11 +104,70 @@ private predicate dictSplatParameterNodeClearStep(ParameterNode n, DictionaryEle
exists(DataFlowCallable callable, ParameterPosition dictSplatPos, ParameterPosition keywordPos |
dictSplatPos.isDictSplat() and
n = callable.getParameter(dictSplatPos) and
+ not n instanceof SynthDictSplatParameterNode and
exists(callable.getParameter(keywordPos)) and
keywordPos.isKeyword(c.getKey())
)
}
+/**
+ * A synthetic data-flow node to allow flow to keyword parameters from a `**kwargs` argument.
+ *
+ * Take the code snippet below as an example. Since the call only has a `**kwargs` argument,
+ * with a `**` argument position, we add this synthetic parameter node with `**` parameter position,
+ * and a read step to the `p1` parameter.
+ *
+ * ```py
+ * def foo(p1): ...
+ *
+ * kwargs = {"p1": 42}
+ * foo(**kwargs)
+ * ```
+ *
+ *
+ * Note that this will introduce a bit of redundancy in cases like
+ *
+ * ```py
+ * foo(p1=taint(1), p2=taint(2))
+ * ```
+ *
+ * where direct keyword matching is possible, since we construct a synthesized dict
+ * splat argument (`SynthDictSplatArgumentNode`) at the call site, which means that
+ * `taint(1)` will flow into `p1` both via normal keyword matching and via the synthesized
+ * nodes (and similarly for `p2`). However, this redundancy is OK since
+ * (a) it means that type-tracking through keyword arguments also works in most cases,
+ * (b) read/store steps can be avoided when direct keyword matching is possible, and
+ * hence access path limits are not a concern, and
+ * (c) since the synthesized nodes are hidden, the reported data-flow paths will be
+ * collapsed anyway.
+ */
+class SynthDictSplatParameterNode extends ParameterNodeImpl, TSynthDictSplatParameterNode {
+ DataFlowCallable callable;
+
+ SynthDictSplatParameterNode() { this = TSynthDictSplatParameterNode(callable) }
+
+ override string toString() { result = "SynthDictSplatParameterNode" }
+
+ override Scope getScope() { result = callable.getScope() }
+
+ override Location getLocation() { result = callable.getLocation() }
+
+ override Parameter getParameter() { none() }
+}
+
+predicate synthDictSplatParameterNodeReadStep(
+ SynthDictSplatParameterNode nodeFrom, DictionaryElementContent c, ParameterNode nodeTo
+) {
+ exists(DataFlowCallable callable, ParameterPosition ppos |
+ nodeFrom = TSynthDictSplatParameterNode(callable) and
+ nodeTo = callable.getParameter(ppos) and
+ ppos.isKeyword(c.getKey())
+ )
+}
+
+// =============================================================================
+// PostUpdateNode
+// =============================================================================
abstract class PostUpdateNodeImpl extends Node {
/** Gets the node before the state update. */
abstract Node getPreUpdateNode();
@@ -624,6 +689,8 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
attributeReadStep(nodeFrom, c, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom, c, nodeTo)
+ or
+ synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
}
/** Data flows from a sequence to a subscript of the sequence. */
@@ -783,6 +850,8 @@ predicate nodeIsHidden(Node n) {
n instanceof SummaryParameterNode
or
n instanceof SynthDictSplatArgumentNode
+ or
+ n instanceof SynthDictSplatParameterNode
}
class LambdaCallKind = Unit;
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
index 761202ecb94..94d7bb70543 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -64,7 +64,7 @@ newtype TNode =
exists(Class cls, Function func, ParameterDefinition def |
func = cls.getAMethod() and
not hasStaticmethodDecorator(func) and
- // this matches what we do in ParameterNode
+ // this matches what we do in ExtractedParameterNode
def.getDefiningNode() = node and
def.getParameter() = func.getArg(0)
)
@@ -113,9 +113,12 @@ newtype TNode =
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
} or
- TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) }
-
-class TParameterNode = TCfgNode or TSummaryParameterNode;
+ /** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
+ TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
+ /** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
+ TSynthDictSplatParameterNode(DataFlowCallable callable) {
+ exists(ParameterPosition ppos | ppos.isKeyword(_) | exists(callable.getParameter(ppos)))
+ }
/** Helper for `Node::getEnclosingCallable`. */
private DataFlowCallable getCallableScope(Scope s) {
@@ -292,7 +295,7 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*/
-class ParameterNode extends Node, TParameterNode instanceof ParameterNodeImpl {
+class ParameterNode extends Node instanceof ParameterNodeImpl {
/** Gets the parameter corresponding to this node, if any. */
final Parameter getParameter() { result = super.getParameter() }
}
@@ -304,10 +307,6 @@ class ExtractedParameterNode extends ParameterNodeImpl, CfgNode {
ExtractedParameterNode() { node = def.getDefiningNode() }
- override predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
- this = c.getParameter(ppos)
- }
-
override Parameter getParameter() { result = def.getParameter() }
}
diff --git a/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
index b11c3ecd838..442c5fff770 100644
--- a/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
+++ b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
@@ -8,4 +8,12 @@ private class MyConsistencyConfiguration extends ConsistencyConfiguration {
override predicate argHasPostUpdateExclude(ArgumentNode n) {
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
}
+
+ override predicate reverseReadExclude(Node n) {
+ // since `self`/`cls` parameters can be marked as implicit argument to `super()`,
+ // they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs`
+ // parameter, but dataflow-consistency queries should _not_ complain about there not
+ // being a post-update node for the synthetic `**kwargs` parameter.
+ n instanceof SynthDictSplatParameterNode
+ }
}
diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected b/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
index 17f3028ae23..e4b8f905530 100644
--- a/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
+++ b/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
@@ -1,2 +1,3 @@
+| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
diff --git a/python/ql/test/experimental/dataflow/basic/local.expected b/python/ql/test/experimental/dataflow/basic/local.expected
index 133f740596c..cdf40018ed0 100644
--- a/python/ql/test/experimental/dataflow/basic/local.expected
+++ b/python/ql/test/experimental/dataflow/basic/local.expected
@@ -5,6 +5,7 @@
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:1:1:1:21 | SynthDictSplatParameterNode | test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
diff --git a/python/ql/test/experimental/dataflow/basic/sinks.expected b/python/ql/test/experimental/dataflow/basic/sinks.expected
index cfd8effd77b..944f8190aa5 100644
--- a/python/ql/test/experimental/dataflow/basic/sinks.expected
+++ b/python/ql/test/experimental/dataflow/basic/sinks.expected
@@ -3,6 +3,7 @@
| test.py:0:0:0:0 | GSSA Variable b |
| test.py:0:0:0:0 | SSA variable $ |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
+| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x |
diff --git a/python/ql/test/experimental/dataflow/basic/sources.expected b/python/ql/test/experimental/dataflow/basic/sources.expected
index cfd8effd77b..944f8190aa5 100644
--- a/python/ql/test/experimental/dataflow/basic/sources.expected
+++ b/python/ql/test/experimental/dataflow/basic/sources.expected
@@ -3,6 +3,7 @@
| test.py:0:0:0:0 | GSSA Variable b |
| test.py:0:0:0:0 | SSA variable $ |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
+| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x |
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index 4d87e750572..a3e4752ffd2 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -72,7 +72,7 @@ def argument_passing(
@expects(7)
def test_argument_passing1():
- argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 arg7 func=argument_passing MISSING: arg2 arg3 arg4 arg6
+ argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 arg6 arg7 func=argument_passing MISSING: arg2 arg3 arg4
@expects(7)
@@ -102,8 +102,8 @@ def with_multiple_kw_args(a, b, c):
def test_multiple_kw_args():
with_multiple_kw_args(b=arg2, c=arg3, a=arg1) #$ arg1 arg2 arg3
with_multiple_kw_args(arg1, *(arg2,), arg3) #$ arg1 MISSING: arg2 arg3
- with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 MISSING: arg3
- with_multiple_kw_args(**{"b": arg2}, **{"c": arg3}, **{"a": arg1}) #$ MISSING: arg1 arg2 arg3
+ with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 arg3 func=with_multiple_kw_args
+ with_multiple_kw_args(**{"b": arg2}, **{"c": arg3}, **{"a": arg1}) #$ arg1 arg2 arg3 func=with_multiple_kw_args
def with_default_arguments(a=arg1, b=arg2, c=arg3): #$ arg1 arg2 arg3 func=with_default_arguments
@@ -117,7 +117,7 @@ def test_default_arguments():
with_default_arguments()
with_default_arguments(arg1) #$ arg1
with_default_arguments(b=arg2) #$ arg2
- with_default_arguments(**{"c": arg3}) #$ MISSING: arg3
+ with_default_arguments(**{"c": arg3}) #$ arg3 func=with_default_arguments
# All combinations
@@ -198,5 +198,5 @@ def test_mixed():
args = {"b": arg2, "c": "safe"} # $ arg2 func=mixed
mixed(a=arg1, **args) # $ arg1
- args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg2 func=mixed MISSING: arg1
+ args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=mixed
mixed(**args)
diff --git a/python/ql/test/experimental/dataflow/coverage/test.py b/python/ql/test/experimental/dataflow/coverage/test.py
index f4aadf433b2..1a7b2cbb6fa 100644
--- a/python/ql/test/experimental/dataflow/coverage/test.py
+++ b/python/ql/test/experimental/dataflow/coverage/test.py
@@ -393,7 +393,7 @@ def test_call_unpack_iterable():
def test_call_unpack_mapping():
- SINK(second(NONSOURCE, **{"b": SOURCE})) #$ MISSING: flow="SOURCE -> second(..)"
+ SINK(second(NONSOURCE, **{"b": SOURCE})) #$ flow="SOURCE -> second(..)"
def f_extra_pos(a, *b):
@@ -509,7 +509,7 @@ def test_lambda_unpack_mapping():
def second(a, b):
return b
- SINK(second(NONSOURCE, **{"b": SOURCE})) #$ MISSING: flow="SOURCE -> second(..)"
+ SINK(second(NONSOURCE, **{"b": SOURCE})) #$ flow="SOURCE -> second(..)"
def test_lambda_extra_pos():
From b6314dd19ddda18b7f4f1dc25560082fc37350ea Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 12 Sep 2022 15:44:10 +0200
Subject: [PATCH 029/415] Python: Add `*args` tests
---
.../dataflow/coverage/argumentPassing.py | 48 +++++++++++++++++++
1 file changed, 48 insertions(+)
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index a3e4752ffd2..c825d86b9f5 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -200,3 +200,51 @@ def test_mixed():
args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=mixed
mixed(**args)
+
+
+def starargs_only(*args):
+ SINK1(args[0])
+ SINK2(args[1])
+ SINK3_F(args[2])
+
+@expects(3*3)
+def test_only_starargs():
+ starargs_only(arg1, arg2, "safe") # $ MISSING: arg1 arg2
+
+ args = (arg2, "safe")
+ starargs_only(arg1, *args) # $ MISSING: arg1 arg2
+
+ args = (arg1, arg2, "safe")
+ starargs_only(*args) # $ MISSING: arg1 arg2
+
+
+def starargs_mixed(a, *args):
+ SINK1(a)
+ SINK2(args[0])
+ SINK3_F(args[1])
+
+@expects(3*8)
+def test_stararg_mixed():
+ starargs_mixed(arg1, arg2, "safe") # $ arg1 MISSING: arg2
+
+ args = (arg2, "safe")
+ starargs_mixed(arg1, *args) # $ arg1 MISSING: arg2
+
+ args = (arg1, arg2, "safe")
+ starargs_mixed(*args) # $ MISSING: arg1 arg2
+
+ args = (arg1, arg2, "safe")
+ more_args = ("foo", "bar")
+ starargs_mixed(*args, *more_args) # $ MISSING: arg1 arg2
+
+ empty_args = ()
+
+ # adding first/last
+ starargs_mixed(arg1, arg2, "safe", *empty_args) # $ arg1 MISSING: arg2
+ starargs_mixed(*empty_args, arg1, arg2, "safe") # $ MISSING: arg1 arg2
+
+ # adding before/after *args
+ args = (arg2, "safe")
+ starargs_mixed(arg1, *args, *empty_args) # $ arg1 MISSING: arg2
+ args = (arg2, "safe")
+ starargs_mixed(arg1, *empty_args, *args) # $ arg1 MISSING: arg2
From db921ac036f1b5ad8aead972f4d985f6baf79816 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 12 Sep 2022 16:45:57 +0200
Subject: [PATCH 030/415] Python: Add basic support for `*args`
---
.../new/internal/DataFlowDispatch.qll | 42 +++++++++++++++++++
.../dataflow/TestUtil/DataFlowConsistency.qll | 2 +
.../dataflow/coverage/argumentPassing.py | 8 ++--
3 files changed, 48 insertions(+), 4 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index ffca8390d86..f5b628f7e45 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -42,6 +42,13 @@ newtype TParameterPosition =
TSelfParameterPosition() or
TPositionalParameterPosition(int pos) { pos = any(Parameter p).getPosition() } or
TKeywordParameterPosition(string name) { name = any(Parameter p).getName() } or
+ TStarArgsParameterPosition(int pos) {
+ // since `.getPosition` does not work for `*args`, we need *args parameter positions
+ // at index 1 larger than the largest positional parameter position (and 0 must be
+ // included as well). This is a bit of an over-approximation.
+ pos = 0 or
+ pos = any(Parameter p).getPosition() + 1
+ } or
TDictSplatParameterPosition()
/** A parameter position. */
@@ -55,6 +62,9 @@ class ParameterPosition extends TParameterPosition {
/** Holds if this position represents a keyword parameter named `name`. */
predicate isKeyword(string name) { this = TKeywordParameterPosition(name) }
+ /** Holds if this position represents a `*args` parameter at (0-based) `index`. */
+ predicate isStarArgs(int index) { this = TStarArgsParameterPosition(index) }
+
/** Holds if this position represents a `**kwargs` parameter. */
predicate isDictSplat() { this = TDictSplatParameterPosition() }
@@ -66,6 +76,8 @@ class ParameterPosition extends TParameterPosition {
or
exists(string name | this.isKeyword(name) and result = "keyword " + name)
or
+ exists(int index | this.isStarArgs(index) and result = "*args at " + index)
+ or
this.isDictSplat() and result = "**"
}
}
@@ -75,6 +87,7 @@ newtype TArgumentPosition =
TSelfArgumentPosition() or
TPositionalArgumentPosition(int pos) { exists(any(CallNode c).getArg(pos)) } or
TKeywordArgumentPosition(string name) { exists(any(CallNode c).getArgByName(name)) } or
+ TStarArgsArgumentPosition(int pos) { exists(Call c | c.getPositionalArg(pos) instanceof Starred) } or
TDictSplatArgumentPosition()
/** An argument position. */
@@ -88,6 +101,9 @@ class ArgumentPosition extends TArgumentPosition {
/** Holds if this position represents a keyword argument named `name`. */
predicate isKeyword(string name) { this = TKeywordArgumentPosition(name) }
+ /** Holds if this position represents a `*args` argument at (0-based) `index`. */
+ predicate isStarArgs(int index) { this = TStarArgsArgumentPosition(index) }
+
/** Holds if this position represents a `**kwargs` argument. */
predicate isDictSplat() { this = TDictSplatArgumentPosition() }
@@ -99,6 +115,8 @@ class ArgumentPosition extends TArgumentPosition {
or
exists(string name | this.isKeyword(name) and result = "keyword " + name)
or
+ exists(int index | this.isStarArgs(index) and result = "*args at " + index)
+ or
this.isDictSplat() and result = "**"
}
}
@@ -112,6 +130,8 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
or
exists(string name | ppos.isKeyword(name) and apos.isKeyword(name))
or
+ exists(int index | ppos.isStarArgs(index) and apos.isStarArgs(index))
+ or
ppos.isDictSplat() and apos.isDictSplat()
}
@@ -198,6 +218,22 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
or
exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
or
+ exists(int index |
+ ppos.isStarArgs(index) and
+ result.getParameter() = func.getVararg()
+ |
+ // a `*args` parameter comes after the last positional parameter. We need to take
+ // self parameter into account, so for
+ // `def func(foo, bar, *args)` it should be index 2 (1 + max-index == 1 + 1)
+ // `class A: def func(self, foo, bar, *args)` it should be index 2 (1 + max-index - 1 == 1 + 2 - 1)
+ index =
+ 1 + max(int positionalIndex | exists(func.getArg(positionalIndex)) | positionalIndex) -
+ this.positionalOffset()
+ or
+ // no positional argument
+ not exists(func.getArg(_)) and index = 0
+ )
+ or
ppos.isDictSplat() and result.getParameter() = func.getKwarg()
or
ppos.isDictSplat() and result = TSynthDictSplatParameterNode(this)
@@ -912,6 +948,12 @@ private predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos)
arg.asCfgNode() = call.getArgByName(name)
)
or
+ exists(int index |
+ apos.isStarArgs(index) and
+ arg.asCfgNode() = call.getStarArg() and
+ call.getStarArg().getNode() = call.getNode().getPositionalArg(index).(Starred).getValue()
+ )
+ or
apos.isDictSplat() and
(
arg.asCfgNode() = call.getKwargs()
diff --git a/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
index 442c5fff770..8d85437b7d3 100644
--- a/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
+++ b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
@@ -6,6 +6,8 @@ import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
// `python/ql/consistency-queries`. For for now it resides here.
private class MyConsistencyConfiguration extends ConsistencyConfiguration {
override predicate argHasPostUpdateExclude(ArgumentNode n) {
+ exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_))
+ or
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
}
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index c825d86b9f5..4e55bc48cd7 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -214,8 +214,8 @@ def test_only_starargs():
args = (arg2, "safe")
starargs_only(arg1, *args) # $ MISSING: arg1 arg2
- args = (arg1, arg2, "safe")
- starargs_only(*args) # $ MISSING: arg1 arg2
+ args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
+ starargs_only(*args)
def starargs_mixed(a, *args):
@@ -227,8 +227,8 @@ def starargs_mixed(a, *args):
def test_stararg_mixed():
starargs_mixed(arg1, arg2, "safe") # $ arg1 MISSING: arg2
- args = (arg2, "safe")
- starargs_mixed(arg1, *args) # $ arg1 MISSING: arg2
+ args = (arg2, "safe") # $ arg2 func=starargs_mixed
+ starargs_mixed(arg1, *args) # $ arg1
args = (arg1, arg2, "safe")
starargs_mixed(*args) # $ MISSING: arg1 arg2
From 035d08351567fb8dc741c4dcc4214fa9c888f2bd Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 19 Sep 2022 16:50:48 +0200
Subject: [PATCH 031/415] Python: Support flow to `*args` param from positional
arg
---
.../new/internal/DataFlowDispatch.qll | 28 ++++++-
.../dataflow/new/internal/DataFlowPrivate.qll | 76 +++++++++++++++++++
.../dataflow/new/internal/DataFlowPublic.qll | 4 +
.../dataflow/coverage/argumentPassing.py | 14 ++--
.../experimental/dataflow/coverage/test.py | 6 +-
5 files changed, 116 insertions(+), 12 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index f5b628f7e45..3e6f82284c4 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -49,6 +49,7 @@ newtype TParameterPosition =
pos = 0 or
pos = any(Parameter p).getPosition() + 1
} or
+ TSynthStarArgsElementParameterPosition(int pos) { exists(TStarArgsParameterPosition(pos)) } or
TDictSplatParameterPosition()
/** A parameter position. */
@@ -65,6 +66,15 @@ class ParameterPosition extends TParameterPosition {
/** Holds if this position represents a `*args` parameter at (0-based) `index`. */
predicate isStarArgs(int index) { this = TStarArgsParameterPosition(index) }
+ /**
+ * Holds if this position represents a synthetic parameter at or after (0-based)
+ * position `index`, from which a store step is made to the real
+ * `*args` parameter.
+ */
+ predicate isSynthStarArgsElement(int index) {
+ this = TSynthStarArgsElementParameterPosition(index)
+ }
+
/** Holds if this position represents a `**kwargs` parameter. */
predicate isDictSplat() { this = TDictSplatParameterPosition() }
@@ -78,6 +88,11 @@ class ParameterPosition extends TParameterPosition {
or
exists(int index | this.isStarArgs(index) and result = "*args at " + index)
or
+ exists(int index |
+ this.isSynthStarArgsElement(index) and
+ result = "synthetic *args element at (or after) " + index
+ )
+ or
this.isDictSplat() and result = "**"
}
}
@@ -132,6 +147,10 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
or
exists(int index | ppos.isStarArgs(index) and apos.isStarArgs(index))
or
+ exists(int paramIndex, int argIndex | argIndex >= paramIndex |
+ ppos.isSynthStarArgsElement(paramIndex) and apos.isPositional(argIndex)
+ )
+ or
ppos.isDictSplat() and apos.isDictSplat()
}
@@ -219,8 +238,13 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
or
exists(int index |
- ppos.isStarArgs(index) and
- result.getParameter() = func.getVararg()
+ (
+ ppos.isStarArgs(index) and
+ result.getParameter() = func.getVararg()
+ or
+ ppos.isSynthStarArgsElement(index) and
+ result = TSynthStarArgsElementParameterNode(this)
+ )
|
// a `*args` parameter comes after the last positional parameter. We need to take
// self parameter into account, so for
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index 7b2a1d81580..136bdc85f8d 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -57,6 +57,78 @@ class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
override Location getLocation() { result = node.getLocation() }
}
+// =============================================================================
+// *args (StarArgs) related
+// =============================================================================
+/**
+ * A (synthetic) data-flow parameter node to capture all positional arguments that
+ * should be passed to the `*args` parameter.
+ *
+ * To handle
+ * ```py
+ * def func(*args):
+ * for arg in args:
+ * sink(arg)
+ *
+ * func(source1, source2, ...)
+ * ```
+ *
+ * we add a synthetic parameter to `func` that accepts any positional argument at (or
+ * after) the index for the `*args` parameter. We add a store step (at any list index) to the real
+ * `*args` parameter. This means we can handle the code above, but if the code had done `sink(args[0])`
+ * we would (wrongly) add flow for `source2` as well.
+ *
+ * To solve this more precisely, we could add a synthetic argument with position `*args`
+ * that had store steps with the correct index (like we do for mapping keyword arguments to a
+ * `**kwargs` parameter). However, if a single call could go to 2 different
+ * targets with `*args` parameters at different positions, as in the example below, it's unclear what
+ * index to store `2` at. For the `foo` callable it should be 1, for the `bar` callable it should be 0.
+ * So this information would need to be encoded in the arguments of an `ArgumentPosition` branch, and
+ * one of the arguments would be which callable is the target. However, we cannot build `ArgumentPosition`
+ * branches based on the call-graph, so this strategy doesn't work.
+ *
+ * Another approach to solving it precisely is to add multiple synthetic parameters that have store steps
+ * to the real `*args` parameter. So for the example below, `foo` would need to have synthetic parameter
+ * nodes for indexes 1 and 2 (which would have store step for index 0 and 1 of the `*args` parameter),
+ * and `bar` would need it for indexes 1, 2, and 3. The question becomes how many synthetic parameters to
+ * create, which _must_ be `max(Call call, int i | exists(call.getArg(i)))`, since (again) we can't base
+ * this on the call-graph. And each function with a `*args` parameter would need this many extra synthetic
+ * nodes. My gut feeling is that this simple approach will be good enough, but if we need to get it more
+ * precise, it should be possible to do it like this.
+ *
+ * ```py
+ * def foo(one, *args): ...
+ * def bar(*args): ...
+ *
+ * func = foo if cond else bar
+ * func(1, 2, 3)
+ */
+class SynthStarArgsElementParameterNode extends ParameterNodeImpl,
+ TSynthStarArgsElementParameterNode {
+ DataFlowCallable callable;
+
+ SynthStarArgsElementParameterNode() { this = TSynthStarArgsElementParameterNode(callable) }
+
+ override string toString() { result = "SynthStarArgsElementParameterNode" }
+
+ override Scope getScope() { result = callable.getScope() }
+
+ override Location getLocation() { result = callable.getLocation() }
+
+ override Parameter getParameter() { none() }
+}
+
+predicate synthStarArgsElementParameterNodeStoreStep(
+ SynthStarArgsElementParameterNode nodeFrom, ListElementContent c, ParameterNode nodeTo
+) {
+ c = c and // suppress warning about unused parameter
+ exists(DataFlowCallable callable, ParameterPosition ppos |
+ nodeFrom = TSynthStarArgsElementParameterNode(callable) and
+ nodeTo = callable.getParameter(ppos) and
+ ppos.isStarArgs(_)
+ )
+}
+
// =============================================================================
// **kwargs (DictSplat) related
// =============================================================================
@@ -519,6 +591,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom, c, nodeTo)
or
+ synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
+ or
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
}
@@ -849,6 +923,8 @@ predicate nodeIsHidden(Node n) {
or
n instanceof SummaryParameterNode
or
+ n instanceof SynthStarArgsElementParameterNode
+ or
n instanceof SynthDictSplatArgumentNode
or
n instanceof SynthDictSplatParameterNode
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
index 94d7bb70543..6d6113bc5af 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -113,6 +113,10 @@ newtype TNode =
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
} or
+ /** A synthetic node to capture positional arguments that are passed to a `*args` parameter. */
+ TSynthStarArgsElementParameterNode(DataFlowCallable callable) {
+ exists(ParameterPosition ppos | ppos.isStarArgs(_) | exists(callable.getParameter(ppos)))
+ } or
/** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
/** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index 4e55bc48cd7..d9191c4bd80 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -140,7 +140,7 @@ def test_pos_star():
if len(a) > 0:
SINK1(a[0])
- with_star(arg1) #$ MISSING: arg1 func=test_pos_star.with_star
+ with_star(arg1) #$ arg1 func=test_pos_star.with_star
def test_pos_kw():
@@ -209,10 +209,10 @@ def starargs_only(*args):
@expects(3*3)
def test_only_starargs():
- starargs_only(arg1, arg2, "safe") # $ MISSING: arg1 arg2
+ starargs_only(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad2,bad3="arg1" bad1,bad3="arg2"
args = (arg2, "safe")
- starargs_only(arg1, *args) # $ MISSING: arg1 arg2
+ starargs_only(arg1, *args) # $ arg1 SPURIOUS: bad2,bad3="arg1" MISSING: arg2
args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
starargs_only(*args)
@@ -225,7 +225,7 @@ def starargs_mixed(a, *args):
@expects(3*8)
def test_stararg_mixed():
- starargs_mixed(arg1, arg2, "safe") # $ arg1 MISSING: arg2
+ starargs_mixed(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad3="arg2"
args = (arg2, "safe") # $ arg2 func=starargs_mixed
starargs_mixed(arg1, *args) # $ arg1
@@ -240,11 +240,11 @@ def test_stararg_mixed():
empty_args = ()
# adding first/last
- starargs_mixed(arg1, arg2, "safe", *empty_args) # $ arg1 MISSING: arg2
+ starargs_mixed(arg1, arg2, "safe", *empty_args) # $ arg1 arg2 SPURIOUS: bad3="arg2"
starargs_mixed(*empty_args, arg1, arg2, "safe") # $ MISSING: arg1 arg2
# adding before/after *args
- args = (arg2, "safe")
- starargs_mixed(arg1, *args, *empty_args) # $ arg1 MISSING: arg2
+ args = (arg2, "safe") # $ arg2 func=starargs_mixed
+ starargs_mixed(arg1, *args, *empty_args) # $ arg1
args = (arg2, "safe")
starargs_mixed(arg1, *empty_args, *args) # $ arg1 MISSING: arg2
diff --git a/python/ql/test/experimental/dataflow/coverage/test.py b/python/ql/test/experimental/dataflow/coverage/test.py
index 1a7b2cbb6fa..65f915cfd9b 100644
--- a/python/ql/test/experimental/dataflow/coverage/test.py
+++ b/python/ql/test/experimental/dataflow/coverage/test.py
@@ -401,7 +401,7 @@ def f_extra_pos(a, *b):
def test_call_extra_pos():
- SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ MISSING: flow="SOURCE -> f_extra_pos(..)"
+ SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ flow="SOURCE -> f_extra_pos(..)"
def f_extra_keyword(a, **b):
@@ -514,7 +514,7 @@ def test_lambda_unpack_mapping():
def test_lambda_extra_pos():
f_extra_pos = lambda a, *b: b[0]
- SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ MISSING: flow="SOURCE -> f_extra_pos(..)"
+ SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ flow="SOURCE -> f_extra_pos(..)"
def test_lambda_extra_keyword():
@@ -689,7 +689,7 @@ def test_iterable_star_unpacking_in_for_2():
def iterate_star_args(first, second, *args):
for arg in args:
- SINK(arg) #$ MISSING: flow="SOURCE, l:+5 -> arg" flow="SOURCE, l:+6 -> arg"
+ SINK(arg) #$ flow="SOURCE, l:+5 -> arg" flow="SOURCE, l:+6 -> arg"
# FP reported here: https://github.com/github/codeql-python-team/issues/49
@expects(2)
From 98a849405f6c54ec3a78d8339531b6559d31fbf6 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 20 Sep 2022 09:12:00 +0200
Subject: [PATCH 032/415] Python: Add support for late `*args` arguments
---
.../new/internal/DataFlowDispatch.qll | 21 +++++++++
.../dataflow/new/internal/DataFlowPrivate.qll | 47 +++++++++++++++++++
.../dataflow/new/internal/DataFlowPublic.qll | 8 ++++
.../dataflow/coverage/argumentPassing.py | 4 +-
4 files changed, 78 insertions(+), 2 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 3e6f82284c4..ea8c4a76158 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -50,6 +50,7 @@ newtype TParameterPosition =
pos = any(Parameter p).getPosition() + 1
} or
TSynthStarArgsElementParameterPosition(int pos) { exists(TStarArgsParameterPosition(pos)) } or
+ TSynthLateStarArgsParameterPosition(int pos) { exists(TStarArgsParameterPosition(pos)) } or
TDictSplatParameterPosition()
/** A parameter position. */
@@ -75,6 +76,14 @@ class ParameterPosition extends TParameterPosition {
this = TSynthStarArgsElementParameterPosition(index)
}
+ /**
+ * Holds if this position represents a synthetic `*args` parameter after the real
+ * `*args` parameter. The real `*args` parameter is at the 0-based index `index`.
+ */
+ predicate isSynthLateStarArgsParameterPosition(int index) {
+ this = TSynthLateStarArgsParameterPosition(index)
+ }
+
/** Holds if this position represents a `**kwargs` parameter. */
predicate isDictSplat() { this = TDictSplatParameterPosition() }
@@ -93,6 +102,11 @@ class ParameterPosition extends TParameterPosition {
result = "synthetic *args element at (or after) " + index
)
or
+ exists(int index |
+ this.isSynthLateStarArgsParameterPosition(index) and
+ result = "synthetic late *args after " + index
+ )
+ or
this.isDictSplat() and result = "**"
}
}
@@ -151,6 +165,10 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
ppos.isSynthStarArgsElement(paramIndex) and apos.isPositional(argIndex)
)
or
+ exists(int realStarArgsIndex, int argIndex | argIndex > realStarArgsIndex |
+ ppos.isSynthLateStarArgsParameterPosition(realStarArgsIndex) and apos.isStarArgs(argIndex)
+ )
+ or
ppos.isDictSplat() and apos.isDictSplat()
}
@@ -244,6 +262,9 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
or
ppos.isSynthStarArgsElement(index) and
result = TSynthStarArgsElementParameterNode(this)
+ or
+ ppos.isSynthLateStarArgsParameterPosition(index) and
+ result = TSynthLateStarArgsParameterNode(this)
)
|
// a `*args` parameter comes after the last positional parameter. We need to take
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index 136bdc85f8d..8d4f45bcdeb 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -129,6 +129,49 @@ predicate synthStarArgsElementParameterNodeStoreStep(
)
}
+/**
+ * A synthetic node to capture a `*args` argument that is passed to a `*args`
+ * parameter, but "too late" in the argument list, so we cannot just do a 1-1 mapping
+ * without messing up the indexes; instead we make a list/tuple/set read step to
+ * `SynthStarArgsElementParameterNode`.
+ *
+ * Example: the `*args` argument starts at index 1, while the `*args` parameter
+ * accepts arguments starting at index 0.
+ * ```py
+ * def func(*args): ...
+ * func(1, *args)
+ * ```
+ */
+class SynthLateStarArgsParameterNode extends ParameterNodeImpl, TSynthLateStarArgsParameterNode {
+ DataFlowCallable callable;
+
+ SynthLateStarArgsParameterNode() { this = TSynthLateStarArgsParameterNode(callable) }
+
+ override string toString() { result = "SynthLateStarArgsParameterNode" }
+
+ override Scope getScope() { result = callable.getScope() }
+
+ override Location getLocation() { result = callable.getLocation() }
+
+ override Parameter getParameter() { none() }
+}
+
+predicate synthLateStarArgsParameterNodeReadStep(
+ SynthLateStarArgsParameterNode nodeFrom, Content c, ParameterNode nodeTo
+) {
+ (
+ c instanceof ListElementContent
+ or
+ c instanceof TupleElementContent
+ or
+ c instanceof SetElementContent
+ ) and
+ exists(DataFlowCallable callable |
+ nodeFrom = TSynthLateStarArgsParameterNode(callable) and
+ nodeTo = TSynthStarArgsElementParameterNode(callable)
+ )
+}
+
// =============================================================================
// **kwargs (DictSplat) related
// =============================================================================
@@ -764,6 +807,8 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
or
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom, c, nodeTo)
or
+ synthLateStarArgsParameterNodeReadStep(nodeFrom, c, nodeTo)
+ or
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
}
@@ -925,6 +970,8 @@ predicate nodeIsHidden(Node n) {
or
n instanceof SynthStarArgsElementParameterNode
or
+ n instanceof SynthLateStarArgsParameterNode
+ or
n instanceof SynthDictSplatArgumentNode
or
n instanceof SynthDictSplatParameterNode
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
index 6d6113bc5af..336330ad924 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -117,6 +117,14 @@ newtype TNode =
TSynthStarArgsElementParameterNode(DataFlowCallable callable) {
exists(ParameterPosition ppos | ppos.isStarArgs(_) | exists(callable.getParameter(ppos)))
} or
+ /**
+ * A synthetic node to capture a `*args` argument that is passed to a `*args`
+ * parameter, but "too late" in the argument list, so we cannot just do a 1-1 mapping
+ * without messing up the indexes.
+ */
+ TSynthLateStarArgsParameterNode(DataFlowCallable callable) {
+ exists(ParameterPosition ppos | ppos.isStarArgs(_) | exists(callable.getParameter(ppos)))
+ } or
/** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
/** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index d9191c4bd80..2a07a99801f 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -211,8 +211,8 @@ def starargs_only(*args):
def test_only_starargs():
starargs_only(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad2,bad3="arg1" bad1,bad3="arg2"
- args = (arg2, "safe")
- starargs_only(arg1, *args) # $ arg1 SPURIOUS: bad2,bad3="arg1" MISSING: arg2
+ args = (arg2, "safe") # $ arg2 func=starargs_only SPURIOUS: bad1,bad3="arg2"
+ starargs_only(arg1, *args) # $ arg1 SPURIOUS: bad2,bad3="arg1"
args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
starargs_only(*args)
From 5fc127cb2c5ae38c6be506e71a79a19d93089f98 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 20 Sep 2022 14:29:14 +0200
Subject: [PATCH 033/415] Python: Make `UnresolvedCalls.qll` handle class calls
without __init__
This commit used to make sense at this point in the ordering of commits,
but due to various rebases it no longer changes any test output.
It's still a good change though, so I'll keep it.
---
.../experimental/dataflow/TestUtil/UnresolvedCalls.qll | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/python/ql/test/experimental/dataflow/TestUtil/UnresolvedCalls.qll b/python/ql/test/experimental/dataflow/TestUtil/UnresolvedCalls.qll
index ea123e9ca45..fbdcca3ef04 100644
--- a/python/ql/test/experimental/dataflow/TestUtil/UnresolvedCalls.qll
+++ b/python/ql/test/experimental/dataflow/TestUtil/UnresolvedCalls.qll
@@ -12,13 +12,8 @@ class UnresolvedCallExpectations extends InlineExpectationsTest {
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(CallNode call |
- not exists(DataFlowPrivate::DataFlowCall dfc | dfc.getNode() = call |
- // For every `CallNode`, there is a `DataFlowCall` in the form of a `NormalCall`.
- // It does not really count, as it has some abstract overrides. For instance, it does not
- // define `getCallable`, so checking for the existence of this guarantees that we are in a
- // properly resolved call.
- exists(dfc.getCallable())
- ) and
+ not exists(DataFlowPrivate::DataFlowCall dfc | dfc.getNode() = call) and
+ not DataFlowPrivate::resolveClassCall(call, _) and
not call = API::builtin(_).getACall().asCfgNode() and
location = call.getLocation() and
tag = "unresolved_call" and
From 6351defe0d95da1676f4a67acd001dc4b00a7da9 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 20 Sep 2022 14:34:12 +0200
Subject: [PATCH 034/415] Python: Add call-graph tests with `isinstance`
---
.../CallGraph/InlineCallGraphTest.expected | 13 ++++++
.../CallGraph/code/isinstance.py | 40 +++++++++++++++++++
2 files changed, 53 insertions(+)
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/isinstance.py
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index e748746b01a..d095fc73dbb 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -39,5 +39,18 @@ typeTracker_found_pointsTo_notFound
| code/class_super.py:101:1:101:7 | ControlFlowNode for Attribute() | Z.foo |
| code/class_super.py:108:1:108:8 | ControlFlowNode for Attribute() | Z.foo |
| code/def_in_function.py:22:5:22:11 | ControlFlowNode for Attribute() | test.A.foo |
+| code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | A.foo |
+| code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | ASub.foo |
+| code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | B.foo |
+| code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | Base.foo |
+| code/isinstance.py:14:13:14:22 | ControlFlowNode for Attribute() | A.foo |
+| code/isinstance.py:14:13:14:22 | ControlFlowNode for Attribute() | ASub.foo |
+| code/isinstance.py:14:13:14:22 | ControlFlowNode for Attribute() | B.foo |
+| code/isinstance.py:14:13:14:22 | ControlFlowNode for Attribute() | Base.foo |
+| code/isinstance.py:17:13:17:22 | ControlFlowNode for Attribute() | A.foo |
+| code/isinstance.py:17:13:17:22 | ControlFlowNode for Attribute() | ASub.foo |
+| code/isinstance.py:17:13:17:22 | ControlFlowNode for Attribute() | B.foo |
+| code/isinstance.py:17:13:17:22 | ControlFlowNode for Attribute() | Base.foo |
+| code/isinstance.py:40:5:40:11 | ControlFlowNode for Attribute() | B.foo |
| code/nested_class.py:83:9:83:16 | ControlFlowNode for Attribute() | X.class_def_in_func.Y.meth |
| code/underscore_prefix_func_name.py:14:5:14:19 | ControlFlowNode for some_function() | some_function |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/isinstance.py b/python/ql/test/experimental/library-tests/CallGraph/code/isinstance.py
new file mode 100644
index 00000000000..a8fbcc32d43
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/isinstance.py
@@ -0,0 +1,40 @@
+import os
+
+class Base:
+ def foo(self):
+ print("Base.foo")
+
+ def call(self):
+ if isinstance(self, A):
+ self.foo() # $ tt=A.foo tt=ASub.foo SPURIOUS: tt=B.foo tt=Base.foo
+
+ # This is a silly test, but just to show that second argument of isinstance as
+ # tuple is handled
+ if isinstance(self, (A, B)):
+ self.foo() # $ tt=A.foo tt=ASub.foo tt=B.foo SPURIOUS: tt=Base.foo
+
+ if isinstance(self, ASubNoDef):
+ self.foo() # $ tt=A.foo SPURIOUS: tt=ASub.foo tt=B.foo tt=Base.foo
+
+
+class A(Base):
+ def foo(self):
+ print("A.foo")
+
+class ASub(A):
+ def foo(self):
+ print("ASub.foo")
+
+class ASubNoDef(A): pass
+
+class B(Base):
+ def foo(self):
+ print("B.foo")
+
+cond = os.urandom(1)[0] > 128
+
+x = A() if cond else B()
+x.foo() # $ pt,tt=A.foo pt,tt=B.foo
+
+if isinstance(x, A):
+ x.foo() # $ pt,tt=A.foo SPURIOUS: tt=B.foo
From 4416037dc68969d439ccb7a32e8d11b42426ffc5 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 20 Sep 2022 14:45:08 +0200
Subject: [PATCH 035/415] Python: Ignore SPURIOUS call-graph edges in points-to
vs. type-tracker results
---
.../InlineCallGraphTest.expected | 2 +-
.../CallGraph/InlineCallGraphTest.expected | 11 -----
.../CallGraph/InlineCallGraphTest.ql | 49 ++++++++++++-------
3 files changed, 33 insertions(+), 29 deletions(-)
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.expected
index 2836800d300..7bba932e8f4 100644
--- a/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.expected
@@ -1,5 +1,5 @@
failures
debug_callableNotUnique
pointsTo_found_typeTracker_notFound
-| pkg/use.py:10:5:10:10 | ControlFlowNode for func() | func |
+| pkg/use.py:10:5:10:10 | ControlFlowNode for func() | "pkg/func_def.py:func" |
typeTracker_found_pointsTo_notFound
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index d095fc73dbb..534633980c3 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -20,10 +20,6 @@ pointsTo_found_typeTracker_notFound
| code/type_tracking_limitation.py:8:1:8:3 | ControlFlowNode for x() | my_func |
typeTracker_found_pointsTo_notFound
| code/callable_as_argument.py:29:5:29:12 | ControlFlowNode for Attribute() | test_class.InsideTestFunc.sm |
-| code/class_more_mro2.py:18:9:18:21 | ControlFlowNode for Attribute() | A.foo |
-| code/class_more_mro2.py:21:1:21:8 | ControlFlowNode for Attribute() | A.foo |
-| code/class_more_mro.py:24:9:24:21 | ControlFlowNode for Attribute() | A.foo |
-| code/class_more_mro.py:34:1:34:16 | ControlFlowNode for Attribute() | A.foo |
| code/class_special_methods.py:22:9:22:16 | ControlFlowNode for self() | Base.__call__ |
| code/class_special_methods.py:22:9:22:16 | ControlFlowNode for self() | Sub.__call__ |
| code/class_special_methods.py:33:1:33:5 | ControlFlowNode for b() | Base.__call__ |
@@ -41,16 +37,9 @@ typeTracker_found_pointsTo_notFound
| code/def_in_function.py:22:5:22:11 | ControlFlowNode for Attribute() | test.A.foo |
| code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | A.foo |
| code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | ASub.foo |
-| code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | B.foo |
-| code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | Base.foo |
| code/isinstance.py:14:13:14:22 | ControlFlowNode for Attribute() | A.foo |
| code/isinstance.py:14:13:14:22 | ControlFlowNode for Attribute() | ASub.foo |
| code/isinstance.py:14:13:14:22 | ControlFlowNode for Attribute() | B.foo |
-| code/isinstance.py:14:13:14:22 | ControlFlowNode for Attribute() | Base.foo |
| code/isinstance.py:17:13:17:22 | ControlFlowNode for Attribute() | A.foo |
-| code/isinstance.py:17:13:17:22 | ControlFlowNode for Attribute() | ASub.foo |
-| code/isinstance.py:17:13:17:22 | ControlFlowNode for Attribute() | B.foo |
-| code/isinstance.py:17:13:17:22 | ControlFlowNode for Attribute() | Base.foo |
-| code/isinstance.py:40:5:40:11 | ControlFlowNode for Attribute() | B.foo |
| code/nested_class.py:83:9:83:16 | ControlFlowNode for Attribute() | X.class_def_in_func.Y.meth |
| code/underscore_prefix_func_name.py:14:5:14:19 | ControlFlowNode for some_function() | some_function |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
index 327621fb7f2..d613460e749 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
@@ -53,24 +53,25 @@ class CallGraphTest extends InlineExpectationsTest {
|
location = call.getLocation() and
element = call.toString() and
- if call.getLocation().getFile() = target.getLocation().getFile()
- then value = betterQualName(target)
- else
- exists(string fixedRelativePath |
- fixedRelativePath =
- target
- .getLocation()
- .getFile()
- .getRelativePath()
- .regexpCapture(".*/CallGraph[^/]*/(.*)", 1)
- |
- // the value needs to be enclosed in quotes to allow special characters
- value = "\"" + fixedRelativePath + ":" + betterQualName(target) + "\""
- )
+ value = getCallEdgeValue(call, target)
)
}
}
+bindingset[call, target]
+string getCallEdgeValue(CallNode call, Function target) {
+ if call.getLocation().getFile() = target.getLocation().getFile()
+ then result = betterQualName(target)
+ else
+ exists(string fixedRelativePath |
+ fixedRelativePath =
+ target.getLocation().getFile().getRelativePath().regexpCapture(".*/CallGraph[^/]*/(.*)", 1)
+ |
+ // the value needs to be enclosed in quotes to allow special characters
+ result = "\"" + fixedRelativePath + ":" + betterQualName(target) + "\""
+ )
+}
+
bindingset[func]
string betterQualName(Function func) {
// note: `target.getQualifiedName` for Lambdas is just "lambda", so is not very useful :|
@@ -99,7 +100,14 @@ query predicate pointsTo_found_typeTracker_notFound(CallNode call, string qualna
exists(Function target |
pointsToCallEdge(call, target) and
not typeTrackerCallEdge(call, target) and
- qualname = betterQualName(target)
+ qualname = getCallEdgeValue(call, target) and
+ // ignore SPURIOUS call edges
+ not exists(FalsePositiveExpectation spuriousResult |
+ spuriousResult.getTag() = "pt" and
+ spuriousResult.getValue() = getCallEdgeValue(call, target) and
+ spuriousResult.getLocation().getFile() = call.getLocation().getFile() and
+ spuriousResult.getLocation().getStartLine() = call.getLocation().getStartLine()
+ )
)
}
@@ -107,10 +115,17 @@ query predicate typeTracker_found_pointsTo_notFound(CallNode call, string qualna
exists(Function target |
not pointsToCallEdge(call, target) and
typeTrackerCallEdge(call, target) and
- qualname = betterQualName(target) and
+ qualname = getCallEdgeValue(call, target) and
// We filter out result differences for points-to and type-tracking for class calls,
// since otherwise it gives too much noise (these are just handled differently
// between the two).
- not typeTrackerClassCall(call, target)
+ not typeTrackerClassCall(call, target) and
+ // ignore SPURIOUS call edges
+ not exists(FalsePositiveExpectation spuriousResult |
+ spuriousResult.getTag() = "tt" and
+ spuriousResult.getValue() = getCallEdgeValue(call, target) and
+ spuriousResult.getLocation().getFile() = call.getLocation().getFile() and
+ spuriousResult.getLocation().getStartLine() = call.getLocation().getStartLine()
+ )
)
}
From 8e0bb625168cbc434bf5790f5b8d66646befac21 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 23 Sep 2022 16:28:58 +0200
Subject: [PATCH 036/415] Python: Remove `pragma[inline]` from `parameterMatch`
It's gotten complex enough that it doesn't by definition seem necessary
to inline it. (in the range of ~2200 results for django and pandas)
---
.../lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll | 1 -
1 file changed, 1 deletion(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index ea8c4a76158..b9d0b65e876 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -151,7 +151,6 @@ class ArgumentPosition extends TArgumentPosition {
}
/** Holds if arguments at position `apos` match parameters at position `ppos`. */
-pragma[inline]
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
ppos.isSelf() and apos.isSelf()
or
From 0cf13e99762ba128c722d59430b84a0b9de45d31 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 20 Oct 2022 15:19:11 +0200
Subject: [PATCH 037/415] Python: Expand argument highlighting test
---
.../test/experimental/dataflow/calls/test.py | 29 ++++++++++++-------
1 file changed, 19 insertions(+), 10 deletions(-)
diff --git a/python/ql/test/experimental/dataflow/calls/test.py b/python/ql/test/experimental/dataflow/calls/test.py
index 3332d2caa9e..afb22f3659c 100644
--- a/python/ql/test/experimental/dataflow/calls/test.py
+++ b/python/ql/test/experimental/dataflow/calls/test.py
@@ -14,14 +14,23 @@ class MyClass(object):
def my_method(self, arg):
pass
+ def other_method(self):
+ self.my_method(42) # $ arg[self]=self call=self.my_method(..) callType=CallTypeNormalMethod arg[position 0]=42
+ self.sm(42) # $ call=self.sm(..) callType=CallTypeStaticMethod arg[position 0]=42
+
@staticmethod
- def staticmethod(arg):
+ def sm(arg):
pass
@classmethod
- def classmethod(cls, arg):
+ def cm(cls, arg):
pass
+ @classmethod
+ def other_classmethod(cls):
+ cls.cm(42) # $ call=cls.cm(..) callType=CallTypeClassMethod arg[position 0]=42 MISSING: arg[self]=cls
+ cls.sm(42) # $ call=cls.sm(..) callType=CallTypeStaticMethod arg[position 0]=42
+
def __getitem__(self, key):
pass
@@ -34,11 +43,11 @@ mm = x.my_method
mm(2) # $ call=mm(..) arg[self]=x arg[position 0]=2 callType=CallTypeNormalMethod
MyClass.my_method(x, 2) # $ call=MyClass.my_method(..) arg[position 0]=2 arg[self]=x callType=CallTypeMethodAsPlainFunction
-x.staticmethod(3) # $ call=x.staticmethod(..) arg[position 0]=3 callType=CallTypeStaticMethod
-MyClass.staticmethod(3) # $ call=MyClass.staticmethod(..) arg[position 0]=3 callType=CallTypeStaticMethod
+x.sm(3) # $ call=x.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
+MyClass.sm(3) # $ call=MyClass.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
-x.classmethod(4) # $ call=x.classmethod(..) arg[position 0]=4 callType=CallTypeClassMethod
-MyClass.classmethod(4) # $ call=MyClass.classmethod(..) arg[position 0]=4 arg[self]=MyClass callType=CallTypeClassMethod
+x.cm(4) # $ call=x.cm(..) arg[position 0]=4 callType=CallTypeClassMethod
+MyClass.cm(4) # $ call=MyClass.cm(..) arg[position 0]=4 arg[self]=MyClass callType=CallTypeClassMethod
x[5] # $ MISSING: call=x[5] arg[self]=x arg[position 0]=5
@@ -53,11 +62,11 @@ mm = y.my_method
mm(2) # $ call=mm(..) arg[self]=y arg[position 0]=2 callType=CallTypeNormalMethod
Subclass.my_method(y, 2) # $ call=Subclass.my_method(..) arg[self]=y arg[position 0]=2 callType=CallTypeMethodAsPlainFunction
-y.staticmethod(3) # $ call=y.staticmethod(..) arg[position 0]=3 callType=CallTypeStaticMethod
-Subclass.staticmethod(3) # $ call=Subclass.staticmethod(..) arg[position 0]=3 callType=CallTypeStaticMethod
+y.sm(3) # $ call=y.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
+Subclass.sm(3) # $ call=Subclass.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
-y.classmethod(4) # $ call=y.classmethod(..) arg[position 0]=4 callType=CallTypeClassMethod
-Subclass.classmethod(4) # $ call=Subclass.classmethod(..) arg[self]=Subclass arg[position 0]=4 callType=CallTypeClassMethod
+y.cm(4) # $ call=y.cm(..) arg[position 0]=4 callType=CallTypeClassMethod
+Subclass.cm(4) # $ call=Subclass.cm(..) arg[self]=Subclass arg[position 0]=4 callType=CallTypeClassMethod
y[5] # $ MISSING: call=y[5] arg[self]=y arg[position 0]=5
From 57c7dc8ea9ace2c70ee8872a2b41b4a0fea1bf52 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 20 Oct 2022 15:11:49 +0200
Subject: [PATCH 038/415] Python: Allow `cls` passing to classmethod
---
.../semmle/python/dataflow/new/internal/DataFlowDispatch.qll | 2 +-
python/ql/test/experimental/dataflow/calls/test.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index b9d0b65e876..7c2537a3b56 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -1054,7 +1054,7 @@ predicate getCallArg(
type instanceof CallTypeClassMethod and
apos.isSelf() and
resolveMethodCall(call, target, type, arg) and
- arg = classTracker(_) and
+ (arg = classTracker(_) or arg = clsTracker(_)) and
// dataflow lib has requirement that arguments and calls are in same enclosing callable.
exists(CfgNode cfgNode | cfgNode.getNode() = call |
cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
diff --git a/python/ql/test/experimental/dataflow/calls/test.py b/python/ql/test/experimental/dataflow/calls/test.py
index afb22f3659c..0eb7e262cb4 100644
--- a/python/ql/test/experimental/dataflow/calls/test.py
+++ b/python/ql/test/experimental/dataflow/calls/test.py
@@ -28,7 +28,7 @@ class MyClass(object):
@classmethod
def other_classmethod(cls):
- cls.cm(42) # $ call=cls.cm(..) callType=CallTypeClassMethod arg[position 0]=42 MISSING: arg[self]=cls
+ cls.cm(42) # $ call=cls.cm(..) callType=CallTypeClassMethod arg[position 0]=42 arg[self]=cls
cls.sm(42) # $ call=cls.sm(..) callType=CallTypeStaticMethod arg[position 0]=42
def __getitem__(self, key):
From f040ad8dacd465bd0830e6af4a8203a2585ff221 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 20 Oct 2022 14:32:48 +0200
Subject: [PATCH 039/415] Python: Add test of `__new__` handling
---
.../dataflow/calls/new_cls_param.py | 16 ++++++++++++
.../experimental/dataflow/fieldflow/test.py | 26 +++++++++++++++++++
2 files changed, 42 insertions(+)
create mode 100644 python/ql/test/experimental/dataflow/calls/new_cls_param.py
diff --git a/python/ql/test/experimental/dataflow/calls/new_cls_param.py b/python/ql/test/experimental/dataflow/calls/new_cls_param.py
new file mode 100644
index 00000000000..38274a9e160
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/calls/new_cls_param.py
@@ -0,0 +1,16 @@
+# We want to ensure that the __new__ method is considered a classmethod even though it
+# doesn't have a decorator. This means that the `cls` parameter should be considered a
+# reference to the class (or subclass), and not an instance of the class. We can detect
+# this from looking at the arguments passed in the `cls.foo` call. If we see a `self`
+# argument, this means it has correct behavior (because we're targeting a classmethod),
+# if there is no `self` argument, this means we've only considered `cls` to be a class
+# instance, since we don't want to pass that to the `cls` parameter of the classmethod `WithNewImpl.foo`.
+
+class WithNewImpl(object):
+ def __new__(cls):
+ print("WithNewImpl.foo")
+ cls.foo() # $ call=cls.foo() callType=CallTypeClassMethod MISSING: arg[self]=cls
+
+ @classmethod
+ def foo(cls):
+ print("WithNewImpl.foo")
diff --git a/python/ql/test/experimental/dataflow/fieldflow/test.py b/python/ql/test/experimental/dataflow/fieldflow/test.py
index 68bb71bd278..5f1f6f47058 100644
--- a/python/ql/test/experimental/dataflow/fieldflow/test.py
+++ b/python/ql/test/experimental/dataflow/fieldflow/test.py
@@ -385,6 +385,32 @@ def test_potential_crosstalk_same_class(cond=True):
SINK_F(objx2.x)
+class NewTest(object):
+ def __new__(cls, arg):
+ cls.foo = arg
+ return super().__new__(cls) # $ unresolved_call=super().__new__(..)
+
+@expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
+def test__new__():
+ # we want to make sure that we DON'T pass the synthetic pre-update node for
+ # the class instance to __new__, like we do for __init__.
+ nt = NewTest(SOURCE)
+ # the __new__ implementation sets the foo attribute on THE CLASS itself. The
+ # attribute lookup on the class instance will go to the class itself when the
+ # attribute isn't defined on the class instance, so we will actually see `nt.foo`
+ # contain the source, but the point of this test is that we should see identical
+ # behavior between NewTest.foo and nt.foo, which we don't!
+ #
+ # Also note that we currently (October 2022) don't model writes to classes very
+ # well.
+
+ SINK(NewTest.foo) # $ MISSING: flow="SOURCE, l:-10 -> NewTest.foo"
+ SINK(nt.foo) # $ flow="SOURCE, l:-11 -> nt.foo"
+
+ NewTest.foo = NONSOURCE
+ SINK_F(NewTest.foo)
+ SINK_F(nt.foo) # $ SPURIOUS: flow="SOURCE, l:-15 -> nt.foo"
+
# ------------------------------------------------------------------------------
# Global scope
# ------------------------------------------------------------------------------
From 6fefd545336be7809729b8d868a78d1da5c0ede2 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 12 Oct 2022 14:29:54 +0200
Subject: [PATCH 040/415] Python: Consider `__new__` a classmethod
---
.../new/internal/DataFlowDispatch.qll | 45 ++++++++++++-------
.../dataflow/new/internal/DataFlowPublic.qll | 2 +-
.../dataflow/calls/new_cls_param.py | 2 +-
3 files changed, 31 insertions(+), 18 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 7c2537a3b56..c239c66e55c 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -174,18 +174,31 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
// =============================================================================
// Helper predicates
// =============================================================================
-/** Holds if the function has a `staticmethod` decorator. */
-predicate hasStaticmethodDecorator(Function func) {
+/**
+ * Holds if the function `func` is a staticmethod, that is, if it has a
+ * `@staticmethod` decorator. No methods are treated as staticmethods by
+ * convention; implicit cases such as `__new__` are handled by `isClassmethod`.
+ */
+predicate isStaticmethod(Function func) {
exists(NameNode id | id.getId() = "staticmethod" and id.isGlobal() |
func.getADecorator() = id.getNode()
)
}
-/** Holds if the function has a `classmethod` decorator. */
-predicate hasClassmethodDecorator(Function func) {
+/**
+ * Holds if the function `func` is a classmethod -- either by having a
+ * `@classmethod` decorator or by convention
+ * (like a `__new__` method on a class is a classmethod even without the decorator).
+ */
+predicate isClassmethod(Function func) {
exists(NameNode id | id.getId() = "classmethod" and id.isGlobal() |
func.getADecorator() = id.getNode()
)
+ or
+ exists(Class cls |
+ cls.getAMethod() = func and
+ func.getName() = "__new__"
+ )
}
// =============================================================================
@@ -309,12 +322,12 @@ class DataFlowMethod extends DataFlowFunction {
/** A classmethod. */
class DataFlowClassmethod extends DataFlowMethod {
- DataFlowClassmethod() { hasClassmethodDecorator(func) }
+ DataFlowClassmethod() { isClassmethod(func) }
}
/** A staticmethod. */
class DataFlowStaticmethod extends DataFlowMethod, DataFlowFunction {
- DataFlowStaticmethod() { hasStaticmethodDecorator(func) }
+ DataFlowStaticmethod() { isStaticmethod(func) }
override int positionalOffset() { result = 0 }
@@ -457,8 +470,8 @@ private TypeTrackingNode selfTracker(TypeTracker t, Class classWithMethod) {
t.start() and
exists(Function func |
func = classWithMethod.getAMethod() and
- not hasStaticmethodDecorator(func) and
- not hasClassmethodDecorator(func)
+ not isStaticmethod(func) and
+ not isClassmethod(func)
|
result.asExpr() = func.getArg(0)
)
@@ -482,7 +495,7 @@ private TypeTrackingNode clsTracker(TypeTracker t, Class classWithMethod) {
(
exists(Function func |
func = classWithMethod.getAMethod() and
- hasClassmethodDecorator(func)
+ isClassmethod(func)
|
result.asExpr() = func.getArg(0)
)
@@ -507,7 +520,7 @@ Node clsTracker(Class classWithMethod) {
* call happened in the method `func` (either a method or a classmethod).
*/
private TypeTrackingNode superCallNoArgumentTracker(TypeTracker t, Function func) {
- not hasStaticmethodDecorator(func) and
+ not isStaticmethod(func) and
t.start() and
exists(CallCfgNode call | result = call |
call = getSuperCall() and
@@ -884,22 +897,22 @@ private module MethodCalls {
or
self = selfTracker(_)
) and
- not hasStaticmethodDecorator(target) and
- not hasClassmethodDecorator(target)
+ not isStaticmethod(target) and
+ not isClassmethod(target)
or
// method as plain function call
type instanceof CallTypeMethodAsPlainFunction and
self = classTracker(_) and
- not hasStaticmethodDecorator(target) and
- not hasClassmethodDecorator(target)
+ not isStaticmethod(target) and
+ not isClassmethod(target)
or
// staticmethod call
type instanceof CallTypeStaticMethod and
- hasStaticmethodDecorator(target)
+ isStaticmethod(target)
or
// classmethod call
type instanceof CallTypeClassMethod and
- hasClassmethodDecorator(target)
+ isClassmethod(target)
)
}
}
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
index 336330ad924..79b711db9e8 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -63,7 +63,7 @@ newtype TNode =
// self parameter when used implicitly in `super()`
exists(Class cls, Function func, ParameterDefinition def |
func = cls.getAMethod() and
- not hasStaticmethodDecorator(func) and
+ not isStaticmethod(func) and
// this matches what we do in ExtractedParameterNode
def.getDefiningNode() = node and
def.getParameter() = func.getArg(0)
diff --git a/python/ql/test/experimental/dataflow/calls/new_cls_param.py b/python/ql/test/experimental/dataflow/calls/new_cls_param.py
index 38274a9e160..c2ec88acd51 100644
--- a/python/ql/test/experimental/dataflow/calls/new_cls_param.py
+++ b/python/ql/test/experimental/dataflow/calls/new_cls_param.py
@@ -9,7 +9,7 @@
class WithNewImpl(object):
def __new__(cls):
print("WithNewImpl.foo")
- cls.foo() # $ call=cls.foo() callType=CallTypeClassMethod MISSING: arg[self]=cls
+ cls.foo() # $ call=cls.foo() callType=CallTypeClassMethod arg[self]=cls
@classmethod
def foo(cls):
From 9949824810163dbecb5c2dd61385feb11a3bf0cc Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 12 Oct 2022 14:37:15 +0200
Subject: [PATCH 041/415] Python: Expand implicit classmethods
---
.../python/dataflow/new/internal/DataFlowDispatch.qll | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index c239c66e55c..2347c8cf0f4 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -197,7 +197,11 @@ predicate isClassmethod(Function func) {
or
exists(Class cls |
cls.getAMethod() = func and
- func.getName() = "__new__"
+ func.getName() in [
+ "__new__", // https://docs.python.org/3.10/reference/datamodel.html#object.__new__
+ "__init_subclass__", // https://docs.python.org/3.10/reference/datamodel.html#object.__init_subclass__
+ "__class_getitem__", // https://docs.python.org/3.10/reference/datamodel.html#object.__class_getitem__
+ ]
)
}
From 5e5bab5a7c92d6cfa06e1104513a52b4145d95ce Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 5 Oct 2022 16:56:19 +0200
Subject: [PATCH 042/415] Python: Don't pass synthetic class instance to
`__new__` on class calls
---
.../dataflow/new/internal/DataFlowDispatch.qll | 17 ++++++++++-------
.../experimental/dataflow/coverage/datamodel.py | 2 +-
.../experimental/dataflow/fieldflow/test.py | 4 ++--
3 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 2347c8cf0f4..81e6bf904fd 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -937,17 +937,17 @@ predicate resolveClassCall(CallNode call, Class cls) {
}
/**
- * Gets a function (`__init__`/`__new__`) that will be invoked when `cls` is
- * constructed -- where the function lookup is based on our MRO calculation.
+ * Gets a function, either `__init__` or `__new__` as specified by `funcName`, that will
+ * be invoked when `cls` is constructed -- where the function lookup is based on our MRO
+ * calculation.
*/
-Function invokedFunctionFromClassConstruction(Class cls) {
- result = findFunctionAccordingToMroKnownStartingClass(cls, "__new__")
- or
+Function invokedFunctionFromClassConstruction(Class cls, string funcName) {
// as described in https://docs.python.org/3/reference/datamodel.html#object.__new__
// __init__ will only be called when __new__ returns an instance of the class (which
// is not a requirement). However, for simplicity, we assume that __init__ will always
// be called.
- result = findFunctionAccordingToMroKnownStartingClass(cls, "__init__")
+ funcName in ["__init__", "__new__"] and
+ result = findFunctionAccordingToMroKnownStartingClass(cls, funcName)
}
/**
@@ -984,7 +984,7 @@ predicate resolveCall(ControlFlowNode call, Function target, CallType type) {
type instanceof CallTypeClass and
exists(Class cls |
resolveClassCall(call, cls) and
- target = invokedFunctionFromClassConstruction(cls)
+ target = invokedFunctionFromClassConstruction(cls, _)
)
or
type instanceof CallTypeClassInstanceCall and
@@ -1106,6 +1106,9 @@ predicate getCallArg(
// class call
type instanceof CallTypeClass and
(
+ // only pass synthetic node for created object to __init__, and not __new__ since
+ // __new__ is a classmethod.
+ target = invokedFunctionFromClassConstruction(_, "__init__") and
apos.isSelf() and
arg = TSyntheticPreUpdateNode(call)
or
diff --git a/python/ql/test/experimental/dataflow/coverage/datamodel.py b/python/ql/test/experimental/dataflow/coverage/datamodel.py
index e30ffea164a..370fb32ca99 100644
--- a/python/ql/test/experimental/dataflow/coverage/datamodel.py
+++ b/python/ql/test/experimental/dataflow/coverage/datamodel.py
@@ -232,7 +232,7 @@ class Customized:
customized = Customized()
SINK(Customized.a) #$ MISSING:flow="SOURCE, l:-8 -> customized.a"
SINK_F(Customized.b)
-SINK(customized.a) #$ flow="SOURCE, l:-10 -> customized.a"
+SINK(customized.a) #$ MISSING: flow="SOURCE, l:-10 -> customized.a"
SINK(customized.b) #$ flow="SOURCE, l:-7 -> customized.b"
diff --git a/python/ql/test/experimental/dataflow/fieldflow/test.py b/python/ql/test/experimental/dataflow/fieldflow/test.py
index 5f1f6f47058..c090aea2089 100644
--- a/python/ql/test/experimental/dataflow/fieldflow/test.py
+++ b/python/ql/test/experimental/dataflow/fieldflow/test.py
@@ -405,11 +405,11 @@ def test__new__():
# well.
SINK(NewTest.foo) # $ MISSING: flow="SOURCE, l:-10 -> NewTest.foo"
- SINK(nt.foo) # $ flow="SOURCE, l:-11 -> nt.foo"
+ SINK(nt.foo) # $ MISSING: flow="SOURCE, l:-11 -> nt.foo"
NewTest.foo = NONSOURCE
SINK_F(NewTest.foo)
- SINK_F(nt.foo) # $ SPURIOUS: flow="SOURCE, l:-15 -> nt.foo"
+ SINK_F(nt.foo)
# ------------------------------------------------------------------------------
# Global scope
From 722c69edccc729bf12893bf6c40d5743936c420c Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 20 Oct 2022 11:22:38 +0200
Subject: [PATCH 043/415] Python: Add test showing self type-tracking problems
---
.../CallGraph/InlineCallGraphTest.expected | 4 +
.../CallGraph/code/self_passing.py | 94 +++++++++++++++++++
2 files changed, 98 insertions(+)
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/self_passing.py
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index 534633980c3..2e2c1cd655f 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -42,4 +42,8 @@ typeTracker_found_pointsTo_notFound
| code/isinstance.py:14:13:14:22 | ControlFlowNode for Attribute() | B.foo |
| code/isinstance.py:17:13:17:22 | ControlFlowNode for Attribute() | A.foo |
| code/nested_class.py:83:9:83:16 | ControlFlowNode for Attribute() | X.class_def_in_func.Y.meth |
+| code/self_passing.py:16:9:16:18 | ControlFlowNode for Attribute() | A.foo |
+| code/self_passing.py:16:9:16:18 | ControlFlowNode for Attribute() | B.foo |
+| code/self_passing.py:67:9:67:16 | ControlFlowNode for Attribute() | Y.cm |
+| code/self_passing.py:69:9:69:17 | ControlFlowNode for Attribute() | X.foo |
| code/underscore_prefix_func_name.py:14:5:14:19 | ControlFlowNode for some_function() | some_function |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/self_passing.py b/python/ql/test/experimental/library-tests/CallGraph/code/self_passing.py
new file mode 100644
index 00000000000..fcb22ca40e6
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/self_passing.py
@@ -0,0 +1,94 @@
+# These test cases illustrate what can happen if we allow the type trackers that are used
+# for tracking class instances to flow into self parameters.
+
+# This first case shows the problem that the call to `self.bar` inside A.foo could be
+# considered a call to B.bar if we allow the `self` parameter of `Base.base_meth` to
+# flow into A.foo (through the `self.foo` call). This is problematic, and causes us
+# to have different results for the `self.bar()` calls in `A.foo` and
+# `A.not_called`.
+
+from inspect import isclass
+
+
+class Base(object):
+ def base_meth(self):
+ print("Base.base_meth")
+ self.foo() # $ pt,tt=Base.foo tt=A.foo tt=B.foo
+
+ def foo(self):
+ print("Base.foo")
+
+class A(Base):
+ def foo(self):
+ print("A.foo")
+ self.bar() # $ pt,tt=A.bar SPURIOUS: tt=B.bar
+
+ def not_called(self):
+ self.bar() #$ pt,tt=A.bar
+
+ def bar(self):
+ print("A.bar")
+
+class B(Base):
+ def foo(self):
+ print("B.foo")
+
+ def bar(self):
+ print("B.bar")
+
+a = A()
+a.foo() # $ pt,tt=A.foo
+
+# Another problem is mixing up class instances and class references. In the example
+# below since `func` takes BOTH an instance of X, and the class Y, we used to end up
+# tracking _both_ to the self argument of X.foo, which meant that the self.meth() call
+# in X.foo was resolved to BOTH X.meth and Y.meth.
+
+class X(object):
+ def meth(self):
+ print("X.meth")
+
+ def foo(self):
+ print("X.foo")
+ self.meth() # $ pt,tt=X.meth SPURIOUS: tt=Y.meth
+
+
+class Y(object):
+ def meth(self):
+ print("Y.meth")
+
+ @classmethod
+ def cm(cls):
+ print("Y.cm")
+
+
+def func(obj):
+ if isclass(obj):
+ obj.cm() # $ tt=Y.cm
+ else:
+ obj.foo() # $ tt=X.foo
+
+func(Y) # $ pt,tt=func
+x = X()
+func(x) # $ pt,tt=func
+
+
+# While avoiding the two problems above is good, we have to be careful not to prune away
+# _all_ type-tracking flow to the self parameter (since it's the local source node for
+# all references to it within the function). So in the example below, we still want to
+# be able to resolve that some_function is assigned to the attribute `func` on self.
+
+
+class Example3(object):
+ def wat(self, f):
+ print("Example3.wat")
+ self.func = f
+ self.func() # $ pt,tt=some_function
+
+
+def some_function():
+ print("some_function")
+
+
+ex3 = Example3()
+ex3.wat(some_function) # $ pt,tt=Example3.wat
From b33f02f9dc4f216f6c0c0fa04dfa79c938f46279 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 20 Oct 2022 11:24:33 +0200
Subject: [PATCH 044/415] Python: Fix self-passing problems
This also fixes performance problems for pandas-dev/pandas
---
.../dataflow/new/internal/DataFlowDispatch.qll | 18 ++++++++++++------
.../new/internal/TypeTrackerSpecific.qll | 1 -
.../CallGraph/code/self_passing.py | 4 ++--
3 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 81e6bf904fd..18bff54a023 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -441,7 +441,8 @@ private TypeTrackingNode classTracker(TypeTracker t, Class cls) {
result.(CallCfgNode).getArg(0) = classInstanceTracker(cls)
)
or
- exists(TypeTracker t2 | result = classTracker(t2, cls).track(t2, t))
+ exists(TypeTracker t2 | result = classTracker(t2, cls).track(t2, t)) and
+ not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
@@ -456,7 +457,8 @@ private TypeTrackingNode classInstanceTracker(TypeTracker t, Class cls) {
t.start() and
result.(CallCfgNode).getFunction() = classTracker(cls)
or
- exists(TypeTracker t2 | result = classInstanceTracker(t2, cls).track(t2, t))
+ exists(TypeTracker t2 | result = classInstanceTracker(t2, cls).track(t2, t)) and
+ not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
@@ -480,7 +482,8 @@ private TypeTrackingNode selfTracker(TypeTracker t, Class classWithMethod) {
result.asExpr() = func.getArg(0)
)
or
- exists(TypeTracker t2 | result = selfTracker(t2, classWithMethod).track(t2, t))
+ exists(TypeTracker t2 | result = selfTracker(t2, classWithMethod).track(t2, t)) and
+ not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
@@ -509,7 +512,8 @@ private TypeTrackingNode clsTracker(TypeTracker t, Class classWithMethod) {
result.(CallCfgNode).getArg(0) = selfTracker(classWithMethod)
)
or
- exists(TypeTracker t2 | result = clsTracker(t2, classWithMethod).track(t2, t))
+ exists(TypeTracker t2 | result = clsTracker(t2, classWithMethod).track(t2, t)) and
+ not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
@@ -532,7 +536,8 @@ private TypeTrackingNode superCallNoArgumentTracker(TypeTracker t, Function func
call.getScope() = func
)
or
- exists(TypeTracker t2 | result = superCallNoArgumentTracker(t2, func).track(t2, t))
+ exists(TypeTracker t2 | result = superCallNoArgumentTracker(t2, func).track(t2, t)) and
+ not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
@@ -555,7 +560,8 @@ private TypeTrackingNode superCallTwoArgumentTracker(TypeTracker t, Class cls, N
call.getArg(1) = obj
)
or
- exists(TypeTracker t2 | result = superCallTwoArgumentTracker(t2, cls, obj).track(t2, t))
+ exists(TypeTracker t2 | result = superCallTwoArgumentTracker(t2, cls, obj).track(t2, t)) and
+ not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
index e00303d750b..67e3db984e8 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
@@ -68,7 +68,6 @@ string getPossibleContentName() {
* methods is done using API graphs (which uses type tracking).
*/
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
- // TODO: Fix performance problem with pandas
exists(
DataFlowPrivate::DataFlowCall call, DataFlowPrivate::DataFlowCallable callable,
DataFlowPrivate::ArgumentPosition apos, DataFlowPrivate::ParameterPosition ppos
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/self_passing.py b/python/ql/test/experimental/library-tests/CallGraph/code/self_passing.py
index fcb22ca40e6..f5618450033 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/self_passing.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/self_passing.py
@@ -21,7 +21,7 @@ class Base(object):
class A(Base):
def foo(self):
print("A.foo")
- self.bar() # $ pt,tt=A.bar SPURIOUS: tt=B.bar
+ self.bar() # $ pt,tt=A.bar
def not_called(self):
self.bar() #$ pt,tt=A.bar
@@ -50,7 +50,7 @@ class X(object):
def foo(self):
print("X.foo")
- self.meth() # $ pt,tt=X.meth SPURIOUS: tt=Y.meth
+ self.meth() # $ pt,tt=X.meth
class Y(object):
From cba93ded77af314bdf109f9f7c0d421ee5236122 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 20 Oct 2022 21:11:01 +0200
Subject: [PATCH 045/415] Python: Add test for `@property` problem
---
.../CallGraph/InlineCallGraphTest.expected | 6 +++---
.../library-tests/CallGraph/code/class_properties.py | 11 +++++++++++
2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index 2e2c1cd655f..ddeba0cf074 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -1,8 +1,8 @@
failures
debug_callableNotUnique
-| code/class_properties.py:7:5:7:18 | Function arg | Qualified function name 'Prop.arg' is not unique within its file. Please fix. |
-| code/class_properties.py:12:5:12:25 | Function arg | Qualified function name 'Prop.arg' is not unique within its file. Please fix. |
-| code/class_properties.py:17:5:17:18 | Function arg | Qualified function name 'Prop.arg' is not unique within its file. Please fix. |
+| code/class_properties.py:10:5:10:18 | Function arg | Qualified function name 'Prop.arg' is not unique within its file. Please fix. |
+| code/class_properties.py:15:5:15:25 | Function arg | Qualified function name 'Prop.arg' is not unique within its file. Please fix. |
+| code/class_properties.py:20:5:20:18 | Function arg | Qualified function name 'Prop.arg' is not unique within its file. Please fix. |
pointsTo_found_typeTracker_notFound
| code/class_attr_assign.py:10:9:10:27 | ControlFlowNode for Attribute() | my_func |
| code/class_attr_assign.py:11:9:11:25 | ControlFlowNode for Attribute() | my_func |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_properties.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_properties.py
index 06e4f3f3bd2..de436339115 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/class_properties.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_properties.py
@@ -1,3 +1,6 @@
+def func():
+ print("func")
+
class Prop(object):
def __init__(self, arg):
self._arg = arg
@@ -32,6 +35,11 @@ class Prop(object):
arg2 = property(_arg2_getter, _arg2_setter, _arg2_deleter)
+ @property
+ def func_prop(self):
+ print("Prop.func_prop getter")
+ return func
+
prop = Prop(42) # $ tt=Prop.__init__
prop.arg
@@ -41,3 +49,6 @@ del prop.arg
prop.arg2
prop.arg2 = 43
del prop.arg2
+
+f = prop.func_prop
+f() # $ SPURIOUS: tt=Prop.func_prop MISSING: tt=func
From 1e96ced3ab36f5ce621020fb36623e59f2a8d916 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 14 Oct 2022 11:38:45 +0200
Subject: [PATCH 046/415] Python: Ignore functions with `@property` decorator
for now
---
.../dataflow/new/internal/DataFlowDispatch.qll | 13 ++++++++++++-
.../CallGraph/code/class_properties.py | 2 +-
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 18bff54a023..83b41b8ac33 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -205,6 +205,13 @@ predicate isClassmethod(Function func) {
)
}
+/** Holds if the function `func` has a `property` decorator. */
+predicate hasPropertyDecorator(Function func) {
+ exists(NameNode id | id.getId() = "property" and id.isGlobal() |
+ func.getADecorator() = id.getNode()
+ )
+}
+
// =============================================================================
// Callables
// =============================================================================
@@ -251,7 +258,11 @@ abstract class DataFlowCallable extends TDataFlowCallable {
abstract class DataFlowFunction extends DataFlowCallable, TFunction {
Function func;
- DataFlowFunction() { this = TFunction(func) }
+ DataFlowFunction() {
+ this = TFunction(func) and
+ // TODO: Handle @property decorators
+ not hasPropertyDecorator(func)
+ }
override string toString() { result = func.toString() }
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_properties.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_properties.py
index de436339115..ea7d7847adb 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/class_properties.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_properties.py
@@ -51,4 +51,4 @@ prop.arg2 = 43
del prop.arg2
f = prop.func_prop
-f() # $ SPURIOUS: tt=Prop.func_prop MISSING: tt=func
+f() # $ MISSING: tt=func
From 16483f7d400e08a2ec4ad1db7e1b869346179cf3 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 24 Oct 2022 14:14:08 +0200
Subject: [PATCH 047/415] Python: Add funky call-graph regression
I don't even know how to phrase this :D
---
.../CallGraph/InlineCallGraphTest.expected | 1 +
.../CallGraph/code/funky_regression.py | 63 +++++++++++++++++++
2 files changed, 64 insertions(+)
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/funky_regression.py
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index ddeba0cf074..b3bc72cca1e 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -17,6 +17,7 @@ pointsTo_found_typeTracker_notFound
| code/func_defined_outside_class.py:39:11:39:21 | ControlFlowNode for _gen() | B._gen |
| code/func_defined_outside_class.py:42:1:42:7 | ControlFlowNode for Attribute() | B._gen.func |
| code/func_defined_outside_class.py:43:1:43:7 | ControlFlowNode for Attribute() | B._gen.func |
+| code/funky_regression.py:15:9:15:17 | ControlFlowNode for Attribute() | Wat.f2 |
| code/type_tracking_limitation.py:8:1:8:3 | ControlFlowNode for x() | my_func |
typeTracker_found_pointsTo_notFound
| code/callable_as_argument.py:29:5:29:12 | ControlFlowNode for Attribute() | test_class.InsideTestFunc.sm |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/funky_regression.py b/python/ql/test/experimental/library-tests/CallGraph/code/funky_regression.py
new file mode 100644
index 00000000000..bb87bfb47e9
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/funky_regression.py
@@ -0,0 +1,63 @@
+# When this regression was discovered, we did not resolve the `self.f2()` call after the
+# try-except block, but ONLY when passing an attribute to a method, as indicated in the
+# other tests below.
+
+class Wat(object):
+ def f1(self, arg): pass
+ def f2(self): pass
+
+ def func(self, foo):
+ try:
+ self.f1(foo.bar) # $ pt,tt=Wat.f1
+ except Exception as e:
+ raise e
+
+ self.f2() # $ pt=Wat.f2 MISSING: tt=Wat.f2
+
+
+# ==============================================================================
+# variants that we are able to handle
+# ==============================================================================
+
+
+class Works(object):
+ "not using attribute"
+ def f1(self, arg): pass
+ def f2(self): pass
+
+ def func(self, foo):
+ try:
+ self.f1(foo) # $ pt,tt=Works.f1
+ except Exception as e:
+ raise e
+
+ self.f2() # $ pt,tt=Works.f2
+
+
+class AlsoWorks(object):
+ "no exception"
+ def f1(self, arg): pass
+ def f2(self): pass
+
+ def func(self, foo):
+ self.f1(foo.bar) # $ pt,tt=AlsoWorks.f1
+
+ self.f2() # $ pt,tt=AlsoWorks.f2
+
+
+def safe_func(arg):
+ pass
+
+
+class Works3(object):
+ "call to non-self function"
+ def f1(self, arg): pass
+ def f2(self): pass
+
+ def func(self, foo):
+ try:
+ safe_func(foo.bar) # $ pt,tt=safe_func
+ except Exception as e:
+ raise e
+
+ self.f2() # $ pt,tt=Works3.f2
From d43a48c265a2b947fb57a8f62b5cbd1e782b11d5 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 24 Oct 2022 14:38:28 +0200
Subject: [PATCH 048/415] Python: Add `type(self)()` tests
---
.../CallGraph/InlineCallGraphTest.expected | 1 +
.../CallGraph/code/class_construction.py | 24 +++++++++++++++++--
2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index b3bc72cca1e..805054ffc62 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -7,6 +7,7 @@ pointsTo_found_typeTracker_notFound
| code/class_attr_assign.py:10:9:10:27 | ControlFlowNode for Attribute() | my_func |
| code/class_attr_assign.py:11:9:11:25 | ControlFlowNode for Attribute() | my_func |
| code/class_attr_assign.py:26:9:26:25 | ControlFlowNode for Attribute() | DummyObject.method |
+| code/class_construction.py:23:1:23:11 | ControlFlowNode for Attribute() | X.foo |
| code/class_super.py:50:1:50:6 | ControlFlowNode for Attribute() | outside_def |
| code/conditional_in_argument.py:18:5:18:11 | ControlFlowNode for Attribute() | X.bar |
| code/func_defined_outside_class.py:21:1:21:11 | ControlFlowNode for Attribute() | A.foo |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
index ce348fee15f..2cf6a8fb32b 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
@@ -1,8 +1,26 @@
class X(object):
def __init__(self, arg):
print("X.__init__", arg)
+ self.arg = arg
-X(42) # $ tt=X.__init__
+ def foo(self):
+ print("X.foo", self.arg)
+
+ def meth(self):
+ print("X.meth")
+ return type(self)(42.1) # $ MISSING: tt=X.__init__ tt=Y.__init__
+
+ @classmethod
+ def cm(cls):
+ print("X.cm")
+ cls(42.2) # $ MISSING: tt=X.__init__ tt=Y.__init__
+
+x = X(42.0) # $ tt=X.__init__
+x_421 = x.meth() # $ pt,tt=X.meth
+X.cm() # $ pt,tt=X.cm
+x.foo() # $ pt,tt=X.foo
+print()
+x_421.foo() # $ pt=X.foo MISSING: tt=X.foo
print()
@@ -11,7 +29,9 @@ class Y(X):
print("Y.__init__", arg)
super().__init__(-arg) # $ pt,tt=X.__init__
-Y(43) # $ tt=Y.__init__
+y = Y(43) # $ tt=Y.__init__
+y.meth() # $ pt,tt=X.meth
+y.cm() # $ pt,tt=X.cm
print()
# ---
From a4e6433942cc72369e0e28eec64ff2badc61f2ff Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 24 Oct 2022 14:40:23 +0200
Subject: [PATCH 049/415] Python: add support for `type(self)()`
---
.../python/dataflow/new/internal/DataFlowDispatch.qll | 8 +++++++-
.../library-tests/CallGraph/InlineCallGraphTest.expected | 1 -
.../library-tests/CallGraph/code/class_construction.py | 6 +++---
3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 83b41b8ac33..d0016c28673 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -466,7 +466,7 @@ Node classTracker(Class cls) { classTracker(TypeTracker::end(), cls).flowsTo(res
*/
private TypeTrackingNode classInstanceTracker(TypeTracker t, Class cls) {
t.start() and
- result.(CallCfgNode).getFunction() = classTracker(cls)
+ resolveClassCall(result.(CallCfgNode).asCfgNode(), cls)
or
exists(TypeTracker t2 | result = classInstanceTracker(t2, cls).track(t2, t)) and
not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
@@ -951,6 +951,12 @@ import MethodCalls
*/
predicate resolveClassCall(CallNode call, Class cls) {
call.getFunction() = classTracker(cls).asCfgNode()
+ or
+ // `cls()` inside a classmethod (which also covers `type(self)()` inside a method)
+ exists(Class classWithMethod |
+ call.getFunction() = clsTracker(classWithMethod).asCfgNode() and
+ getADirectSuperclass*(cls) = classWithMethod
+ )
}
/**
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index 805054ffc62..b3bc72cca1e 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -7,7 +7,6 @@ pointsTo_found_typeTracker_notFound
| code/class_attr_assign.py:10:9:10:27 | ControlFlowNode for Attribute() | my_func |
| code/class_attr_assign.py:11:9:11:25 | ControlFlowNode for Attribute() | my_func |
| code/class_attr_assign.py:26:9:26:25 | ControlFlowNode for Attribute() | DummyObject.method |
-| code/class_construction.py:23:1:23:11 | ControlFlowNode for Attribute() | X.foo |
| code/class_super.py:50:1:50:6 | ControlFlowNode for Attribute() | outside_def |
| code/conditional_in_argument.py:18:5:18:11 | ControlFlowNode for Attribute() | X.bar |
| code/func_defined_outside_class.py:21:1:21:11 | ControlFlowNode for Attribute() | A.foo |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
index 2cf6a8fb32b..8543fc94ee5 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
@@ -8,19 +8,19 @@ class X(object):
def meth(self):
print("X.meth")
- return type(self)(42.1) # $ MISSING: tt=X.__init__ tt=Y.__init__
+ return type(self)(42.1) # $ tt=X.__init__ tt=Y.__init__
@classmethod
def cm(cls):
print("X.cm")
- cls(42.2) # $ MISSING: tt=X.__init__ tt=Y.__init__
+ cls(42.2) # $ tt=X.__init__ tt=Y.__init__
x = X(42.0) # $ tt=X.__init__
x_421 = x.meth() # $ pt,tt=X.meth
X.cm() # $ pt,tt=X.cm
x.foo() # $ pt,tt=X.foo
print()
-x_421.foo() # $ pt=X.foo MISSING: tt=X.foo
+x_421.foo() # $ pt=X.foo tt=X.foo
print()
From 2b76964f7f7adace6087a9571b871b7d2b4f12ef Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 24 Oct 2022 15:16:26 +0200
Subject: [PATCH 050/415] Python: Expand tests of `__new__` a bit more
---
.../CallGraph/code/class_construction.py | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
index 8543fc94ee5..06669902714 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
@@ -44,15 +44,27 @@ class WithNew(object):
inst.some_method() # $ MISSING: pt,tt=WithNew.some_method
return inst
- def __init__(self, arg):
+ def __init__(self, arg=None):
print("WithNew.__init__", arg)
def some_method(self):
- print("WithNew.__init__")
+ print("WithNew.some_method")
WithNew(44) # $ tt=WithNew.__new__ tt=WithNew.__init__
print()
+class WithNewSub(WithNew):
+ def __new__(cls):
+ print("WithNewSub.__new__")
+ inst = super().__new__(cls, 44.1) # $ pt,tt=WithNew.__new__
+ assert isinstance(inst, cls)
+ inst.some_method() # $ MISSING: pt,tt=WithNew.some_method
+ return inst
+
+WithNewSub() # $ tt=WithNewSub.__new__ tt=WithNew.__init__
+print()
+
+# ------------------------------------------------------------------------------
class ExtraCallToInit(object):
def __new__(cls, arg):
From 8a56b48357d7a6a59a44bf476a820ed516966444 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 24 Oct 2022 15:35:53 +0200
Subject: [PATCH 051/415] Python: Support `super().__new__(cls)`
---
.../dataflow/new/internal/DataFlowDispatch.qll | 17 +++++++++++++++++
.../CallGraph/InlineCallGraphTest.expected | 3 +++
.../CallGraph/code/class_construction.py | 6 +++---
3 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index d0016c28673..5f957553a76 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -468,6 +468,13 @@ private TypeTrackingNode classInstanceTracker(TypeTracker t, Class cls) {
t.start() and
resolveClassCall(result.(CallCfgNode).asCfgNode(), cls)
or
+ // result of `super().__new__` as used in a `__new__` method implementation
+ t.start() and
+ exists(Class classUsedInSuper |
+ fromSuperNewCall(result.(CallCfgNode).asCfgNode(), classUsedInSuper, _, _) and
+ classUsedInSuper = getADirectSuperclass*(cls)
+ )
+ or
exists(TypeTracker t2 | result = classInstanceTracker(t2, cls).track(t2, t)) and
not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
@@ -856,6 +863,16 @@ private module MethodCalls {
attr.accesses(self, functionName)
}
+ /**
+ * Like `fromSuper`, but only for `__new__`, and without the requirement of being able to
+ * resolve the call to a known target (since the only super class might be the
+ * builtin `object`, so we never have the implementation of `__new__` in the DB).
+ */
+ predicate fromSuperNewCall(CallNode call, Class classUsedInSuper, AttrRead attr, Node self) {
+ fromSuper_join(call, "__new__", classUsedInSuper, attr, self) and
+ self in [classTracker(_), clsTracker(_)]
+ }
+
/**
* Holds if `call` is a call to a method `target`, derived from a use of `super`, either
* as:
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index b3bc72cca1e..2478db5a060 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -21,6 +21,9 @@ pointsTo_found_typeTracker_notFound
| code/type_tracking_limitation.py:8:1:8:3 | ControlFlowNode for x() | my_func |
typeTracker_found_pointsTo_notFound
| code/callable_as_argument.py:29:5:29:12 | ControlFlowNode for Attribute() | test_class.InsideTestFunc.sm |
+| code/class_construction.py:44:9:44:26 | ControlFlowNode for Attribute() | WithNew.some_method |
+| code/class_construction.py:61:9:61:26 | ControlFlowNode for Attribute() | WithNew.some_method |
+| code/class_construction.py:75:9:75:27 | ControlFlowNode for Attribute() | ExtraCallToInit.__init__ |
| code/class_special_methods.py:22:9:22:16 | ControlFlowNode for self() | Base.__call__ |
| code/class_special_methods.py:22:9:22:16 | ControlFlowNode for self() | Sub.__call__ |
| code/class_special_methods.py:33:1:33:5 | ControlFlowNode for b() | Base.__call__ |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
index 06669902714..1ae696edf61 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_construction.py
@@ -41,7 +41,7 @@ class WithNew(object):
print("WithNew.__new__", arg)
inst = super().__new__(cls)
assert isinstance(inst, cls)
- inst.some_method() # $ MISSING: pt,tt=WithNew.some_method
+ inst.some_method() # $ tt=WithNew.some_method
return inst
def __init__(self, arg=None):
@@ -58,7 +58,7 @@ class WithNewSub(WithNew):
print("WithNewSub.__new__")
inst = super().__new__(cls, 44.1) # $ pt,tt=WithNew.__new__
assert isinstance(inst, cls)
- inst.some_method() # $ MISSING: pt,tt=WithNew.some_method
+ inst.some_method() # $ tt=WithNew.some_method
return inst
WithNewSub() # $ tt=WithNewSub.__new__ tt=WithNew.__init__
@@ -72,7 +72,7 @@ class ExtraCallToInit(object):
inst = super().__new__(cls)
assert isinstance(inst, cls)
# you're not supposed to do this, since it will cause the __init__ method will be run twice.
- inst.__init__(1001) # $ MISSING: pt,tt=ExtraCallToInit.__init__
+ inst.__init__(1001) # $ tt=ExtraCallToInit.__init__
return inst
def __init__(self, arg):
From 276a825cd0d8c8d8574e694ce36b6085e958d7e3 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 24 Oct 2022 16:03:04 +0200
Subject: [PATCH 052/415] Python: Allow same function name in call-graph tests
---
.../CallGraph/InlineCallGraphTest.expected | 3 ---
.../library-tests/CallGraph/InlineCallGraphTest.ql | 10 ++++++++--
2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index 2478db5a060..31ea3f55ff2 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -1,8 +1,5 @@
failures
debug_callableNotUnique
-| code/class_properties.py:10:5:10:18 | Function arg | Qualified function name 'Prop.arg' is not unique within its file. Please fix. |
-| code/class_properties.py:15:5:15:25 | Function arg | Qualified function name 'Prop.arg' is not unique within its file. Please fix. |
-| code/class_properties.py:20:5:20:18 | Function arg | Qualified function name 'Prop.arg' is not unique within its file. Please fix. |
pointsTo_found_typeTracker_notFound
| code/class_attr_assign.py:10:9:10:27 | ControlFlowNode for Attribute() | my_func |
| code/class_attr_assign.py:11:9:11:25 | ControlFlowNode for Attribute() | my_func |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
index d613460e749..fa658d892f0 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.ql
@@ -76,7 +76,13 @@ bindingset[func]
string betterQualName(Function func) {
// note: `target.getQualifiedName` for Lambdas is just "lambda", so is not very useful :|
not func.isLambda() and
- result = func.getQualifiedName()
+ if
+ strictcount(Function f |
+ f.getEnclosingModule() = func.getEnclosingModule() and
+ f.getQualifiedName() = func.getQualifiedName()
+ ) = 1
+ then result = func.getQualifiedName()
+ else result = func.getLocation().getStartLine() + ":" + func.getQualifiedName()
or
func.isLambda() and
result =
@@ -88,7 +94,7 @@ query predicate debug_callableNotUnique(Function callable, string message) {
exists(callable.getLocation().getFile().getRelativePath()) and
exists(Function f |
f != callable and
- f.getQualifiedName() = callable.getQualifiedName() and
+ betterQualName(f) = betterQualName(callable) and
f.getLocation().getFile() = callable.getLocation().getFile()
) and
message =
From fb0cc184d97e813ce9c14a289808530f3de2c348 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 24 Oct 2022 16:31:39 +0200
Subject: [PATCH 053/415] Python: Add test of multi func def based on runtime
decision
---
.../CallGraph/InlineCallGraphTest.expected | 2 ++
.../CallGraph/code/runtime_decision.py | 18 ++++++++++++++++++
.../CallGraph/code/runtime_decision_defns.py | 8 ++++++++
3 files changed, 28 insertions(+)
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision_defns.py
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index 31ea3f55ff2..9e79328c82b 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -15,6 +15,8 @@ pointsTo_found_typeTracker_notFound
| code/func_defined_outside_class.py:42:1:42:7 | ControlFlowNode for Attribute() | B._gen.func |
| code/func_defined_outside_class.py:43:1:43:7 | ControlFlowNode for Attribute() | B._gen.func |
| code/funky_regression.py:15:9:15:17 | ControlFlowNode for Attribute() | Wat.f2 |
+| code/runtime_decision.py:44:1:44:7 | ControlFlowNode for func4() | "code/runtime_decision_defns.py:4:func4" |
+| code/runtime_decision.py:44:1:44:7 | ControlFlowNode for func4() | "code/runtime_decision_defns.py:7:func4" |
| code/type_tracking_limitation.py:8:1:8:3 | ControlFlowNode for x() | my_func |
typeTracker_found_pointsTo_notFound
| code/callable_as_argument.py:29:5:29:12 | ControlFlowNode for Attribute() | test_class.InsideTestFunc.sm |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision.py b/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision.py
index 3901a770188..d3800080589 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision.py
@@ -24,3 +24,21 @@ else:
func2 = rd_bar
func2() # $ pt,tt=rd_foo pt,tt=rd_bar
+
+
+# ==============================================================================
+# definition is random
+
+if random.random() < 0.5:
+ def func3():
+ print("func3 A")
+else:
+ def func3():
+ print("func3 B")
+
+func3() # $ pt,tt=33:func3 pt,tt=36:func3
+
+
+# func4 uses the same setup as func3, it's just defined in another file
+from code.runtime_decision_defns import func4
+func4() # $ pt="code/runtime_decision_defns.py:4:func4" pt="code/runtime_decision_defns.py:7:func4" MISSING: tt
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision_defns.py b/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision_defns.py
new file mode 100644
index 00000000000..931d9246fa1
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision_defns.py
@@ -0,0 +1,8 @@
+import random
+
+if random.random() < 0.5:
+ def func4():
+ print("func4 A")
+else:
+ def func4():
+ print("func4 B")
From e5fdeae6fcf290604bf39cf298b43cba3b61f589 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 24 Oct 2022 16:42:02 +0200
Subject: [PATCH 054/415] Python: Add `return (func_ref, ...)` test
---
.../CallGraph/InlineCallGraphTest.expected | 1 +
.../CallGraph/code/tuple_function_return.py | 15 +++++++++++++++
2 files changed, 16 insertions(+)
create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/tuple_function_return.py
diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
index 9e79328c82b..72d792b6623 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected
@@ -17,6 +17,7 @@ pointsTo_found_typeTracker_notFound
| code/funky_regression.py:15:9:15:17 | ControlFlowNode for Attribute() | Wat.f2 |
| code/runtime_decision.py:44:1:44:7 | ControlFlowNode for func4() | "code/runtime_decision_defns.py:4:func4" |
| code/runtime_decision.py:44:1:44:7 | ControlFlowNode for func4() | "code/runtime_decision_defns.py:7:func4" |
+| code/tuple_function_return.py:15:1:15:4 | ControlFlowNode for f2() | func |
| code/type_tracking_limitation.py:8:1:8:3 | ControlFlowNode for x() | my_func |
typeTracker_found_pointsTo_notFound
| code/callable_as_argument.py:29:5:29:12 | ControlFlowNode for Attribute() | test_class.InsideTestFunc.sm |
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/tuple_function_return.py b/python/ql/test/experimental/library-tests/CallGraph/code/tuple_function_return.py
new file mode 100644
index 00000000000..f87b1aa23e8
--- /dev/null
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/tuple_function_return.py
@@ -0,0 +1,15 @@
+def func():
+ print("func()")
+
+def return_func():
+ return func
+
+def return_func_in_tuple():
+ return (func, 42)
+
+f1 = return_func() # $ pt,tt=return_func
+f1() # $ pt,tt=func
+
+
+f2, _ = return_func_in_tuple() # $ pt,tt=return_func_in_tuple
+f2() # $ pt=func MISSING: tt
From c4122275dc95b48568ae01304dae914bedec97ef Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 31 Oct 2022 18:11:07 +0100
Subject: [PATCH 055/415] Python: Bring back support for flow-summaries
Also needed to fix up `TestUtil/UnresolvedCalls.qll` after a bad merge
conflict resolution. Since all calls are now DataFlowCall, and not JUST
the ones that can be resolved, we need to put in the restriction that
the callable can also be resolved.
---
.../new/internal/DataFlowDispatch.qll | 92 ++++++++++++-------
.../dataflow/new/internal/DataFlowPrivate.qll | 16 +---
.../dataflow/new/internal/DataFlowPublic.qll | 14 ++-
.../new/internal/FlowSummaryImplSpecific.qll | 78 ++++++++++++----
.../dataflow/TestUtil/UnresolvedCalls.qll | 4 +-
.../dataflow/basic/callGraphSinks.expected | 1 +
.../dataflow/basic/callGraphSources.expected | 1 +
.../dataflow/basic/global.expected | 1 +
.../dataflow/basic/globalStep.expected | 1 +
.../dataflow/basic/local.expected | 5 +
.../dataflow/basic/localStep.expected | 1 +
.../dataflow/basic/sinks.expected | 4 +
.../dataflow/basic/sources.expected | 4 +
.../NormalTaintTrackingTest.expected | 12 ---
.../dataflow/summaries/summaries.expected | 76 ++++++++++++---
.../basic/LocalTaintStep.expected | 1 +
16 files changed, 218 insertions(+), 93 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 5f957553a76..b6524382a71 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -36,12 +36,25 @@ private import python
private import DataFlowPublic
private import DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl
+private import FlowSummaryImplSpecific as FlowSummaryImplSpecific
newtype TParameterPosition =
/** Used for `self` in methods, and `cls` in classmethods. */
TSelfParameterPosition() or
- TPositionalParameterPosition(int pos) { pos = any(Parameter p).getPosition() } or
- TKeywordParameterPosition(string name) { name = any(Parameter p).getName() } or
+ TPositionalParameterPosition(int pos) {
+ pos = any(Parameter p).getPosition()
+ or
+ // since synthetic parameters are made for a synthetic summary callable, based on
+ // what Argument positions they have flow for, we need to make sure we have such
+ // parameter positions available.
+ FlowSummaryImplSpecific::ParsePositions::isParsedPositionalArgumentPosition(_, pos)
+ } or
+ TKeywordParameterPosition(string name) {
+ name = any(Parameter p).getName()
+ or
+ // see comment for TPositionalParameterPosition
+ FlowSummaryImplSpecific::ParsePositions::isParsedKeywordArgumentPosition(_, name)
+ } or
TStarArgsParameterPosition(int pos) {
// since `.getPosition` does not work for `*args`, we need *args parameter positions
// at index 1 larger than the largest positional parameter position (and 0 must be
@@ -114,8 +127,20 @@ class ParameterPosition extends TParameterPosition {
newtype TArgumentPosition =
/** Used for `self` in methods, and `cls` in classmethods. */
TSelfArgumentPosition() or
- TPositionalArgumentPosition(int pos) { exists(any(CallNode c).getArg(pos)) } or
- TKeywordArgumentPosition(string name) { exists(any(CallNode c).getArgByName(name)) } or
+ TPositionalArgumentPosition(int pos) {
+ exists(any(CallNode c).getArg(pos))
+ or
+ // since synthetic calls within a summarized callable could use a unique argument
+ // position, we need to ensure we make these available (these are specified as
+ // parameters in the flow-summary spec)
+ FlowSummaryImplSpecific::ParsePositions::isParsedPositionalParameterPosition(_, pos)
+ } or
+ TKeywordArgumentPosition(string name) {
+ exists(any(CallNode c).getArgByName(name))
+ or
+ // see comment for TPositionalArgumentPosition
+ FlowSummaryImplSpecific::ParsePositions::isParsedKeywordParameterPosition(_, name)
+ } or
TStarArgsArgumentPosition(int pos) { exists(Call c | c.getPositionalArg(pos) instanceof Starred) } or
TDictSplatArgumentPosition()
@@ -376,7 +401,7 @@ class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
LibraryCallableValue() { this = TLibraryCallable(callable) }
- override string toString() { result = callable.toString() }
+ override string toString() { result = "LibraryCallableValue: " + callable.toString() }
override string getQualifiedName() { result = callable.toString() }
@@ -1038,7 +1063,8 @@ predicate resolveCall(ControlFlowNode call, Function target, CallType type) {
* Holds if the argument of `call` at position `apos` is `arg`. This is just a helper
* predicate that maps ArgumentPositions to the arguments of the underlying `CallNode`.
*/
-private predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
+cached
+predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
exists(int index |
apos.isPositional(index) and
arg.asCfgNode() = call.getArg(index)
@@ -1170,8 +1196,8 @@ predicate getCallArg(
// DataFlowCall
// =============================================================================
newtype TDataFlowCall =
- TNormalCall(CallNode call, Function target, CallType type) { resolveCall(call, target, type) }
- or
+ TNormalCall(CallNode call, Function target, CallType type) { resolveCall(call, target, type) } or
+ TPotentialLibraryCall(CallNode call) or
/** A synthesized call inside a summarized callable */
TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
@@ -1253,49 +1279,44 @@ class NormalCall extends ExtractedDataFlowCall, TNormalCall {
}
/**
- * A call to a summarized callable, a `LibraryCallable`.
+ * A potential call to a summarized callable, a `LibraryCallable`.
*
* We currently exclude all resolved calls. This means that a call to, say, `map`, which
* is a `ClassCall`, cannot currently be given a summary.
* We hope to lift this restriction in the future and include all potential calls to summaries
* in this class.
*/
-class LibraryCall extends DataFlowCall {
- LibraryCall() {
- // TODO(call-graph): implement this!
- none()
- }
+class PotentialLibraryCall extends ExtractedDataFlowCall, TPotentialLibraryCall {
+ CallNode call;
+
+ PotentialLibraryCall() { this = TPotentialLibraryCall(call) }
override string toString() {
- // TODO(call-graph): implement this!
- none()
+ // note: if we used toString directly on the CallNode we would get
+ // `ControlFlowNode for func()`
+ // but the `ControlFlowNode` part is just clutter, so we go directly to the AST node
+ // instead.
+ result = call.getNode().toString()
}
- // We cannot refer to a `LibraryCallable` here,
+ // We cannot refer to a `LibraryCallable` here,
// as that could in turn refer to type tracking.
- // This call will be tied to a `LibraryCallable` via
- // `getViableCallabe` when the global data flow is assembled.
+ // This call will be tied to a `LibraryCallable` via
+ // `viableCallable` when the global data flow is assembled.
override DataFlowCallable getCallable() { none() }
override ArgumentNode getArgument(ArgumentPosition apos) {
- // TODO(call-graph): implement this!
- none()
+ normalCallArg(call, result, apos)
+ or
+ // potential self argument, from `foo.bar()` -- note that this could also just be a
+ // module reference, but we really don't have a good way of knowing :|
+ apos.isSelf() and
+ result = any(MethodCallNode mc | mc.getFunction().asCfgNode() = call.getFunction()).getObject()
}
- override ControlFlowNode getNode() {
- // TODO(call-graph): implement this!
- none()
- }
+ override ControlFlowNode getNode() { result = call }
- override DataFlowCallable getEnclosingCallable() {
- // TODO(call-graph): implement this!
- none()
- }
-
- override Location getLocation() {
- // TODO(call-graph): implement this!
- none()
- }
+ override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
}
/**
@@ -1433,7 +1454,8 @@ DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
// Instead we resolve the call from the summary.
exists(LibraryCallable callable |
result = TLibraryCallable(callable) and
- call.getNode() = callable.getACall().getNode()
+ call.getNode() = callable.getACall().getNode() and
+ call instanceof PotentialLibraryCall
)
}
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index 8d4f45bcdeb..ed611c93549 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -981,18 +981,10 @@ class LambdaCallKind = Unit;
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
- // TODO(call-graph): implement this!
- //
- // // lambda
- // kind = kind and
- // creation.asExpr() = c.(DataFlowLambda).getDefinition()
- // or
- // // normal function
- // exists(FunctionDef def |
- // def.defines(creation.asVar().getSourceVariable()) and
- // def.getDefinedFunction() = c.(DataFlowCallableValue).getCallableValue().getScope()
- // )
- // or
+ // lambda and plain functions
+ kind = kind and
+ creation.asExpr() = c.(DataFlowPlainFunction).getScope().getDefinition()
+ or
// summarized function
exists(kind) and // avoid warning on unused 'kind'
exists(Call call |
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
index 79b711db9e8..b03bb3de0a0 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -337,11 +337,19 @@ abstract class ArgumentNode extends Node {
}
/**
- * A data flow node that represents a call argument found in the source code,
- * where the call can be resolved.
+ * A data flow node that represents a call argument found in the source code.
*/
class ExtractedArgumentNode extends ArgumentNode {
- ExtractedArgumentNode() { getCallArg(_, _, _, this, _) }
+ ExtractedArgumentNode() {
+ // for resolved calls, we need to allow all argument nodes
+ getCallArg(_, _, _, this, _)
+ or
+ // for potential summaries we allow all normal call arguments
+ normalCallArg(_, this, _)
+ or
+ // and self arguments
+ this = any(MethodCallNode mc).getObject()
+ }
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
this = call.getArgument(pos) and
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
index 5d950247369..90411e658b2 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
@@ -115,10 +115,34 @@ string getComponentSpecificCsv(SummaryComponent sc) {
}
/** Gets the textual representation of a parameter position in the format used for flow summaries. */
-string getParameterPositionCsv(ParameterPosition pos) { result = pos.toString() }
+string getParameterPositionCsv(ParameterPosition pos) {
+ pos.isSelf() and result = "self"
+ or
+ exists(int i |
+ pos.isPositional(i) and
+ result = i.toString()
+ )
+ or
+ exists(string name |
+ pos.isKeyword(name) and
+ result = name + ":"
+ )
+}
/** Gets the textual representation of an argument position in the format used for flow summaries. */
-string getArgumentPositionCsv(ArgumentPosition pos) { result = pos.toString() }
+string getArgumentPositionCsv(ArgumentPosition pos) {
+ pos.isSelf() and result = "self"
+ or
+ exists(int i |
+ pos.isPositional(i) and
+ result = i.toString()
+ )
+ or
+ exists(string name |
+ pos.isKeyword(name) and
+ result = name + ":"
+ )
+}
/** Holds if input specification component `c` needs a reference. */
predicate inputNeedsReferenceSpecific(string c) { none() }
@@ -200,33 +224,55 @@ module ParsePositions {
)
}
- predicate isParsedParameterPosition(string c, int i) {
+ predicate isParsedPositionalParameterPosition(string c, int i) {
isParamBody(c) and
i = AccessPath::parseInt(c)
}
- predicate isParsedArgumentPosition(string c, int i) {
+ predicate isParsedKeywordParameterPosition(string c, string paramName) {
+ isParamBody(c) and
+ c = paramName + ":"
+ }
+
+ predicate isParsedPositionalArgumentPosition(string c, int i) {
isArgBody(c) and
i = AccessPath::parseInt(c)
}
+
+ predicate isParsedKeywordArgumentPosition(string c, string argName) {
+ isArgBody(c) and
+ c = argName + ":"
+ }
}
/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */
ArgumentPosition parseParamBody(string s) {
- none()
- // TODO(call-graph): implement this!
- // exists(int i |
- // ParsePositions::isParsedParameterPosition(s, i) and
- // result.isPositional(i)
- // )
+ exists(int i |
+ ParsePositions::isParsedPositionalParameterPosition(s, i) and
+ result.isPositional(i)
+ )
+ or
+ exists(string name |
+ ParsePositions::isParsedKeywordParameterPosition(s, name) and
+ result.isKeyword(name)
+ )
+ or
+ s = "self" and
+ result.isSelf()
}
/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */
ParameterPosition parseArgBody(string s) {
- none()
- // TODO(call-graph): implement this!
- // exists(int i |
- // ParsePositions::isParsedArgumentPosition(s, i) and
- // result.isPositional(i)
- // )
+ exists(int i |
+ ParsePositions::isParsedPositionalArgumentPosition(s, i) and
+ result.isPositional(i)
+ )
+ or
+ exists(string name |
+ ParsePositions::isParsedKeywordArgumentPosition(s, name) and
+ result.isKeyword(name)
+ )
+ or
+ s = "self" and
+ result.isSelf()
}
diff --git a/python/ql/test/experimental/dataflow/TestUtil/UnresolvedCalls.qll b/python/ql/test/experimental/dataflow/TestUtil/UnresolvedCalls.qll
index fbdcca3ef04..b84f8e6f165 100644
--- a/python/ql/test/experimental/dataflow/TestUtil/UnresolvedCalls.qll
+++ b/python/ql/test/experimental/dataflow/TestUtil/UnresolvedCalls.qll
@@ -12,7 +12,9 @@ class UnresolvedCallExpectations extends InlineExpectationsTest {
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(CallNode call |
- not exists(DataFlowPrivate::DataFlowCall dfc | dfc.getNode() = call) and
+ not exists(DataFlowPrivate::DataFlowCall dfc |
+ exists(dfc.getCallable()) and dfc.getNode() = call
+ ) and
not DataFlowPrivate::resolveClassCall(call, _) and
not call = API::builtin(_).getACall().asCfgNode() and
location = call.getLocation() and
diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected b/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
index e4b8f905530..0f87376ef1a 100644
--- a/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
+++ b/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
@@ -1,3 +1,4 @@
+| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSources.expected b/python/ql/test/experimental/dataflow/basic/callGraphSources.expected
index 4023ba8f3ea..0b4613c42de 100644
--- a/python/ql/test/experimental/dataflow/basic/callGraphSources.expected
+++ b/python/ql/test/experimental/dataflow/basic/callGraphSources.expected
@@ -1,2 +1,3 @@
+| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:7:19:7:19 | ControlFlowNode for a |
diff --git a/python/ql/test/experimental/dataflow/basic/global.expected b/python/ql/test/experimental/dataflow/basic/global.expected
index 8894bcc190a..800312b07be 100644
--- a/python/ql/test/experimental/dataflow/basic/global.expected
+++ b/python/ql/test/experimental/dataflow/basic/global.expected
@@ -1,3 +1,4 @@
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
diff --git a/python/ql/test/experimental/dataflow/basic/globalStep.expected b/python/ql/test/experimental/dataflow/basic/globalStep.expected
index 9f228998b9c..fa5b20486c2 100644
--- a/python/ql/test/experimental/dataflow/basic/globalStep.expected
+++ b/python/ql/test/experimental/dataflow/basic/globalStep.expected
@@ -1,3 +1,4 @@
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
diff --git a/python/ql/test/experimental/dataflow/basic/local.expected b/python/ql/test/experimental/dataflow/basic/local.expected
index cdf40018ed0..2354efea8e5 100644
--- a/python/ql/test/experimental/dataflow/basic/local.expected
+++ b/python/ql/test/experimental/dataflow/basic/local.expected
@@ -1,3 +1,8 @@
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
+| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
+| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
+| file://:0:0:0:0 | parameter position 0 of builtins.reversed | file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |
diff --git a/python/ql/test/experimental/dataflow/basic/localStep.expected b/python/ql/test/experimental/dataflow/basic/localStep.expected
index e147bb9f4fc..534c31da1a6 100644
--- a/python/ql/test/experimental/dataflow/basic/localStep.expected
+++ b/python/ql/test/experimental/dataflow/basic/localStep.expected
@@ -1,3 +1,4 @@
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |
diff --git a/python/ql/test/experimental/dataflow/basic/sinks.expected b/python/ql/test/experimental/dataflow/basic/sinks.expected
index 944f8190aa5..aafff76bbe2 100644
--- a/python/ql/test/experimental/dataflow/basic/sinks.expected
+++ b/python/ql/test/experimental/dataflow/basic/sinks.expected
@@ -1,3 +1,7 @@
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
+| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
+| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
+| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |
diff --git a/python/ql/test/experimental/dataflow/basic/sources.expected b/python/ql/test/experimental/dataflow/basic/sources.expected
index 944f8190aa5..aafff76bbe2 100644
--- a/python/ql/test/experimental/dataflow/basic/sources.expected
+++ b/python/ql/test/experimental/dataflow/basic/sources.expected
@@ -1,3 +1,7 @@
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
+| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
+| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
+| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |
diff --git a/python/ql/test/experimental/dataflow/summaries/NormalTaintTrackingTest.expected b/python/ql/test/experimental/dataflow/summaries/NormalTaintTrackingTest.expected
index 8e04ba142cb..3875da4e143 100644
--- a/python/ql/test/experimental/dataflow/summaries/NormalTaintTrackingTest.expected
+++ b/python/ql/test/experimental/dataflow/summaries/NormalTaintTrackingTest.expected
@@ -1,14 +1,2 @@
missingAnnotationOnSink
-| summaries.py:33:6:33:12 | summaries.py:33 | ERROR, you should add `# $ MISSING: flow` annotation | tainted |
-| summaries.py:37:6:37:19 | summaries.py:37 | ERROR, you should add `# $ MISSING: flow` annotation | tainted_lambda |
-| summaries.py:52:6:52:22 | summaries.py:52 | ERROR, you should add `# $ MISSING: flow` annotation | tainted_mapped[0] |
-| summaries.py:58:6:58:31 | summaries.py:58 | ERROR, you should add `# $ MISSING: flow` annotation | tainted_mapped_explicit[0] |
-| summaries.py:61:6:61:30 | summaries.py:61 | ERROR, you should add `# $ MISSING: flow` annotation | tainted_mapped_summary[0] |
-| summaries.py:64:6:64:20 | summaries.py:64 | ERROR, you should add `# $ MISSING: flow` annotation | tainted_list[0] |
failures
-| summaries.py:33:16:33:49 | Comment # $ flow="SOURCE, l:-1 -> tainted" | Missing result:flow="SOURCE, l:-1 -> tainted" |
-| summaries.py:37:23:37:63 | Comment # $ flow="SOURCE, l:-1 -> tainted_lambda" | Missing result:flow="SOURCE, l:-1 -> tainted_lambda" |
-| summaries.py:52:26:52:69 | Comment # $ flow="SOURCE, l:-1 -> tainted_mapped[0]" | Missing result:flow="SOURCE, l:-1 -> tainted_mapped[0]" |
-| summaries.py:58:35:58:87 | Comment # $ flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]" | Missing result:flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]" |
-| summaries.py:61:34:61:85 | Comment # $ flow="SOURCE, l:-1 -> tainted_mapped_summary[0]" | Missing result:flow="SOURCE, l:-1 -> tainted_mapped_summary[0]" |
-| summaries.py:64:24:64:65 | Comment # $ flow="SOURCE, l:-1 -> tainted_list[0]" | Missing result:flow="SOURCE, l:-1 -> tainted_list[0]" |
diff --git a/python/ql/test/experimental/dataflow/summaries/summaries.expected b/python/ql/test/experimental/dataflow/summaries/summaries.expected
index 8f5366ed6c2..b566cbdedc6 100644
--- a/python/ql/test/experimental/dataflow/summaries/summaries.expected
+++ b/python/ql/test/experimental/dataflow/summaries/summaries.expected
@@ -1,29 +1,77 @@
edges
+| summaries.py:32:11:32:26 | ControlFlowNode for identity() | summaries.py:33:6:33:12 | ControlFlowNode for tainted |
+| summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | summaries.py:32:11:32:26 | ControlFlowNode for identity() |
+| summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda |
+| summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() |
+| summaries.py:44:16:44:33 | ControlFlowNode for reversed() [List element] | summaries.py:45:6:45:17 | ControlFlowNode for tainted_list [List element] |
| summaries.py:44:25:44:32 | ControlFlowNode for List | summaries.py:45:6:45:20 | ControlFlowNode for Subscript |
+| summaries.py:44:25:44:32 | ControlFlowNode for List [List element] | summaries.py:44:16:44:33 | ControlFlowNode for reversed() [List element] |
| summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | summaries.py:44:25:44:32 | ControlFlowNode for List |
+| summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | summaries.py:44:25:44:32 | ControlFlowNode for List [List element] |
+| summaries.py:45:6:45:17 | ControlFlowNode for tainted_list [List element] | summaries.py:45:6:45:20 | ControlFlowNode for Subscript |
+| summaries.py:51:18:51:46 | ControlFlowNode for list_map() [List element] | summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] |
+| summaries.py:51:38:51:45 | ControlFlowNode for List [List element] | summaries.py:51:18:51:46 | ControlFlowNode for list_map() [List element] |
+| summaries.py:51:39:51:44 | ControlFlowNode for SOURCE | summaries.py:51:38:51:45 | ControlFlowNode for List [List element] |
+| summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] | summaries.py:52:6:52:22 | ControlFlowNode for Subscript |
+| summaries.py:57:27:57:63 | ControlFlowNode for list_map() [List element] | summaries.py:58:6:58:28 | ControlFlowNode for tainted_mapped_explicit [List element] |
+| summaries.py:57:55:57:62 | ControlFlowNode for List [List element] | summaries.py:57:27:57:63 | ControlFlowNode for list_map() [List element] |
+| summaries.py:57:56:57:61 | ControlFlowNode for SOURCE | summaries.py:57:55:57:62 | ControlFlowNode for List [List element] |
+| summaries.py:58:6:58:28 | ControlFlowNode for tainted_mapped_explicit [List element] | summaries.py:58:6:58:31 | ControlFlowNode for Subscript |
+| summaries.py:60:26:60:53 | ControlFlowNode for list_map() [List element] | summaries.py:61:6:61:27 | ControlFlowNode for tainted_mapped_summary [List element] |
+| summaries.py:60:45:60:52 | ControlFlowNode for List [List element] | summaries.py:60:26:60:53 | ControlFlowNode for list_map() [List element] |
+| summaries.py:60:46:60:51 | ControlFlowNode for SOURCE | summaries.py:60:45:60:52 | ControlFlowNode for List [List element] |
+| summaries.py:61:6:61:27 | ControlFlowNode for tainted_mapped_summary [List element] | summaries.py:61:6:61:30 | ControlFlowNode for Subscript |
+| summaries.py:63:16:63:41 | ControlFlowNode for append_to_list() [List element] | summaries.py:64:6:64:17 | ControlFlowNode for tainted_list [List element] |
+| summaries.py:63:35:63:40 | ControlFlowNode for SOURCE | summaries.py:63:16:63:41 | ControlFlowNode for append_to_list() [List element] |
+| summaries.py:64:6:64:17 | ControlFlowNode for tainted_list [List element] | summaries.py:64:6:64:20 | ControlFlowNode for Subscript |
+| summaries.py:67:22:67:39 | ControlFlowNode for json_loads() [List element] | summaries.py:68:6:68:23 | ControlFlowNode for tainted_resultlist [List element] |
+| summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | summaries.py:67:22:67:39 | ControlFlowNode for json_loads() [List element] |
| summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | summaries.py:68:6:68:26 | ControlFlowNode for Subscript |
+| summaries.py:68:6:68:23 | ControlFlowNode for tainted_resultlist [List element] | summaries.py:68:6:68:26 | ControlFlowNode for Subscript |
nodes
+| summaries.py:32:11:32:26 | ControlFlowNode for identity() | semmle.label | ControlFlowNode for identity() |
+| summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
+| summaries.py:33:6:33:12 | ControlFlowNode for tainted | semmle.label | ControlFlowNode for tainted |
+| summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | semmle.label | ControlFlowNode for apply_lambda() |
+| summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
+| summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | semmle.label | ControlFlowNode for tainted_lambda |
+| summaries.py:44:16:44:33 | ControlFlowNode for reversed() [List element] | semmle.label | ControlFlowNode for reversed() [List element] |
| summaries.py:44:25:44:32 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
+| summaries.py:44:25:44:32 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
| summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
+| summaries.py:45:6:45:17 | ControlFlowNode for tainted_list [List element] | semmle.label | ControlFlowNode for tainted_list [List element] |
| summaries.py:45:6:45:20 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| summaries.py:51:18:51:46 | ControlFlowNode for list_map() [List element] | semmle.label | ControlFlowNode for list_map() [List element] |
+| summaries.py:51:38:51:45 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
+| summaries.py:51:39:51:44 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
+| summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] | semmle.label | ControlFlowNode for tainted_mapped [List element] |
+| summaries.py:52:6:52:22 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| summaries.py:57:27:57:63 | ControlFlowNode for list_map() [List element] | semmle.label | ControlFlowNode for list_map() [List element] |
+| summaries.py:57:55:57:62 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
+| summaries.py:57:56:57:61 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
+| summaries.py:58:6:58:28 | ControlFlowNode for tainted_mapped_explicit [List element] | semmle.label | ControlFlowNode for tainted_mapped_explicit [List element] |
+| summaries.py:58:6:58:31 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| summaries.py:60:26:60:53 | ControlFlowNode for list_map() [List element] | semmle.label | ControlFlowNode for list_map() [List element] |
+| summaries.py:60:45:60:52 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
+| summaries.py:60:46:60:51 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
+| summaries.py:61:6:61:27 | ControlFlowNode for tainted_mapped_summary [List element] | semmle.label | ControlFlowNode for tainted_mapped_summary [List element] |
+| summaries.py:61:6:61:30 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| summaries.py:63:16:63:41 | ControlFlowNode for append_to_list() [List element] | semmle.label | ControlFlowNode for append_to_list() [List element] |
+| summaries.py:63:35:63:40 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
+| summaries.py:64:6:64:17 | ControlFlowNode for tainted_list [List element] | semmle.label | ControlFlowNode for tainted_list [List element] |
+| summaries.py:64:6:64:20 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| summaries.py:67:22:67:39 | ControlFlowNode for json_loads() [List element] | semmle.label | ControlFlowNode for json_loads() [List element] |
| summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
+| summaries.py:68:6:68:23 | ControlFlowNode for tainted_resultlist [List element] | semmle.label | ControlFlowNode for tainted_resultlist [List element] |
| summaries.py:68:6:68:26 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
subpaths
invalidSpecComponent
-| append_to_list | Argument[0] | Argument[0] |
-| append_to_list | Argument[1] | Argument[1] |
-| apply_lambda | Argument[0].Parameter[0] | Argument[0] |
-| apply_lambda | Argument[0].Parameter[0] | Parameter[0] |
-| apply_lambda | Argument[0].ReturnValue | Argument[0] |
-| apply_lambda | Argument[1] | Argument[1] |
-| builtins.reversed | Argument[0].ListElement | Argument[0] |
-| identity | Argument[0] | Argument[0] |
-| json.loads | Argument[0] | Argument[0] |
-| list_map | Argument[0].Parameter[0] | Argument[0] |
-| list_map | Argument[0].Parameter[0] | Parameter[0] |
-| list_map | Argument[0].ReturnValue | Argument[0] |
-| list_map | Argument[1].ListElement | Argument[1] |
-| reversed | Argument[0].ListElement | Argument[0] |
#select
+| summaries.py:33:6:33:12 | ControlFlowNode for tainted | summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | summaries.py:33:6:33:12 | ControlFlowNode for tainted | $@ | summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
+| summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | $@ | summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
| summaries.py:45:6:45:20 | ControlFlowNode for Subscript | summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | summaries.py:45:6:45:20 | ControlFlowNode for Subscript | $@ | summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
+| summaries.py:52:6:52:22 | ControlFlowNode for Subscript | summaries.py:51:39:51:44 | ControlFlowNode for SOURCE | summaries.py:52:6:52:22 | ControlFlowNode for Subscript | $@ | summaries.py:51:39:51:44 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
+| summaries.py:58:6:58:31 | ControlFlowNode for Subscript | summaries.py:57:56:57:61 | ControlFlowNode for SOURCE | summaries.py:58:6:58:31 | ControlFlowNode for Subscript | $@ | summaries.py:57:56:57:61 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
+| summaries.py:61:6:61:30 | ControlFlowNode for Subscript | summaries.py:60:46:60:51 | ControlFlowNode for SOURCE | summaries.py:61:6:61:30 | ControlFlowNode for Subscript | $@ | summaries.py:60:46:60:51 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
+| summaries.py:64:6:64:20 | ControlFlowNode for Subscript | summaries.py:63:35:63:40 | ControlFlowNode for SOURCE | summaries.py:64:6:64:20 | ControlFlowNode for Subscript | $@ | summaries.py:63:35:63:40 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
| summaries.py:68:6:68:26 | ControlFlowNode for Subscript | summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | summaries.py:68:6:68:26 | ControlFlowNode for Subscript | $@ | summaries.py:67:33:67:38 | ControlFlowNode for SOURCE | ControlFlowNode for SOURCE |
diff --git a/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.expected b/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.expected
index 3b3f18c5b9e..05b64297f71 100644
--- a/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.expected
+++ b/python/ql/test/experimental/dataflow/tainttracking/basic/LocalTaintStep.expected
@@ -1,3 +1,4 @@
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| test.py:3:1:3:7 | GSSA Variable tainted | test.py:4:6:4:12 | ControlFlowNode for tainted |
| test.py:3:11:3:16 | ControlFlowNode for SOURCE | test.py:3:1:3:7 | GSSA Variable tainted |
| test.py:6:1:6:11 | ControlFlowNode for FunctionExpr | test.py:6:5:6:8 | GSSA Variable func |
From 478f5ffe965db5f062af74ebd9ce21bc7fc623c6 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 1 Nov 2022 10:03:23 +0100
Subject: [PATCH 056/415] Python: Limit `self` argument for
`PotentialLibraryCall`
Using the object from `MethodCallNode` meant that in the code below,
`lib` from the import expression would be considered a self argument
(this showed up in dataflow-consistency query results, that were not
committed... sorry)
```
from lib import func
func()
```
---
.../semmle/python/dataflow/new/internal/DataFlowDispatch.qll | 2 +-
.../lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index b6524382a71..55c4b0ae240 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -1311,7 +1311,7 @@ class PotentialLibraryCall extends ExtractedDataFlowCall, TPotentialLibraryCall
// potential self argument, from `foo.bar()` -- note that this could also just be a
// module reference, but we really don't have a good way of knowing :|
apos.isSelf() and
- result = any(MethodCallNode mc | mc.getFunction().asCfgNode() = call.getFunction()).getObject()
+ result.asCfgNode() = call.getFunction().(AttrNode).getObject()
}
override ControlFlowNode getNode() { result = call }
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
index b03bb3de0a0..d46712738cf 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -348,7 +348,7 @@ class ExtractedArgumentNode extends ArgumentNode {
normalCallArg(_, this, _)
or
// and self arguments
- this = any(MethodCallNode mc).getObject()
+ this.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject()
}
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
From df4d09b3f9ed863c27acf24cc347fe34bbb3ce0a Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 1 Nov 2022 16:04:25 +0100
Subject: [PATCH 057/415] Python: Don't rely on all `DataFlowCall` being
resolved
I've been living dangerously with that assumption :|
---
python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll | 1 +
python/ql/test/experimental/dataflow/calls/DataFlowCallTest.ql | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
index 76aa68c5162..dbc92f9d151 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
@@ -87,6 +87,7 @@ newtype TInterestingExternalApiCall =
} or
TResolvedCall(DataFlowPrivate::DataFlowCall call) {
exists(call.getLocation().getFile().getRelativePath()) and
+ exists(call.getCallable()) and
not call.getCallable() = any(SafeExternalApi safe).getSafeCallable() and
// ignore calls inside codebase, and ignore calls that are marked as safe. This is
// only needed as long as we extract dependencies. When we stop doing that, all
diff --git a/python/ql/test/experimental/dataflow/calls/DataFlowCallTest.ql b/python/ql/test/experimental/dataflow/calls/DataFlowCallTest.ql
index b71e92db337..d939b3092e4 100644
--- a/python/ql/test/experimental/dataflow/calls/DataFlowCallTest.ql
+++ b/python/ql/test/experimental/dataflow/calls/DataFlowCallTest.ql
@@ -17,7 +17,8 @@ class DataFlowCallTest extends InlineExpectationsTest {
exists(location.getFile().getRelativePath()) and
exists(DataFlowDispatch::DataFlowCall call |
location = call.getLocation() and
- element = call.toString()
+ element = call.toString() and
+ exists(call.getCallable())
|
value = prettyExpr(call.getNode().getNode()) and
tag = "call"
From 9d29a0a04418a346c1e9cb6e3bb5e284eced5978 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 24 Aug 2022 13:27:51 +0200
Subject: [PATCH 058/415] Python: Accept changes to .expected from more pathlib
flow
But we don't want to keep this, this commit is just to show why we need a fix :)
---
.../frameworks/stdlib-py3/FileSystemAccess.py | 2 +-
.../CWE-312-CleartextStorage-py3/CleartextStorage.expected | 7 +++++++
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py b/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py
index 4de7f3a3c32..0da230d66fc 100644
--- a/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py
+++ b/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py
@@ -13,7 +13,7 @@ with p.open() as f: # $ getAPathArgument=p
p.write_bytes(b"hello") # $ getAPathArgument=p fileWriteData=b"hello"
p.write_text("hello") # $ getAPathArgument=p fileWriteData="hello"
-p.open("wt").write("hello") # $ getAPathArgument=p fileWriteData="hello"
+p.open("wt").write("hello") # $ getAPathArgument=p fileWriteData="hello" SPURIOUS: getAPathArgument=self
name = windows.parent.name
o = open
diff --git a/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/CleartextStorage.expected b/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/CleartextStorage.expected
index f2b0894ec0a..b27e24e30d7 100644
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/CleartextStorage.expected
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/CleartextStorage.expected
@@ -1,14 +1,21 @@
edges
+| file:///usr/lib/python3.8/pathlib.py:1248:26:1248:29 | ControlFlowNode for data | file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data |
+| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:13:22:13:41 | ControlFlowNode for Attribute() |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:15:26:15:29 | ControlFlowNode for cert |
+| test.py:12:21:12:24 | ControlFlowNode for cert | file:///usr/lib/python3.8/pathlib.py:1248:26:1248:29 | ControlFlowNode for data |
nodes
+| file:///usr/lib/python3.8/pathlib.py:1248:26:1248:29 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
+| file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
| test.py:12:21:12:24 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
+| test.py:12:21:12:24 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
| test.py:13:22:13:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:15:26:15:29 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
subpaths
#select
+| file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data | test.py:9:12:9:21 | ControlFlowNode for get_cert() | file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
| test.py:12:21:12:24 | ControlFlowNode for cert | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
| test.py:13:22:13:41 | ControlFlowNode for Attribute() | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:13:22:13:41 | ControlFlowNode for Attribute() | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
| test.py:15:26:15:29 | ControlFlowNode for cert | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:15:26:15:29 | ControlFlowNode for cert | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
From edcaff26af11e33879ffe6d1771cd443863e8d03 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 24 Aug 2022 14:12:10 +0200
Subject: [PATCH 059/415] Python: Add path-injection test using pathlib
Since it has the same problem of showing sinks inside the extracted
stdlib
---
.../PathInjection.expected | 22 +++++++++++++++++++
.../CWE-022-PathInjection/pathlib_use.py | 17 ++++++++++++++
2 files changed, 39 insertions(+)
create mode 100644 python/ql/test/query-tests/Security/CWE-022-PathInjection/pathlib_use.py
diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected b/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected
index 4d92275ff36..231b111f44b 100644
--- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected
+++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected
@@ -1,4 +1,5 @@
edges
+| file:///usr/lib/python3.8/pathlib.py:1214:14:1214:17 | ControlFlowNode for self | file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self |
| flask_path_injection.py:0:0:0:0 | ModuleVariableNode for flask_path_injection.request | flask_path_injection.py:19:15:19:21 | ControlFlowNode for request |
| flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:1:26:1:32 | GSSA Variable request |
| flask_path_injection.py:1:26:1:32 | GSSA Variable request | flask_path_injection.py:0:0:0:0 | ModuleVariableNode for flask_path_injection.request |
@@ -49,6 +50,14 @@ edges
| path_injection.py:138:16:138:27 | ControlFlowNode for Attribute | path_injection.py:142:14:142:17 | ControlFlowNode for path |
| path_injection.py:149:16:149:22 | ControlFlowNode for request | path_injection.py:149:16:149:27 | ControlFlowNode for Attribute |
| path_injection.py:149:16:149:27 | ControlFlowNode for Attribute | path_injection.py:152:18:152:21 | ControlFlowNode for path |
+| pathlib_use.py:0:0:0:0 | ModuleVariableNode for pathlib_use.request | pathlib_use.py:12:16:12:22 | ControlFlowNode for request |
+| pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | pathlib_use.py:3:26:3:32 | GSSA Variable request |
+| pathlib_use.py:3:26:3:32 | GSSA Variable request | pathlib_use.py:0:0:0:0 | ModuleVariableNode for pathlib_use.request |
+| pathlib_use.py:12:16:12:22 | ControlFlowNode for request | pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute |
+| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | pathlib_use.py:14:5:14:5 | ControlFlowNode for p |
+| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 |
+| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 |
+| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | file:///usr/lib/python3.8/pathlib.py:1214:14:1214:17 | ControlFlowNode for self |
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:9:12:9:18 | ControlFlowNode for request |
| test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:3:26:3:32 | GSSA Variable request |
| test.py:3:26:3:32 | GSSA Variable request | test.py:0:0:0:0 | ModuleVariableNode for test.request |
@@ -71,6 +80,8 @@ edges
| test.py:48:23:48:23 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x |
| test.py:48:23:48:23 | ControlFlowNode for x | test.py:48:13:48:24 | ControlFlowNode for normalize() |
nodes
+| file:///usr/lib/python3.8/pathlib.py:1214:14:1214:17 | ControlFlowNode for self | semmle.label | ControlFlowNode for self |
+| file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self | semmle.label | ControlFlowNode for self |
| flask_path_injection.py:0:0:0:0 | ModuleVariableNode for flask_path_injection.request | semmle.label | ModuleVariableNode for flask_path_injection.request |
| flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| flask_path_injection.py:1:26:1:32 | GSSA Variable request | semmle.label | GSSA Variable request |
@@ -125,6 +136,14 @@ nodes
| path_injection.py:149:16:149:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| path_injection.py:149:16:149:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| path_injection.py:152:18:152:21 | ControlFlowNode for path | semmle.label | ControlFlowNode for path |
+| pathlib_use.py:0:0:0:0 | ModuleVariableNode for pathlib_use.request | semmle.label | ModuleVariableNode for pathlib_use.request |
+| pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
+| pathlib_use.py:3:26:3:32 | GSSA Variable request | semmle.label | GSSA Variable request |
+| pathlib_use.py:12:16:12:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| pathlib_use.py:14:5:14:5 | ControlFlowNode for p | semmle.label | ControlFlowNode for p |
+| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | semmle.label | ControlFlowNode for p2 |
+| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | semmle.label | ControlFlowNode for p2 |
| test.py:0:0:0:0 | ModuleVariableNode for test.request | semmle.label | ModuleVariableNode for test.request |
| test.py:3:26:3:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| test.py:3:26:3:32 | GSSA Variable request | semmle.label | GSSA Variable request |
@@ -150,6 +169,7 @@ subpaths
| test.py:25:19:25:19 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x | test.py:13:12:13:30 | ControlFlowNode for Attribute() | test.py:25:9:25:20 | ControlFlowNode for normalize() |
| test.py:48:23:48:23 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x | test.py:13:12:13:30 | ControlFlowNode for Attribute() | test.py:48:13:48:24 | ControlFlowNode for normalize() |
#select
+| file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self | This path depends on a $@. | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
| flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | This path depends on a $@. | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
| path_injection.py:21:14:21:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:21:14:21:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
@@ -164,6 +184,8 @@ subpaths
| path_injection.py:132:14:132:22 | ControlFlowNode for sanitized | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:132:14:132:22 | ControlFlowNode for sanitized | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
| path_injection.py:142:14:142:17 | ControlFlowNode for path | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:142:14:142:17 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
| path_injection.py:152:18:152:21 | ControlFlowNode for path | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:152:18:152:21 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
+| pathlib_use.py:14:5:14:5 | ControlFlowNode for p | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | pathlib_use.py:14:5:14:5 | ControlFlowNode for p | This path depends on a $@. | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
+| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | This path depends on a $@. | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:19:10:19:10 | ControlFlowNode for x | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:19:10:19:10 | ControlFlowNode for x | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:26:10:26:10 | ControlFlowNode for y | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:26:10:26:10 | ControlFlowNode for y | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:33:14:33:14 | ControlFlowNode for x | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:33:14:33:14 | ControlFlowNode for x | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/pathlib_use.py b/python/ql/test/query-tests/Security/CWE-022-PathInjection/pathlib_use.py
new file mode 100644
index 00000000000..4eb5909a61d
--- /dev/null
+++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/pathlib_use.py
@@ -0,0 +1,17 @@
+import pathlib
+
+from flask import Flask, request
+app = Flask(__name__)
+
+
+STATIC_DIR = pathlib.Path("/server/static/")
+
+
+@app.route("/pathlib_use")
+def path_injection():
+ filename = request.args.get('filename', '')
+ p = STATIC_DIR / filename
+ p.open() # NOT OK
+
+ p2 = pathlib.Path(STATIC_DIR, filename)
+ p2.open() # NOT OK
From 39ce50fadc775ed5b70a7f0e1876cff76576d7c8 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 24 Aug 2022 14:14:41 +0200
Subject: [PATCH 060/415] Python: Fix problems with sinks in pathlib
This must mean that we did not have this flow with the old call-graph,
which means the new call-graph is doing a better job (yay).
---
.../lib/semmle/python/frameworks/Stdlib.qll | 14 ++++++++-
.../CleartextStorageCustomizations.qll | 29 ++++++++++++++++++-
.../CommandInjectionCustomizations.qll | 2 ++
.../dataflow/PathInjectionCustomizations.qll | 28 +++++++++++++++++-
.../frameworks/stdlib-py3/FileSystemAccess.py | 2 +-
.../PathInjection.expected | 7 -----
.../CleartextStorage.expected | 7 -----
7 files changed, 71 insertions(+), 18 deletions(-)
diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll
index f5d6dd8df1c..29c0e0f77b1 100644
--- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll
+++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll
@@ -1462,7 +1462,19 @@ private module StdlibPrivate {
t.start() and
result = openCall and
(
- openCall instanceof OpenCall
+ openCall instanceof OpenCall and
+ // don't include the open call inside of Path.open in pathlib.py since
+ // the call to `path_obj.open` is covered by `PathLibOpenCall`.
+ not exists(Module mod, Class cls, Function func |
+ openCall.(OpenCall).asCfgNode().getScope() = func and
+ func.getName() = "open" and
+ func.getScope() = cls and
+ cls.getName() = "Path" and
+ cls.getScope() = mod and
+ mod.getName() = "pathlib" and
+ // do allow this call if we're analyzing pathlib.py as part of CPython though
+ not exists(mod.getFile().getRelativePath())
+ )
or
openCall instanceof PathLibOpenCall
)
diff --git a/python/ql/lib/semmle/python/security/dataflow/CleartextStorageCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/CleartextStorageCustomizations.qll
index 0ff32823d68..001b9395ef4 100644
--- a/python/ql/lib/semmle/python/security/dataflow/CleartextStorageCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CleartextStorageCustomizations.qll
@@ -50,7 +50,34 @@ module CleartextStorage {
/** The data written to a file, considered as a flow sink. */
class FileWriteDataAsSink extends Sink {
- FileWriteDataAsSink() { this = any(FileSystemWriteAccess write).getADataNode() }
+ FileWriteDataAsSink() {
+ this = any(FileSystemWriteAccess write).getADataNode() and
+ // since the implementation of Path.write_bytes in pathlib.py is like
+ // ```py
+ // def write_bytes(self, data):
+ // with self.open(mode='wb') as f:
+ // return f.write(data)
+ // ```
+ // any time we would report flow to the `Path.write_bytes` sink, we can ALSO report
+ // the flow from the `data` parameter to the `f.write` sink -- obviously we
+ // don't want that.
+ //
+ // However, simply removing taint edges out of a sink is not a good enough solution,
+ // since we would only flag one of the `p.write_bytes` calls in the following example
+ // due to use-use flow
+ // ```py
+ // p.write_bytes(user_controlled)
+ // p.write_bytes(user_controlled)
+ // ```
+ //
+ // The same approach is used in the command injection query.
+ not exists(Module pathlib |
+ pathlib.getName() = "pathlib" and
+ this.getScope().getEnclosingModule() = pathlib and
+ // do allow this call if we're analyzing pathlib.py as part of CPython though
+ not exists(pathlib.getFile().getRelativePath())
+ )
+ }
}
/** The data written to a cookie on a HTTP response, considered as a flow sink. */
diff --git a/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
index a18bfe73372..8d688bf357a 100644
--- a/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
@@ -76,6 +76,8 @@ module CommandInjection {
// `subprocess`. See:
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
+ //
+ // The same approach is used in the path-injection and cleartext-storage queries.
not this.getScope().getEnclosingModule().getName() in [
"os", "subprocess", "platform", "popen2"
]
diff --git a/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll
index a96bbb996bc..b50ff70fde2 100644
--- a/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll
@@ -58,7 +58,33 @@ module PathInjection {
* A file system access, considered as a flow sink.
*/
class FileSystemAccessAsSink extends Sink {
- FileSystemAccessAsSink() { this = any(FileSystemAccess e).getAPathArgument() }
+ FileSystemAccessAsSink() {
+ this = any(FileSystemAccess e).getAPathArgument() and
+ // since the implementation of Path.open in pathlib.py is like
+ // ```py
+ // def open(self, ...):
+ // return io.open(self, ...)
+ // ```
+ // any time we would report flow to the `path_obj.open` sink, we can ALSO report
+ // the flow from the `self` parameter to the `io.open` sink -- obviously we
+ // don't want that.
+ //
+ // However, simply removing taint edges out of a sink is not a good enough solution,
+ // since we would only flag one of the `p.open` calls in the following example
+ // due to use-use flow
+ // ```py
+ // p.open()
+ // p.open()
+ // ```
+ //
+ // The same approach is used in the command injection query.
+ not exists(Module pathlib |
+ pathlib.getName() = "pathlib" and
+ this.getScope().getEnclosingModule() = pathlib and
+ // do allow this call if we're analyzing pathlib.py as part of CPython though
+ not exists(pathlib.getFile().getRelativePath())
+ )
+ }
}
private import semmle.python.frameworks.data.ModelsAsData
diff --git a/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py b/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py
index 0da230d66fc..4de7f3a3c32 100644
--- a/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py
+++ b/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py
@@ -13,7 +13,7 @@ with p.open() as f: # $ getAPathArgument=p
p.write_bytes(b"hello") # $ getAPathArgument=p fileWriteData=b"hello"
p.write_text("hello") # $ getAPathArgument=p fileWriteData="hello"
-p.open("wt").write("hello") # $ getAPathArgument=p fileWriteData="hello" SPURIOUS: getAPathArgument=self
+p.open("wt").write("hello") # $ getAPathArgument=p fileWriteData="hello"
name = windows.parent.name
o = open
diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected b/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected
index 231b111f44b..a824d44adfa 100644
--- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected
+++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected
@@ -1,5 +1,4 @@
edges
-| file:///usr/lib/python3.8/pathlib.py:1214:14:1214:17 | ControlFlowNode for self | file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self |
| flask_path_injection.py:0:0:0:0 | ModuleVariableNode for flask_path_injection.request | flask_path_injection.py:19:15:19:21 | ControlFlowNode for request |
| flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:1:26:1:32 | GSSA Variable request |
| flask_path_injection.py:1:26:1:32 | GSSA Variable request | flask_path_injection.py:0:0:0:0 | ModuleVariableNode for flask_path_injection.request |
@@ -56,8 +55,6 @@ edges
| pathlib_use.py:12:16:12:22 | ControlFlowNode for request | pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute |
| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | pathlib_use.py:14:5:14:5 | ControlFlowNode for p |
| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 |
-| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 |
-| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | file:///usr/lib/python3.8/pathlib.py:1214:14:1214:17 | ControlFlowNode for self |
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:9:12:9:18 | ControlFlowNode for request |
| test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:3:26:3:32 | GSSA Variable request |
| test.py:3:26:3:32 | GSSA Variable request | test.py:0:0:0:0 | ModuleVariableNode for test.request |
@@ -80,8 +77,6 @@ edges
| test.py:48:23:48:23 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x |
| test.py:48:23:48:23 | ControlFlowNode for x | test.py:48:13:48:24 | ControlFlowNode for normalize() |
nodes
-| file:///usr/lib/python3.8/pathlib.py:1214:14:1214:17 | ControlFlowNode for self | semmle.label | ControlFlowNode for self |
-| file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self | semmle.label | ControlFlowNode for self |
| flask_path_injection.py:0:0:0:0 | ModuleVariableNode for flask_path_injection.request | semmle.label | ModuleVariableNode for flask_path_injection.request |
| flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| flask_path_injection.py:1:26:1:32 | GSSA Variable request | semmle.label | GSSA Variable request |
@@ -143,7 +138,6 @@ nodes
| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| pathlib_use.py:14:5:14:5 | ControlFlowNode for p | semmle.label | ControlFlowNode for p |
| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | semmle.label | ControlFlowNode for p2 |
-| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | semmle.label | ControlFlowNode for p2 |
| test.py:0:0:0:0 | ModuleVariableNode for test.request | semmle.label | ModuleVariableNode for test.request |
| test.py:3:26:3:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| test.py:3:26:3:32 | GSSA Variable request | semmle.label | GSSA Variable request |
@@ -169,7 +163,6 @@ subpaths
| test.py:25:19:25:19 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x | test.py:13:12:13:30 | ControlFlowNode for Attribute() | test.py:25:9:25:20 | ControlFlowNode for normalize() |
| test.py:48:23:48:23 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x | test.py:13:12:13:30 | ControlFlowNode for Attribute() | test.py:48:13:48:24 | ControlFlowNode for normalize() |
#select
-| file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self | This path depends on a $@. | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
| flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | This path depends on a $@. | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
| path_injection.py:21:14:21:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:21:14:21:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
diff --git a/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/CleartextStorage.expected b/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/CleartextStorage.expected
index b27e24e30d7..f2b0894ec0a 100644
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/CleartextStorage.expected
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/CleartextStorage.expected
@@ -1,21 +1,14 @@
edges
-| file:///usr/lib/python3.8/pathlib.py:1248:26:1248:29 | ControlFlowNode for data | file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data |
-| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:13:22:13:41 | ControlFlowNode for Attribute() |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:15:26:15:29 | ControlFlowNode for cert |
-| test.py:12:21:12:24 | ControlFlowNode for cert | file:///usr/lib/python3.8/pathlib.py:1248:26:1248:29 | ControlFlowNode for data |
nodes
-| file:///usr/lib/python3.8/pathlib.py:1248:26:1248:29 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
-| file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
| test.py:12:21:12:24 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
-| test.py:12:21:12:24 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
| test.py:13:22:13:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:15:26:15:29 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
subpaths
#select
-| file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data | test.py:9:12:9:21 | ControlFlowNode for get_cert() | file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
| test.py:12:21:12:24 | ControlFlowNode for cert | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
| test.py:13:22:13:41 | ControlFlowNode for Attribute() | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:13:22:13:41 | ControlFlowNode for Attribute() | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
| test.py:15:26:15:29 | ControlFlowNode for cert | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:15:26:15:29 | ControlFlowNode for cert | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
From 0a41d8d2c1a8affa740c4298e018998e538859e9 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 1 Nov 2022 16:22:12 +0100
Subject: [PATCH 061/415] Python: Accept bad `CleartextLogging.expected`
---
.../CleartextLogging.expected | 97 +++++++++++++++++++
1 file changed, 97 insertions(+)
diff --git a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected
index e9b5ac67585..2d6dafde6ab 100644
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected
@@ -1,18 +1,111 @@
edges
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:283:23:283:26 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:310:20:310:23 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:310:20:310:23 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:310:20:310:26 | ControlFlowNode for Subscript |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:310:20:310:26 | ControlFlowNode for Subscript | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:311:21:311:24 | ControlFlowNode for args |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:311:21:311:24 | ControlFlowNode for args | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:311:9:311:12 | [post] ControlFlowNode for self [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:939:22:939:27 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:952:27:952:32 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:952:27:952:32 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1071:20:1071:25 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:952:27:952:32 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1178:20:1178:25 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:989:27:989:32 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1023:59:1023:64 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1022:59:1023:69 | ControlFlowNode for Tuple | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1021:38:1023:70 | ControlFlowNode for BinaryExpr |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1023:59:1023:64 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1023:59:1023:69 | ControlFlowNode for Attribute |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1023:59:1023:69 | ControlFlowNode for Attribute | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1022:59:1023:69 | ControlFlowNode for Tuple |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1071:20:1071:25 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1091:30:1091:35 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1091:30:1091:35 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:989:27:989:32 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1178:20:1178:25 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1187:34:1187:39 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1187:34:1187:39 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1071:20:1071:25 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1424:27:1424:30 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1434:35:1434:38 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1434:35:1434:38 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1565:32:1565:35 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1436:26:1436:29 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1446:34:1446:37 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1446:34:1446:37 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1565:32:1565:35 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1497:32:1497:35 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1512:35:1512:38 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1512:35:1512:38 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1565:32:1565:35 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1550:53:1550:56 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:59:1556:62 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:14:1557:35 | ControlFlowNode for _logRecordFactory() [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1563:16:1563:17 | ControlFlowNode for rv [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:59:1556:62 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:283:23:283:26 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:59:1556:62 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:14:1557:35 | ControlFlowNode for _logRecordFactory() [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1565:32:1565:35 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:66:1587:69 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:18:1588:62 | ControlFlowNode for Attribute() [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1589:21:1589:26 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:66:1587:69 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1550:53:1550:56 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:66:1587:69 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:18:1588:62 | ControlFlowNode for Attribute() [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1589:21:1589:26 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1591:22:1591:27 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1591:22:1591:27 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1599:31:1599:36 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1599:31:1599:36 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1645:28:1645:33 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1645:28:1645:33 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1669:39:1669:44 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1669:39:1669:44 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:939:22:939:27 | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2089:16:2089:19 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2097:21:2097:24 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2097:21:2097:24 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1436:26:1436:29 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2099:17:2099:20 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2107:22:2107:25 | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2107:22:2107:25 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1424:27:1424:30 | ControlFlowNode for args [List element] |
+| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:20:48:20:55 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:20:48:20:55 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:22:58:22:65 | ControlFlowNode for password |
+| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:22:58:22:65 | ControlFlowNode for password |
+| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:23:58:23:65 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:23:58:23:65 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:27:40:27:47 | ControlFlowNode for password |
+| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:27:40:27:47 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:30:58:30:65 | ControlFlowNode for password |
+| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:30:58:30:65 | ControlFlowNode for password |
+| test.py:20:48:20:55 | ControlFlowNode for password | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2089:16:2089:19 | ControlFlowNode for args [List element] |
+| test.py:22:58:22:65 | ControlFlowNode for password | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1497:32:1497:35 | ControlFlowNode for args [List element] |
+| test.py:23:58:23:65 | ControlFlowNode for password | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1436:26:1436:29 | ControlFlowNode for args [List element] |
+| test.py:27:40:27:47 | ControlFlowNode for password | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1436:26:1436:29 | ControlFlowNode for args [List element] |
+| test.py:30:58:30:65 | ControlFlowNode for password | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1436:26:1436:29 | ControlFlowNode for args [List element] |
+| test.py:34:30:34:39 | ControlFlowNode for get_cert() | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2099:17:2099:20 | ControlFlowNode for args [List element] |
| test.py:65:14:68:5 | ControlFlowNode for Dict | test.py:69:11:69:31 | ControlFlowNode for Subscript |
| test.py:67:21:67:37 | ControlFlowNode for Attribute | test.py:65:14:68:5 | ControlFlowNode for Dict |
nodes
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:283:23:283:26 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:310:20:310:23 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:310:20:310:26 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:311:9:311:12 | [post] ControlFlowNode for self [Attribute args] | semmle.label | [post] ControlFlowNode for self [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:311:21:311:24 | ControlFlowNode for args | semmle.label | ControlFlowNode for args |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:939:22:939:27 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:952:27:952:32 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:989:27:989:32 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1021:38:1023:70 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1022:59:1023:69 | ControlFlowNode for Tuple | semmle.label | ControlFlowNode for Tuple |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1023:59:1023:64 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1023:59:1023:69 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1071:20:1071:25 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1091:30:1091:35 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1178:20:1178:25 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1187:34:1187:39 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1424:27:1424:30 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1434:35:1434:38 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1436:26:1436:29 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1446:34:1446:37 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1497:32:1497:35 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1512:35:1512:38 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1550:53:1550:56 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:14:1557:35 | ControlFlowNode for _logRecordFactory() [Attribute args] | semmle.label | ControlFlowNode for _logRecordFactory() [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:59:1556:62 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1563:16:1563:17 | ControlFlowNode for rv [Attribute args] | semmle.label | ControlFlowNode for rv [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1565:32:1565:35 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:18:1588:62 | ControlFlowNode for Attribute() [Attribute args] | semmle.label | ControlFlowNode for Attribute() [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:66:1587:69 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1589:21:1589:26 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1591:22:1591:27 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1599:31:1599:36 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1645:28:1645:33 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1669:39:1669:44 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2089:16:2089:19 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2097:21:2097:24 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2099:17:2099:20 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2107:22:2107:25 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test.py:20:48:20:55 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| test.py:20:48:20:55 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| test.py:22:58:22:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:22:58:22:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:23:58:23:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| test.py:23:58:23:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| test.py:27:40:27:47 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:27:40:27:47 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:30:58:30:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| test.py:30:58:30:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| test.py:34:30:34:39 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
| test.py:34:30:34:39 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
| test.py:37:11:37:24 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test.py:39:22:39:35 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
@@ -21,7 +114,11 @@ nodes
| test.py:67:21:67:37 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:69:11:69:31 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
subpaths
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:59:1556:62 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:283:23:283:26 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:311:9:311:12 | [post] ControlFlowNode for self [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:14:1557:35 | ControlFlowNode for _logRecordFactory() [Attribute args] |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:66:1587:69 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1550:53:1550:56 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1563:16:1563:17 | ControlFlowNode for rv [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:18:1588:62 | ControlFlowNode for Attribute() [Attribute args] |
#select
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1021:38:1023:70 | ControlFlowNode for BinaryExpr | test.py:19:16:19:29 | ControlFlowNode for get_password() | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1021:38:1023:70 | ControlFlowNode for BinaryExpr | This expression logs $@ as clear text. | test.py:19:16:19:29 | ControlFlowNode for get_password() | sensitive data (password) |
+| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1021:38:1023:70 | ControlFlowNode for BinaryExpr | test.py:34:30:34:39 | ControlFlowNode for get_cert() | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1021:38:1023:70 | ControlFlowNode for BinaryExpr | This expression logs $@ as clear text. | test.py:34:30:34:39 | ControlFlowNode for get_cert() | sensitive data (certificate) |
| test.py:20:48:20:55 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:20:48:20:55 | ControlFlowNode for password | This expression logs $@ as clear text. | test.py:19:16:19:29 | ControlFlowNode for get_password() | sensitive data (password) |
| test.py:22:58:22:65 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:22:58:22:65 | ControlFlowNode for password | This expression logs $@ as clear text. | test.py:19:16:19:29 | ControlFlowNode for get_password() | sensitive data (password) |
| test.py:23:58:23:65 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:23:58:23:65 | ControlFlowNode for password | This expression logs $@ as clear text. | test.py:19:16:19:29 | ControlFlowNode for get_password() | sensitive data (password) |
From a301c93ebff6114083b386ddf113fc637d3dc7ea Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 1 Nov 2022 16:36:31 +0100
Subject: [PATCH 062/415] Python: Fix results outside DB for `CleartextLogging`
---
.../CleartextLoggingCustomizations.qll | 47 +++++++--
.../CommandInjectionCustomizations.qll | 3 +-
.../CleartextLogging.expected | 97 -------------------
3 files changed, 39 insertions(+), 108 deletions(-)
diff --git a/python/ql/lib/semmle/python/security/dataflow/CleartextLoggingCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/CleartextLoggingCustomizations.qll
index fcf8885f3f4..ae61bd04314 100644
--- a/python/ql/lib/semmle/python/security/dataflow/CleartextLoggingCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CleartextLoggingCustomizations.qll
@@ -57,16 +57,43 @@ module CleartextLogging {
/** A piece of data printed, considered as a flow sink. */
class PrintedDataAsSink extends Sink {
PrintedDataAsSink() {
- this = API::builtin("print").getACall().getArg(_)
- or
- // special handling of writing to `sys.stdout` and `sys.stderr`, which is
- // essentially the same as printing
- this =
- API::moduleImport("sys")
- .getMember(["stdout", "stderr"])
- .getMember("write")
- .getACall()
- .getArg(0)
+ (
+ this = API::builtin("print").getACall().getArg(_)
+ or
+ // special handling of writing to `sys.stdout` and `sys.stderr`, which is
+ // essentially the same as printing
+ this =
+ API::moduleImport("sys")
+ .getMember(["stdout", "stderr"])
+ .getMember("write")
+ .getACall()
+ .getArg(0)
+ ) and
+ // since some of the inner error handling implementation of the logging module is
+ // ```py
+ // sys.stderr.write('Message: %r\n'
+ // 'Arguments: %s\n' % (record.msg,
+ // record.args))
+ // ```
+ // any time we would report flow to such a logging sink, we can ALSO report
+ // the flow to the `record.msg`/`record.args` sinks -- obviously we
+ // don't want that.
+ //
+ // However, simply removing taint edges out of a sink is not a good enough solution,
+ // since we would only flag one of the `logging.info` calls in the following example
+ // due to use-use flow
+ // ```py
+ // logging.info(user_controlled)
+ // logging.info(user_controlled)
+ // ```
+ //
+ // The same approach is used in the command injection query.
+ not exists(Module loggingInit |
+ loggingInit.getName() = "logging.__init__" and
+ this.getScope().getEnclosingModule() = loggingInit and
+ // do allow this call if we're analyzing logging/__init__.py as part of CPython though
+ not exists(loggingInit.getFile().getRelativePath())
+ )
}
}
}
diff --git a/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
index 8d688bf357a..d43095a04f8 100644
--- a/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
@@ -77,7 +77,8 @@ module CommandInjection {
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
//
- // The same approach is used in the path-injection and cleartext-storage queries.
+ // The same approach is used in the path-injection, cleartext-storage, and
+ // cleartext-logging queries.
not this.getScope().getEnclosingModule().getName() in [
"os", "subprocess", "platform", "popen2"
]
diff --git a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected
index 2d6dafde6ab..e9b5ac67585 100644
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected
@@ -1,111 +1,18 @@
edges
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:283:23:283:26 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:310:20:310:23 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:310:20:310:23 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:310:20:310:26 | ControlFlowNode for Subscript |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:310:20:310:26 | ControlFlowNode for Subscript | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:311:21:311:24 | ControlFlowNode for args |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:311:21:311:24 | ControlFlowNode for args | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:311:9:311:12 | [post] ControlFlowNode for self [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:939:22:939:27 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:952:27:952:32 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:952:27:952:32 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1071:20:1071:25 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:952:27:952:32 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1178:20:1178:25 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:989:27:989:32 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1023:59:1023:64 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1022:59:1023:69 | ControlFlowNode for Tuple | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1021:38:1023:70 | ControlFlowNode for BinaryExpr |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1023:59:1023:64 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1023:59:1023:69 | ControlFlowNode for Attribute |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1023:59:1023:69 | ControlFlowNode for Attribute | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1022:59:1023:69 | ControlFlowNode for Tuple |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1071:20:1071:25 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1091:30:1091:35 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1091:30:1091:35 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:989:27:989:32 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1178:20:1178:25 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1187:34:1187:39 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1187:34:1187:39 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1071:20:1071:25 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1424:27:1424:30 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1434:35:1434:38 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1434:35:1434:38 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1565:32:1565:35 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1436:26:1436:29 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1446:34:1446:37 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1446:34:1446:37 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1565:32:1565:35 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1497:32:1497:35 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1512:35:1512:38 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1512:35:1512:38 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1565:32:1565:35 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1550:53:1550:56 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:59:1556:62 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:14:1557:35 | ControlFlowNode for _logRecordFactory() [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1563:16:1563:17 | ControlFlowNode for rv [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:59:1556:62 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:283:23:283:26 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:59:1556:62 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:14:1557:35 | ControlFlowNode for _logRecordFactory() [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1565:32:1565:35 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:66:1587:69 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:18:1588:62 | ControlFlowNode for Attribute() [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1589:21:1589:26 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:66:1587:69 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1550:53:1550:56 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:66:1587:69 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:18:1588:62 | ControlFlowNode for Attribute() [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1589:21:1589:26 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1591:22:1591:27 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1591:22:1591:27 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1599:31:1599:36 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1599:31:1599:36 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1645:28:1645:33 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1645:28:1645:33 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1669:39:1669:44 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1669:39:1669:44 | ControlFlowNode for record [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:939:22:939:27 | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2089:16:2089:19 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2097:21:2097:24 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2097:21:2097:24 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1436:26:1436:29 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2099:17:2099:20 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2107:22:2107:25 | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2107:22:2107:25 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1424:27:1424:30 | ControlFlowNode for args [List element] |
-| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:20:48:20:55 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:20:48:20:55 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:22:58:22:65 | ControlFlowNode for password |
-| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:22:58:22:65 | ControlFlowNode for password |
-| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:23:58:23:65 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:23:58:23:65 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:27:40:27:47 | ControlFlowNode for password |
-| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:27:40:27:47 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:30:58:30:65 | ControlFlowNode for password |
-| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:30:58:30:65 | ControlFlowNode for password |
-| test.py:20:48:20:55 | ControlFlowNode for password | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2089:16:2089:19 | ControlFlowNode for args [List element] |
-| test.py:22:58:22:65 | ControlFlowNode for password | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1497:32:1497:35 | ControlFlowNode for args [List element] |
-| test.py:23:58:23:65 | ControlFlowNode for password | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1436:26:1436:29 | ControlFlowNode for args [List element] |
-| test.py:27:40:27:47 | ControlFlowNode for password | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1436:26:1436:29 | ControlFlowNode for args [List element] |
-| test.py:30:58:30:65 | ControlFlowNode for password | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1436:26:1436:29 | ControlFlowNode for args [List element] |
-| test.py:34:30:34:39 | ControlFlowNode for get_cert() | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2099:17:2099:20 | ControlFlowNode for args [List element] |
| test.py:65:14:68:5 | ControlFlowNode for Dict | test.py:69:11:69:31 | ControlFlowNode for Subscript |
| test.py:67:21:67:37 | ControlFlowNode for Attribute | test.py:65:14:68:5 | ControlFlowNode for Dict |
nodes
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:283:23:283:26 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:310:20:310:23 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:310:20:310:26 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:311:9:311:12 | [post] ControlFlowNode for self [Attribute args] | semmle.label | [post] ControlFlowNode for self [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:311:21:311:24 | ControlFlowNode for args | semmle.label | ControlFlowNode for args |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:939:22:939:27 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:952:27:952:32 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:989:27:989:32 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1021:38:1023:70 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1022:59:1023:69 | ControlFlowNode for Tuple | semmle.label | ControlFlowNode for Tuple |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1023:59:1023:64 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1023:59:1023:69 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1071:20:1071:25 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1091:30:1091:35 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1178:20:1178:25 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1187:34:1187:39 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1424:27:1424:30 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1434:35:1434:38 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1436:26:1436:29 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1446:34:1446:37 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1497:32:1497:35 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1512:35:1512:38 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1550:53:1550:56 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:14:1557:35 | ControlFlowNode for _logRecordFactory() [Attribute args] | semmle.label | ControlFlowNode for _logRecordFactory() [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:59:1556:62 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1563:16:1563:17 | ControlFlowNode for rv [Attribute args] | semmle.label | ControlFlowNode for rv [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1565:32:1565:35 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:18:1588:62 | ControlFlowNode for Attribute() [Attribute args] | semmle.label | ControlFlowNode for Attribute() [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:66:1587:69 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1589:21:1589:26 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1591:22:1591:27 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1599:31:1599:36 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1645:28:1645:33 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1669:39:1669:44 | ControlFlowNode for record [Attribute args] | semmle.label | ControlFlowNode for record [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2089:16:2089:19 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2097:21:2097:24 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2099:17:2099:20 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:2107:22:2107:25 | ControlFlowNode for args [List element] | semmle.label | ControlFlowNode for args [List element] |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test.py:20:48:20:55 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
-| test.py:20:48:20:55 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
-| test.py:22:58:22:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:22:58:22:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:23:58:23:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
-| test.py:23:58:23:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
-| test.py:27:40:27:47 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:27:40:27:47 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:30:58:30:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
-| test.py:30:58:30:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
-| test.py:34:30:34:39 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
| test.py:34:30:34:39 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
| test.py:37:11:37:24 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test.py:39:22:39:35 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
@@ -114,11 +21,7 @@ nodes
| test.py:67:21:67:37 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:69:11:69:31 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
subpaths
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:59:1556:62 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:283:23:283:26 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:311:9:311:12 | [post] ControlFlowNode for self [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1556:14:1557:35 | ControlFlowNode for _logRecordFactory() [Attribute args] |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:66:1587:69 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1550:53:1550:56 | ControlFlowNode for args [List element] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1563:16:1563:17 | ControlFlowNode for rv [Attribute args] | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1587:18:1588:62 | ControlFlowNode for Attribute() [Attribute args] |
#select
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1021:38:1023:70 | ControlFlowNode for BinaryExpr | test.py:19:16:19:29 | ControlFlowNode for get_password() | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1021:38:1023:70 | ControlFlowNode for BinaryExpr | This expression logs $@ as clear text. | test.py:19:16:19:29 | ControlFlowNode for get_password() | sensitive data (password) |
-| file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1021:38:1023:70 | ControlFlowNode for BinaryExpr | test.py:34:30:34:39 | ControlFlowNode for get_cert() | file:///home/rasmus/.pyenv/versions/3.9.5/lib/python3.9/logging/__init__.py:1021:38:1023:70 | ControlFlowNode for BinaryExpr | This expression logs $@ as clear text. | test.py:34:30:34:39 | ControlFlowNode for get_cert() | sensitive data (certificate) |
| test.py:20:48:20:55 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:20:48:20:55 | ControlFlowNode for password | This expression logs $@ as clear text. | test.py:19:16:19:29 | ControlFlowNode for get_password() | sensitive data (password) |
| test.py:22:58:22:65 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:22:58:22:65 | ControlFlowNode for password | This expression logs $@ as clear text. | test.py:19:16:19:29 | ControlFlowNode for get_password() | sensitive data (password) |
| test.py:23:58:23:65 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:23:58:23:65 | ControlFlowNode for password | This expression logs $@ as clear text. | test.py:19:16:19:29 | ControlFlowNode for get_password() | sensitive data (password) |
From 972cfa5cf6480005b58da576edb2cfc47c6d3330 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 2 Nov 2022 09:49:32 +0100
Subject: [PATCH 063/415] Python: Accept bad `StackTraceExposure.expected`
This is only Python 2 though
---
.../StackTraceExposure.expected | 110 ++++++++++++++++++
1 file changed, 110 insertions(+)
diff --git a/python/ql/test/query-tests/Security/CWE-209-StackTraceExposure/StackTraceExposure.expected b/python/ql/test/query-tests/Security/CWE-209-StackTraceExposure/StackTraceExposure.expected
index 07b208caaac..23ba9142daa 100644
--- a/python/ql/test/query-tests/Security/CWE-209-StackTraceExposure/StackTraceExposure.expected
+++ b/python/ql/test/query-tests/Security/CWE-209-StackTraceExposure/StackTraceExposure.expected
@@ -1,4 +1,56 @@
edges
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:22:130:26 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:41:144:45 | ControlFlowNode for etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:29:130:33 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:145:12:145:15 | ControlFlowNode for list |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:145:12:145:15 | ControlFlowNode for list |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:41:144:45 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:41:144:45 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:27:147:31 | ControlFlowNode for etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:27:147:31 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:40:173:44 | ControlFlowNode for etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:47:173:51 | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:47:178:51 | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:48:200:52 | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:16:173:53 | ControlFlowNode for List |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:16:173:53 | ControlFlowNode for List |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:40:173:44 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:40:173:44 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:28:203:32 | ControlFlowNode for etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:47:173:51 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:47:173:51 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:17:178:52 | ControlFlowNode for _format_final_exc_line() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:16:178:53 | ControlFlowNode for List |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:47:178:51 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:17:178:52 | ControlFlowNode for _format_final_exc_line() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:47:178:51 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:18:200:53 | ControlFlowNode for _format_final_exc_line() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:201:12:201:16 | ControlFlowNode for lines |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:48:200:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:18:200:53 | ControlFlowNode for _format_final_exc_line() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:48:200:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:28:203:32 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:209:30:209:44 | ControlFlowNode for Tuple |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:28:203:32 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:26:205:30 | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:16:205:31 | ControlFlowNode for _some_str() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:209:30:209:44 | ControlFlowNode for Tuple |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:26:205:30 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:16:205:31 | ControlFlowNode for _some_str() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:26:205:30 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:212:15:212:19 | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:209:30:209:44 | ControlFlowNode for Tuple | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:209:30:209:44 | ControlFlowNode for Tuple | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:212:15:212:19 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:214:16:214:25 | ControlFlowNode for str() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:212:15:212:19 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:219:16:219:56 | ControlFlowNode for Attribute() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:13 | SSA variable etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:41:242:45 | ControlFlowNode for etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:13 | SSA variable etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:16:241:20 | SSA variable value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple [Tuple element at index 0] | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:13 | SSA variable etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple [Tuple element at index 1] | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:16:241:20 | SSA variable value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | IterableElement | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | IterableElement | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple [Tuple element at index 0] |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | IterableElement | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple [Tuple element at index 1] |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:16:241:20 | SSA variable value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:48:242:52 | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | IterableElement |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:16:242:65 | ControlFlowNode for Attribute() | test.py:16:16:16:37 | ControlFlowNode for Attribute() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:16:242:65 | ControlFlowNode for Attribute() | test.py:49:15:49:36 | ControlFlowNode for Attribute() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:24:242:64 | ControlFlowNode for format_exception() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:16:242:65 | ControlFlowNode for Attribute() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:41:242:45 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:22:130:26 | ControlFlowNode for etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:41:242:45 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:24:242:64 | ControlFlowNode for format_exception() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:48:242:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:29:130:33 | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:48:242:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:24:242:64 | ControlFlowNode for format_exception() |
| test.py:23:25:23:25 | SSA variable e | test.py:24:16:24:16 | ControlFlowNode for e |
| test.py:31:25:31:25 | SSA variable e | test.py:32:16:32:30 | ControlFlowNode for Attribute |
| test.py:49:15:49:36 | ControlFlowNode for Attribute() | test.py:50:29:50:31 | ControlFlowNode for err |
@@ -7,6 +59,50 @@ edges
| test.py:52:18:52:20 | ControlFlowNode for msg | test.py:53:12:53:27 | ControlFlowNode for BinaryExpr |
| test.py:65:25:65:25 | SSA variable e | test.py:66:24:66:40 | ControlFlowNode for Dict |
nodes
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:22:130:26 | ControlFlowNode for etype | semmle.label | ControlFlowNode for etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:29:130:33 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() | semmle.label | ControlFlowNode for format_exception_only() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() | semmle.label | ControlFlowNode for format_exception_only() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:41:144:45 | ControlFlowNode for etype | semmle.label | ControlFlowNode for etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:145:12:145:15 | ControlFlowNode for list | semmle.label | ControlFlowNode for list |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:145:12:145:15 | ControlFlowNode for list | semmle.label | ControlFlowNode for list |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:27:147:31 | ControlFlowNode for etype | semmle.label | ControlFlowNode for etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:16:173:53 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:16:173:53 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() | semmle.label | ControlFlowNode for _format_final_exc_line() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() | semmle.label | ControlFlowNode for _format_final_exc_line() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:40:173:44 | ControlFlowNode for etype | semmle.label | ControlFlowNode for etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:47:173:51 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:16:178:53 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:17:178:52 | ControlFlowNode for _format_final_exc_line() | semmle.label | ControlFlowNode for _format_final_exc_line() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:47:178:51 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:18:200:53 | ControlFlowNode for _format_final_exc_line() | semmle.label | ControlFlowNode for _format_final_exc_line() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:48:200:52 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:201:12:201:16 | ControlFlowNode for lines | semmle.label | ControlFlowNode for lines |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:28:203:32 | ControlFlowNode for etype | semmle.label | ControlFlowNode for etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:16:205:31 | ControlFlowNode for _some_str() | semmle.label | ControlFlowNode for _some_str() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:26:205:30 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:209:30:209:44 | ControlFlowNode for Tuple | semmle.label | ControlFlowNode for Tuple |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:209:30:209:44 | ControlFlowNode for Tuple | semmle.label | ControlFlowNode for Tuple |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line | semmle.label | ControlFlowNode for line |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line | semmle.label | ControlFlowNode for line |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:212:15:212:19 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:214:16:214:25 | ControlFlowNode for str() | semmle.label | ControlFlowNode for str() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:219:16:219:56 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:13 | SSA variable etype | semmle.label | SSA variable etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple | semmle.label | ControlFlowNode for Tuple |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple [Tuple element at index 0] | semmle.label | ControlFlowNode for Tuple [Tuple element at index 0] |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple [Tuple element at index 1] | semmle.label | ControlFlowNode for Tuple [Tuple element at index 1] |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | IterableElement | semmle.label | IterableElement |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:16:241:20 | SSA variable value | semmle.label | SSA variable value |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:16:242:65 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:24:242:64 | ControlFlowNode for format_exception() | semmle.label | ControlFlowNode for format_exception() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:41:242:45 | ControlFlowNode for etype | semmle.label | ControlFlowNode for etype |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:48:242:52 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
| test.py:16:16:16:37 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:23:25:23:25 | SSA variable e | semmle.label | SSA variable e |
| test.py:24:16:24:16 | ControlFlowNode for e | semmle.label | ControlFlowNode for e |
@@ -20,10 +116,24 @@ nodes
| test.py:65:25:65:25 | SSA variable e | semmle.label | SSA variable e |
| test.py:66:24:66:40 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
subpaths
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:41:144:45 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:27:147:31 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:16:173:53 | ControlFlowNode for List | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:16:173:53 | ControlFlowNode for List | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:16:178:53 | ControlFlowNode for List | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:201:12:201:16 | ControlFlowNode for lines | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:40:173:44 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:28:203:32 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:47:173:51 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:47:178:51 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:17:178:52 | ControlFlowNode for _format_final_exc_line() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:48:200:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:18:200:53 | ControlFlowNode for _format_final_exc_line() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:26:205:30 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:212:15:212:19 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:214:16:214:25 | ControlFlowNode for str() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:16:205:31 | ControlFlowNode for _some_str() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:26:205:30 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:212:15:212:19 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:219:16:219:56 | ControlFlowNode for Attribute() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:16:205:31 | ControlFlowNode for _some_str() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:41:242:45 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:22:130:26 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:145:12:145:15 | ControlFlowNode for list | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:24:242:64 | ControlFlowNode for format_exception() |
+| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:48:242:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:29:130:33 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:145:12:145:15 | ControlFlowNode for list | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:24:242:64 | ControlFlowNode for format_exception() |
| test.py:50:29:50:31 | ControlFlowNode for err | test.py:52:18:52:20 | ControlFlowNode for msg | test.py:53:12:53:27 | ControlFlowNode for BinaryExpr | test.py:50:16:50:32 | ControlFlowNode for format_error() |
#select
+| test.py:16:16:16:37 | ControlFlowNode for Attribute() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | test.py:16:16:16:37 | ControlFlowNode for Attribute() | $@ flows to this location and may be exposed to an external user. | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | Stack trace information |
| test.py:16:16:16:37 | ControlFlowNode for Attribute() | test.py:16:16:16:37 | ControlFlowNode for Attribute() | test.py:16:16:16:37 | ControlFlowNode for Attribute() | $@ flows to this location and may be exposed to an external user. | test.py:16:16:16:37 | ControlFlowNode for Attribute() | Stack trace information |
| test.py:24:16:24:16 | ControlFlowNode for e | test.py:23:25:23:25 | SSA variable e | test.py:24:16:24:16 | ControlFlowNode for e | $@ flows to this location and may be exposed to an external user. | test.py:23:25:23:25 | SSA variable e | Stack trace information |
| test.py:32:16:32:30 | ControlFlowNode for Attribute | test.py:31:25:31:25 | SSA variable e | test.py:32:16:32:30 | ControlFlowNode for Attribute | $@ flows to this location and may be exposed to an external user. | test.py:31:25:31:25 | SSA variable e | Stack trace information |
+| test.py:50:16:50:32 | ControlFlowNode for format_error() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | test.py:50:16:50:32 | ControlFlowNode for format_error() | $@ flows to this location and may be exposed to an external user. | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | Stack trace information |
| test.py:50:16:50:32 | ControlFlowNode for format_error() | test.py:49:15:49:36 | ControlFlowNode for Attribute() | test.py:50:16:50:32 | ControlFlowNode for format_error() | $@ flows to this location and may be exposed to an external user. | test.py:49:15:49:36 | ControlFlowNode for Attribute() | Stack trace information |
| test.py:66:24:66:40 | ControlFlowNode for Dict | test.py:65:25:65:25 | SSA variable e | test.py:66:24:66:40 | ControlFlowNode for Dict | $@ flows to this location and may be exposed to an external user. | test.py:65:25:65:25 | SSA variable e | Stack trace information |
From 6646e98d20971983dd507063d70038bfca9ccb78 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 2 Nov 2022 09:55:59 +0100
Subject: [PATCH 064/415] Python: Fix results outside DB for
`StackTraceExposure`
---
.../StackTraceExposureCustomizations.qll | 26 ++++-
.../StackTraceExposure.expected | 110 ------------------
2 files changed, 25 insertions(+), 111 deletions(-)
diff --git a/python/ql/lib/semmle/python/security/dataflow/StackTraceExposureCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/StackTraceExposureCustomizations.qll
index 09f84327a63..6d0b9dc5c77 100644
--- a/python/ql/lib/semmle/python/security/dataflow/StackTraceExposureCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/StackTraceExposureCustomizations.qll
@@ -42,7 +42,31 @@ module StackTraceExposure {
* A source of exception info, considered as a flow source.
*/
class ExceptionInfoAsSource extends Source {
- ExceptionInfoAsSource() { this instanceof ExceptionInfo }
+ ExceptionInfoAsSource() {
+ this instanceof ExceptionInfo and
+ // since `traceback.format_exc()` in Python 2 is internally implemented as
+ // ```py
+ // def format_exc(limit=None):
+ // """Like print_exc() but return a string."""
+ // try:
+ // etype, value, tb = sys.exc_info()
+ // return ''.join(format_exception(etype, value, tb, limit))
+ // finally:
+ // etype = value = tb = None
+ // ```
+ // any time we would report flow to a sink from a call to format_exc, we can ALSO report
+ // flow from the `sys.exc_info()` source inside it -- obviously we don't want that.
+ //
+ //
+ // To avoid this, we use the same approach as for sinks in the command injection
+ // query (and others).
+ not exists(Module traceback |
+ traceback.getName() = "traceback" and
+ this.getScope().getEnclosingModule() = traceback and
+ // do allow this call if we're analyzing traceback.py as part of CPython though
+ not exists(traceback.getFile().getRelativePath())
+ )
+ }
}
/**
diff --git a/python/ql/test/query-tests/Security/CWE-209-StackTraceExposure/StackTraceExposure.expected b/python/ql/test/query-tests/Security/CWE-209-StackTraceExposure/StackTraceExposure.expected
index 23ba9142daa..07b208caaac 100644
--- a/python/ql/test/query-tests/Security/CWE-209-StackTraceExposure/StackTraceExposure.expected
+++ b/python/ql/test/query-tests/Security/CWE-209-StackTraceExposure/StackTraceExposure.expected
@@ -1,56 +1,4 @@
edges
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:22:130:26 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:41:144:45 | ControlFlowNode for etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:29:130:33 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:145:12:145:15 | ControlFlowNode for list |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:145:12:145:15 | ControlFlowNode for list |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:41:144:45 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:41:144:45 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:27:147:31 | ControlFlowNode for etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:27:147:31 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:40:173:44 | ControlFlowNode for etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:47:173:51 | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:47:178:51 | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:48:200:52 | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:16:173:53 | ControlFlowNode for List |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:16:173:53 | ControlFlowNode for List |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:40:173:44 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:40:173:44 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:28:203:32 | ControlFlowNode for etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:47:173:51 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:47:173:51 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:17:178:52 | ControlFlowNode for _format_final_exc_line() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:16:178:53 | ControlFlowNode for List |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:47:178:51 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:17:178:52 | ControlFlowNode for _format_final_exc_line() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:47:178:51 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:18:200:53 | ControlFlowNode for _format_final_exc_line() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:201:12:201:16 | ControlFlowNode for lines |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:48:200:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:18:200:53 | ControlFlowNode for _format_final_exc_line() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:48:200:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:28:203:32 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:209:30:209:44 | ControlFlowNode for Tuple |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:28:203:32 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:26:205:30 | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:16:205:31 | ControlFlowNode for _some_str() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:209:30:209:44 | ControlFlowNode for Tuple |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:26:205:30 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:16:205:31 | ControlFlowNode for _some_str() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:26:205:30 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:212:15:212:19 | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:209:30:209:44 | ControlFlowNode for Tuple | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:209:30:209:44 | ControlFlowNode for Tuple | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:212:15:212:19 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:214:16:214:25 | ControlFlowNode for str() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:212:15:212:19 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:219:16:219:56 | ControlFlowNode for Attribute() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:13 | SSA variable etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:41:242:45 | ControlFlowNode for etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:13 | SSA variable etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:16:241:20 | SSA variable value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple [Tuple element at index 0] | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:13 | SSA variable etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple [Tuple element at index 1] | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:16:241:20 | SSA variable value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | IterableElement | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | IterableElement | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple [Tuple element at index 0] |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | IterableElement | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple [Tuple element at index 1] |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:16:241:20 | SSA variable value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:48:242:52 | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | IterableElement |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:16:242:65 | ControlFlowNode for Attribute() | test.py:16:16:16:37 | ControlFlowNode for Attribute() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:16:242:65 | ControlFlowNode for Attribute() | test.py:49:15:49:36 | ControlFlowNode for Attribute() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:24:242:64 | ControlFlowNode for format_exception() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:16:242:65 | ControlFlowNode for Attribute() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:41:242:45 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:22:130:26 | ControlFlowNode for etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:41:242:45 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:24:242:64 | ControlFlowNode for format_exception() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:48:242:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:29:130:33 | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:48:242:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:24:242:64 | ControlFlowNode for format_exception() |
| test.py:23:25:23:25 | SSA variable e | test.py:24:16:24:16 | ControlFlowNode for e |
| test.py:31:25:31:25 | SSA variable e | test.py:32:16:32:30 | ControlFlowNode for Attribute |
| test.py:49:15:49:36 | ControlFlowNode for Attribute() | test.py:50:29:50:31 | ControlFlowNode for err |
@@ -59,50 +7,6 @@ edges
| test.py:52:18:52:20 | ControlFlowNode for msg | test.py:53:12:53:27 | ControlFlowNode for BinaryExpr |
| test.py:65:25:65:25 | SSA variable e | test.py:66:24:66:40 | ControlFlowNode for Dict |
nodes
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:22:130:26 | ControlFlowNode for etype | semmle.label | ControlFlowNode for etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:29:130:33 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() | semmle.label | ControlFlowNode for format_exception_only() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() | semmle.label | ControlFlowNode for format_exception_only() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:41:144:45 | ControlFlowNode for etype | semmle.label | ControlFlowNode for etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:145:12:145:15 | ControlFlowNode for list | semmle.label | ControlFlowNode for list |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:145:12:145:15 | ControlFlowNode for list | semmle.label | ControlFlowNode for list |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:27:147:31 | ControlFlowNode for etype | semmle.label | ControlFlowNode for etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:16:173:53 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:16:173:53 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() | semmle.label | ControlFlowNode for _format_final_exc_line() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() | semmle.label | ControlFlowNode for _format_final_exc_line() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:40:173:44 | ControlFlowNode for etype | semmle.label | ControlFlowNode for etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:47:173:51 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:16:178:53 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:17:178:52 | ControlFlowNode for _format_final_exc_line() | semmle.label | ControlFlowNode for _format_final_exc_line() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:47:178:51 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:18:200:53 | ControlFlowNode for _format_final_exc_line() | semmle.label | ControlFlowNode for _format_final_exc_line() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:48:200:52 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:201:12:201:16 | ControlFlowNode for lines | semmle.label | ControlFlowNode for lines |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:28:203:32 | ControlFlowNode for etype | semmle.label | ControlFlowNode for etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:16:205:31 | ControlFlowNode for _some_str() | semmle.label | ControlFlowNode for _some_str() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:26:205:30 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:209:30:209:44 | ControlFlowNode for Tuple | semmle.label | ControlFlowNode for Tuple |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:209:30:209:44 | ControlFlowNode for Tuple | semmle.label | ControlFlowNode for Tuple |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line | semmle.label | ControlFlowNode for line |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line | semmle.label | ControlFlowNode for line |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:212:15:212:19 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:214:16:214:25 | ControlFlowNode for str() | semmle.label | ControlFlowNode for str() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:219:16:219:56 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:13 | SSA variable etype | semmle.label | SSA variable etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple | semmle.label | ControlFlowNode for Tuple |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple [Tuple element at index 0] | semmle.label | ControlFlowNode for Tuple [Tuple element at index 0] |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | ControlFlowNode for Tuple [Tuple element at index 1] | semmle.label | ControlFlowNode for Tuple [Tuple element at index 1] |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:9:241:24 | IterableElement | semmle.label | IterableElement |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:16:241:20 | SSA variable value | semmle.label | SSA variable value |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:16:242:65 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:24:242:64 | ControlFlowNode for format_exception() | semmle.label | ControlFlowNode for format_exception() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:41:242:45 | ControlFlowNode for etype | semmle.label | ControlFlowNode for etype |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:48:242:52 | ControlFlowNode for value | semmle.label | ControlFlowNode for value |
| test.py:16:16:16:37 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:23:25:23:25 | SSA variable e | semmle.label | SSA variable e |
| test.py:24:16:24:16 | ControlFlowNode for e | semmle.label | ControlFlowNode for e |
@@ -116,24 +20,10 @@ nodes
| test.py:65:25:65:25 | SSA variable e | semmle.label | SSA variable e |
| test.py:66:24:66:40 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
subpaths
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:41:144:45 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:27:147:31 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:16:173:53 | ControlFlowNode for List | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:16:173:53 | ControlFlowNode for List | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:16:178:53 | ControlFlowNode for List | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:48:144:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:147:34:147:38 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:201:12:201:16 | ControlFlowNode for lines | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:144:19:144:53 | ControlFlowNode for format_exception_only() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:40:173:44 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:28:203:32 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:47:173:51 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:173:17:173:52 | ControlFlowNode for _format_final_exc_line() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:47:178:51 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:178:17:178:52 | ControlFlowNode for _format_final_exc_line() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:48:200:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:203:35:203:39 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:210:12:210:15 | ControlFlowNode for line | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:200:18:200:53 | ControlFlowNode for _format_final_exc_line() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:26:205:30 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:212:15:212:19 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:214:16:214:25 | ControlFlowNode for str() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:16:205:31 | ControlFlowNode for _some_str() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:26:205:30 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:212:15:212:19 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:219:16:219:56 | ControlFlowNode for Attribute() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:205:16:205:31 | ControlFlowNode for _some_str() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:41:242:45 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:22:130:26 | ControlFlowNode for etype | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:145:12:145:15 | ControlFlowNode for list | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:24:242:64 | ControlFlowNode for format_exception() |
-| file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:48:242:52 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:130:29:130:33 | ControlFlowNode for value | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:145:12:145:15 | ControlFlowNode for list | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:242:24:242:64 | ControlFlowNode for format_exception() |
| test.py:50:29:50:31 | ControlFlowNode for err | test.py:52:18:52:20 | ControlFlowNode for msg | test.py:53:12:53:27 | ControlFlowNode for BinaryExpr | test.py:50:16:50:32 | ControlFlowNode for format_error() |
#select
-| test.py:16:16:16:37 | ControlFlowNode for Attribute() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | test.py:16:16:16:37 | ControlFlowNode for Attribute() | $@ flows to this location and may be exposed to an external user. | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | Stack trace information |
| test.py:16:16:16:37 | ControlFlowNode for Attribute() | test.py:16:16:16:37 | ControlFlowNode for Attribute() | test.py:16:16:16:37 | ControlFlowNode for Attribute() | $@ flows to this location and may be exposed to an external user. | test.py:16:16:16:37 | ControlFlowNode for Attribute() | Stack trace information |
| test.py:24:16:24:16 | ControlFlowNode for e | test.py:23:25:23:25 | SSA variable e | test.py:24:16:24:16 | ControlFlowNode for e | $@ flows to this location and may be exposed to an external user. | test.py:23:25:23:25 | SSA variable e | Stack trace information |
| test.py:32:16:32:30 | ControlFlowNode for Attribute | test.py:31:25:31:25 | SSA variable e | test.py:32:16:32:30 | ControlFlowNode for Attribute | $@ flows to this location and may be exposed to an external user. | test.py:31:25:31:25 | SSA variable e | Stack trace information |
-| test.py:50:16:50:32 | ControlFlowNode for format_error() | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | test.py:50:16:50:32 | ControlFlowNode for format_error() | $@ flows to this location and may be exposed to an external user. | file:///home/rasmus/.pyenv/versions/2.7.18/lib/python2.7/traceback.py:241:28:241:41 | ControlFlowNode for Attribute() | Stack trace information |
| test.py:50:16:50:32 | ControlFlowNode for format_error() | test.py:49:15:49:36 | ControlFlowNode for Attribute() | test.py:50:16:50:32 | ControlFlowNode for format_error() | $@ flows to this location and may be exposed to an external user. | test.py:49:15:49:36 | ControlFlowNode for Attribute() | Stack trace information |
| test.py:66:24:66:40 | ControlFlowNode for Dict | test.py:65:25:65:25 | SSA variable e | test.py:66:24:66:40 | ControlFlowNode for Dict | $@ flows to this location and may be exposed to an external user. | test.py:65:25:65:25 | SSA variable e | Stack trace information |
From bd46b7deaa0c9715b07f1b116a2f0980d14cdd6e Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 23 Sep 2022 16:27:23 +0200
Subject: [PATCH 065/415] Python: Cache a few call-graph predicates
We definitely don't want to recompute `resolveCall` and `getCallArg`, so mark them `cached`.
---
.../semmle/python/dataflow/new/internal/DataFlowDispatch.qll | 2 ++
1 file changed, 2 insertions(+)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 55c4b0ae240..26764e96670 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -1039,6 +1039,7 @@ predicate resolveClassInstanceCall(CallNode call, Function target, Node self) {
/**
* Holds if `call` is a call to the `target`, with call-type `type`.
*/
+cached
predicate resolveCall(ControlFlowNode call, Function target, CallType type) {
type instanceof CallTypePlainFunction and
call.(CallNode).getFunction() = functionTracker(target).asCfgNode() and
@@ -1114,6 +1115,7 @@ predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
* Note: If `Bar.meth` and `Foo.meth` resolves to the same function, we will end up
* sending both `self` arguments to that function, which is by definition the right thing to do.
*/
+cached
predicate getCallArg(
ControlFlowNode call, Function target, CallType type, Node arg, ArgumentPosition apos
) {
From fc0545561e3c6a1de824bea3b5cb063ea296786c Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 17 Oct 2022 10:52:44 +0200
Subject: [PATCH 066/415] Python: Introduce points-to cached stage
With points-to no longer being used for the call-graph, it's time to move
the points-to predicates out of the dataflow cached stage and into a
cached stage of their own.
---
python/ql/lib/semmle/python/Flow.qll | 2 +-
.../semmle/python/internal/CachedStages.qll | 60 +++++++++++++------
.../semmle/python/objects/ObjectInternal.qll | 2 +-
python/ql/lib/semmle/python/pointsto/Base.qll | 2 +-
.../lib/semmle/python/pointsto/PointsTo.qll | 2 +-
python/ql/lib/semmle/python/types/Object.qll | 6 +-
6 files changed, 49 insertions(+), 25 deletions(-)
diff --git a/python/ql/lib/semmle/python/Flow.qll b/python/ql/lib/semmle/python/Flow.qll
index bd491d527cc..a26a7ac7d8a 100644
--- a/python/ql/lib/semmle/python/Flow.qll
+++ b/python/ql/lib/semmle/python/Flow.qll
@@ -125,7 +125,7 @@ class ControlFlowNode extends @py_flow_node {
/** Gets a textual representation of this element. */
cached
string toString() {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
or
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
diff --git a/python/ql/lib/semmle/python/internal/CachedStages.qll b/python/ql/lib/semmle/python/internal/CachedStages.qll
index 290a90f5a73..8b5701b3115 100644
--- a/python/ql/lib/semmle/python/internal/CachedStages.qll
+++ b/python/ql/lib/semmle/python/internal/CachedStages.qll
@@ -125,6 +125,48 @@ module Stages {
}
}
+ /**
+ * The points-to stage.
+ */
+ cached
+ module PointsTo {
+ /**
+ * Always holds.
+ * Ensures that a predicate is evaluated as part of the points-to stage.
+ */
+ cached
+ predicate ref() { 1 = 1 }
+
+ private import semmle.python.pointsto.Base as PointsToBase
+ private import semmle.python.types.Object as TypeObject
+ private import semmle.python.objects.TObject as TObject
+ private import semmle.python.Flow as Flow
+ private import semmle.python.objects.ObjectInternal as ObjectInternal
+ // have to alias since this module is also called PointsTo
+ private import semmle.python.pointsto.PointsTo as RealPointsTo
+
+ /**
+ * DONT USE!
+ * Contains references to each predicate that use the above `ref` predicate.
+ */
+ cached
+ predicate backref() {
+ 1 = 1
+ or
+ PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
+ or
+ exists(TypeObject::Object a)
+ or
+ exists(TObject::TObject f)
+ or
+ exists(any(Flow::ControlFlowNode c).toString())
+ or
+ exists(any(ObjectInternal::ObjectInternal o).toString())
+ or
+ RealPointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
+ }
+ }
+
/**
* The `dataflow` stage.
*/
@@ -140,12 +182,6 @@ module Stages {
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
private import semmle.python.dataflow.new.internal.LocalSources as LocalSources
private import semmle.python.internal.Awaited as Awaited
- private import semmle.python.pointsto.Base as PointsToBase
- private import semmle.python.types.Object as TypeObject
- private import semmle.python.objects.TObject as TObject
- private import semmle.python.Flow as Flow
- private import semmle.python.objects.ObjectInternal as ObjectInternal
- private import semmle.python.pointsto.PointsTo as PointsTo
/**
* DONT USE!
@@ -162,18 +198,6 @@ module Stages {
any(LocalSources::LocalSourceNode n).flowsTo(_)
or
exists(Awaited::awaited(_))
- or
- PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
- or
- exists(TypeObject::Object a)
- or
- exists(TObject::TObject f)
- or
- exists(any(Flow::ControlFlowNode c).toString())
- or
- exists(any(ObjectInternal::ObjectInternal o).toString())
- or
- PointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
}
}
}
diff --git a/python/ql/lib/semmle/python/objects/ObjectInternal.qll b/python/ql/lib/semmle/python/objects/ObjectInternal.qll
index b6725e87cb6..a58b8b5f0a9 100644
--- a/python/ql/lib/semmle/python/objects/ObjectInternal.qll
+++ b/python/ql/lib/semmle/python/objects/ObjectInternal.qll
@@ -216,7 +216,7 @@ class BuiltinOpaqueObjectInternal extends ObjectInternal, TBuiltinOpaqueObject {
override Builtin getBuiltin() { this = TBuiltinOpaqueObject(result) }
override string toString() {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
result = this.getBuiltin().getClass().getName() + " object"
}
diff --git a/python/ql/lib/semmle/python/pointsto/Base.qll b/python/ql/lib/semmle/python/pointsto/Base.qll
index a3407419da2..96437cfed7e 100644
--- a/python/ql/lib/semmle/python/pointsto/Base.qll
+++ b/python/ql/lib/semmle/python/pointsto/Base.qll
@@ -318,7 +318,7 @@ module BaseFlow {
predicate scope_entry_value_transfer_from_earlier(
EssaVariable pred_var, Scope pred_scope, ScopeEntryDefinition succ_def, Scope succ_scope
) {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
exists(SsaSourceVariable var |
essa_var_scope(var, pred_scope, pred_var) and
scope_entry_def_scope(var, succ_scope, succ_def)
diff --git a/python/ql/lib/semmle/python/pointsto/PointsTo.qll b/python/ql/lib/semmle/python/pointsto/PointsTo.qll
index a5732e3bbd4..f4118100841 100644
--- a/python/ql/lib/semmle/python/pointsto/PointsTo.qll
+++ b/python/ql/lib/semmle/python/pointsto/PointsTo.qll
@@ -2569,7 +2569,7 @@ module AttributePointsTo {
predicate variableAttributePointsTo(
EssaVariable var, Context context, string name, ObjectInternal value, CfgOrigin origin
) {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
definitionAttributePointsTo(var.getDefinition(), context, name, value, origin)
or
exists(EssaVariable prev |
diff --git a/python/ql/lib/semmle/python/types/Object.qll b/python/ql/lib/semmle/python/types/Object.qll
index e0d252929f9..b408fc7ba1c 100644
--- a/python/ql/lib/semmle/python/types/Object.qll
+++ b/python/ql/lib/semmle/python/types/Object.qll
@@ -5,7 +5,7 @@ private import semmle.python.internal.CachedStages
cached
private predicate is_an_object(@py_object obj) {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
/* CFG nodes for numeric literals, all of which have a @py_cobject for the value of that literal */
obj instanceof ControlFlowNode and
not obj.(ControlFlowNode).getNode() instanceof IntegerLiteral and
@@ -78,7 +78,7 @@ class Object extends @py_object {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
this.hasOrigin() and
this.getOrigin()
.getLocation()
@@ -98,7 +98,7 @@ class Object extends @py_object {
/** Gets a textual representation of this element. */
cached
string toString() {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
not this = undefinedVariable() and
not this = unknownValue() and
exists(ClassObject type | type.asBuiltin() = this.asBuiltin().getClass() |
From 36e8b8bfb9af1bb82083e312b76be09cd63d6a3f Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 20 Oct 2022 21:19:12 +0200
Subject: [PATCH 067/415] Python: Add call-graph to cached dataflow stage
I didn't do any performance investigation on this, since it just seems
so much like the right approach.
---
.../new/internal/DataFlowDispatch.qll | 159 +++++++++---------
.../semmle/python/internal/CachedStages.qll | 5 +
2 files changed, 88 insertions(+), 76 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 26764e96670..bf5da859c90 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -37,6 +37,7 @@ private import DataFlowPublic
private import DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl
private import FlowSummaryImplSpecific as FlowSummaryImplSpecific
+private import semmle.python.internal.CachedStages
newtype TParameterPosition =
/** Used for `self` in methods, and `cls` in classmethods. */
@@ -1041,20 +1042,23 @@ predicate resolveClassInstanceCall(CallNode call, Function target, Node self) {
*/
cached
predicate resolveCall(ControlFlowNode call, Function target, CallType type) {
- type instanceof CallTypePlainFunction and
- call.(CallNode).getFunction() = functionTracker(target).asCfgNode() and
- not exists(Class cls | cls.getAMethod() = target)
- or
- resolveMethodCall(call, target, type, _)
- or
- type instanceof CallTypeClass and
- exists(Class cls |
- resolveClassCall(call, cls) and
- target = invokedFunctionFromClassConstruction(cls, _)
+ Stages::DataFlow::ref() and
+ (
+ type instanceof CallTypePlainFunction and
+ call.(CallNode).getFunction() = functionTracker(target).asCfgNode() and
+ not exists(Class cls | cls.getAMethod() = target)
+ or
+ resolveMethodCall(call, target, type, _)
+ or
+ type instanceof CallTypeClass and
+ exists(Class cls |
+ resolveClassCall(call, cls) and
+ target = invokedFunctionFromClassConstruction(cls, _)
+ )
+ or
+ type instanceof CallTypeClassInstanceCall and
+ resolveClassInstanceCall(call, target, _)
)
- or
- type instanceof CallTypeClassInstanceCall and
- resolveClassInstanceCall(call, target, _)
}
// =============================================================================
@@ -1119,77 +1123,80 @@ cached
predicate getCallArg(
ControlFlowNode call, Function target, CallType type, Node arg, ArgumentPosition apos
) {
- // normal calls with a real call node
- resolveCall(call, target, type) and
- call instanceof CallNode and
+ Stages::DataFlow::ref() and
(
- type instanceof CallTypePlainFunction and
- normalCallArg(call, arg, apos)
- or
- // self argument for normal method calls
- type instanceof CallTypeNormalMethod and
- apos.isSelf() and
- resolveMethodCall(call, target, type, arg) and
- // dataflow lib has requirement that arguments and calls are in same enclosing callable.
- exists(CfgNode cfgNode | cfgNode.getNode() = call |
- cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
- )
- or
- // cls argument for classmethod calls
- type instanceof CallTypeClassMethod and
- apos.isSelf() and
- resolveMethodCall(call, target, type, arg) and
- (arg = classTracker(_) or arg = clsTracker(_)) and
- // dataflow lib has requirement that arguments and calls are in same enclosing callable.
- exists(CfgNode cfgNode | cfgNode.getNode() = call |
- cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
- )
- or
- // normal arguments for method calls
+ // normal calls with a real call node
+ resolveCall(call, target, type) and
+ call instanceof CallNode and
(
- type instanceof CallTypeNormalMethod or
- type instanceof CallTypeStaticMethod or
- type instanceof CallTypeClassMethod
- ) and
- normalCallArg(call, arg, apos)
- or
- // method as plain function call.
- //
- // argument index 0 of call has position self (and MUST be given as positional
- // argument in call). This also means that call-arguments are shifted by 1, such
- // that argument index 1 of call has argument position 0
- type instanceof CallTypeMethodAsPlainFunction and
- (
- apos.isSelf() and arg.asCfgNode() = call.(CallNode).getArg(0)
+ type instanceof CallTypePlainFunction and
+ normalCallArg(call, arg, apos)
or
- not apos.isPositional(_) and normalCallArg(call, arg, apos)
- or
- exists(ArgumentPosition normalPos, int index |
- apos.isPositional(index - 1) and
- normalPos.isPositional(index) and
- normalCallArg(call, arg, normalPos)
+ // self argument for normal method calls
+ type instanceof CallTypeNormalMethod and
+ apos.isSelf() and
+ resolveMethodCall(call, target, type, arg) and
+ // dataflow lib has requirement that arguments and calls are in same enclosing callable.
+ exists(CfgNode cfgNode | cfgNode.getNode() = call |
+ cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
)
- )
- or
- // class call
- type instanceof CallTypeClass and
- (
- // only pass synthetic node for created object to __init__, and not __new__ since
- // __new__ is a classmethod.
- target = invokedFunctionFromClassConstruction(_, "__init__") and
- apos.isSelf() and
- arg = TSyntheticPreUpdateNode(call)
or
- normalCallArg(call, arg, apos)
- )
- or
- // call on class instance, which goes to `__call__` method
- type instanceof CallTypeClassInstanceCall and
- (
+ // cls argument for classmethod calls
+ type instanceof CallTypeClassMethod and
apos.isSelf() and
- resolveClassInstanceCall(call, target, arg)
+ resolveMethodCall(call, target, type, arg) and
+ (arg = classTracker(_) or arg = clsTracker(_)) and
+ // dataflow lib has requirement that arguments and calls are in same enclosing callable.
+ exists(CfgNode cfgNode | cfgNode.getNode() = call |
+ cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
+ )
or
+ // normal arguments for method calls
+ (
+ type instanceof CallTypeNormalMethod or
+ type instanceof CallTypeStaticMethod or
+ type instanceof CallTypeClassMethod
+ ) and
normalCallArg(call, arg, apos)
+ or
+ // method as plain function call.
+ //
+ // argument index 0 of call has position self (and MUST be given as positional
+ // argument in call). This also means that call-arguments are shifted by 1, such
+ // that argument index 1 of call has argument position 0
+ type instanceof CallTypeMethodAsPlainFunction and
+ (
+ apos.isSelf() and arg.asCfgNode() = call.(CallNode).getArg(0)
+ or
+ not apos.isPositional(_) and normalCallArg(call, arg, apos)
+ or
+ exists(ArgumentPosition normalPos, int index |
+ apos.isPositional(index - 1) and
+ normalPos.isPositional(index) and
+ normalCallArg(call, arg, normalPos)
+ )
+ )
+ or
+ // class call
+ type instanceof CallTypeClass and
+ (
+ // only pass synthetic node for created object to __init__, and not __new__ since
+ // __new__ is a classmethod.
+ target = invokedFunctionFromClassConstruction(_, "__init__") and
+ apos.isSelf() and
+ arg = TSyntheticPreUpdateNode(call)
+ or
+ normalCallArg(call, arg, apos)
+ )
+ or
+ // call on class instance, which goes to `__call__` method
+ type instanceof CallTypeClassInstanceCall and
+ (
+ apos.isSelf() and
+ resolveClassInstanceCall(call, target, arg)
+ or
+ normalCallArg(call, arg, apos)
+ )
)
)
}
diff --git a/python/ql/lib/semmle/python/internal/CachedStages.qll b/python/ql/lib/semmle/python/internal/CachedStages.qll
index 8b5701b3115..58bb9716195 100644
--- a/python/ql/lib/semmle/python/internal/CachedStages.qll
+++ b/python/ql/lib/semmle/python/internal/CachedStages.qll
@@ -180,6 +180,7 @@ module Stages {
predicate ref() { 1 = 1 }
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
+ private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
private import semmle.python.dataflow.new.internal.LocalSources as LocalSources
private import semmle.python.internal.Awaited as Awaited
@@ -195,6 +196,10 @@ module Stages {
or
any(DataFlowPublic::Node node).hasLocationInfo(_, _, _, _, _)
or
+ DataFlowDispatch::resolveCall(_, _, _)
+ or
+ DataFlowDispatch::getCallArg(_, _, _, _, _)
+ or
any(LocalSources::LocalSourceNode n).flowsTo(_)
or
exists(Awaited::awaited(_))
From aa382ac042423d418404893c01d1494d9f22fd7e Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 4 Nov 2022 11:40:19 +0100
Subject: [PATCH 068/415] Python: Add test for strange generator taint flow
I did check, and this was not a problem with the old call-graph on main!
I'm absolutely baffled!
---
.../generator-flow/InlineTaintTest.expected | 3 ++
.../generator-flow/InlineTaintTest.ql | 1 +
.../NormalDataflowTest.expected | 2 +
.../generator-flow/NormalDataflowTest.ql | 2 +
.../generator-flow/test_dataflow.py | 34 +++++++++++++++++
.../generator-flow/test_taint.py | 37 +++++++++++++++++++
6 files changed, 79 insertions(+)
create mode 100644 python/ql/test/experimental/dataflow/tainttracking/generator-flow/InlineTaintTest.expected
create mode 100644 python/ql/test/experimental/dataflow/tainttracking/generator-flow/InlineTaintTest.ql
create mode 100644 python/ql/test/experimental/dataflow/tainttracking/generator-flow/NormalDataflowTest.expected
create mode 100644 python/ql/test/experimental/dataflow/tainttracking/generator-flow/NormalDataflowTest.ql
create mode 100644 python/ql/test/experimental/dataflow/tainttracking/generator-flow/test_dataflow.py
create mode 100644 python/ql/test/experimental/dataflow/tainttracking/generator-flow/test_taint.py
diff --git a/python/ql/test/experimental/dataflow/tainttracking/generator-flow/InlineTaintTest.expected b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/InlineTaintTest.expected
new file mode 100644
index 00000000000..79d760d87f4
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/InlineTaintTest.expected
@@ -0,0 +1,3 @@
+argumentToEnsureNotTaintedNotMarkedAsSpurious
+untaintedArgumentToEnsureTaintedNotMarkedAsMissing
+failures
diff --git a/python/ql/test/experimental/dataflow/tainttracking/generator-flow/InlineTaintTest.ql b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/InlineTaintTest.ql
new file mode 100644
index 00000000000..027ad8667be
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/InlineTaintTest.ql
@@ -0,0 +1 @@
+import experimental.meta.InlineTaintTest
diff --git a/python/ql/test/experimental/dataflow/tainttracking/generator-flow/NormalDataflowTest.expected b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/NormalDataflowTest.expected
new file mode 100644
index 00000000000..3875da4e143
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/NormalDataflowTest.expected
@@ -0,0 +1,2 @@
+missingAnnotationOnSink
+failures
diff --git a/python/ql/test/experimental/dataflow/tainttracking/generator-flow/NormalDataflowTest.ql b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/NormalDataflowTest.ql
new file mode 100644
index 00000000000..3ee344d0b87
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/NormalDataflowTest.ql
@@ -0,0 +1,2 @@
+import python
+import experimental.dataflow.TestUtil.NormalDataflowTest
diff --git a/python/ql/test/experimental/dataflow/tainttracking/generator-flow/test_dataflow.py b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/test_dataflow.py
new file mode 100644
index 00000000000..d1becb7bbba
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/test_dataflow.py
@@ -0,0 +1,34 @@
+def normal_helper(arg):
+ l = [arg]
+ return l[0]
+
+
+def generator_helper(arg):
+ l = [arg]
+ l = [x for x in l]
+ return l[0]
+
+
+def generator_helper_wo_source_use(arg):
+ l = [arg]
+ l = [x for x in l]
+ return l[0]
+
+
+def test_source():
+ x = normal_helper(SOURCE)
+ SINK(x) # $ flow="SOURCE, l:-1 -> x"
+
+ x = generator_helper(SOURCE)
+ SINK(x) # $ flow="SOURCE, l:-1 -> x"
+
+
+def test_non_source():
+ x = normal_helper(NONSOURCE)
+ SINK_F(x)
+
+ x = generator_helper(NONSOURCE)
+ SINK_F(x)
+
+ x = generator_helper_wo_source_use(NONSOURCE)
+ SINK_F(x)
diff --git a/python/ql/test/experimental/dataflow/tainttracking/generator-flow/test_taint.py b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/test_taint.py
new file mode 100644
index 00000000000..858a23bcfb8
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/test_taint.py
@@ -0,0 +1,37 @@
+def normal_helper(arg):
+ l = [arg]
+ return l[0]
+
+# we had a regression where flow from a source to the argument of this function would
+# cause _all_ returns from this function to be treated as tainted. That is, the
+# `generator_helper(NONSOURCE)` call in `test_non_source` would result in taint :| This
+# is specific to taint-tracking, and does NOT appear in pure data-flow (see the
+# test_dataflow file)
+def generator_helper(arg):
+ l = [arg]
+ l = [x for x in l]
+ return l[0]
+
+
+def generator_helper_wo_source_use(arg):
+ l = [arg]
+ l = [x for x in l]
+ return l[0]
+
+def test_source():
+ x = normal_helper(TAINTED_STRING)
+ ensure_tainted(x) # $ tainted
+
+ x = generator_helper(TAINTED_STRING)
+ ensure_tainted(x) # $ tainted
+
+
+def test_non_source():
+ x = normal_helper(NONSOURCE)
+ ensure_not_tainted(x)
+
+ x = generator_helper(NONSOURCE)
+ ensure_not_tainted(x) # $ SPURIOUS: tainted
+
+ x = generator_helper_wo_source_use(NONSOURCE)
+ ensure_not_tainted(x)
From d86f98d60b005eb7288f58c64eca538662134310 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Sun, 13 Nov 2022 20:46:45 +0100
Subject: [PATCH 069/415] Python: Accept changes for `enclosing-callable` test
---
.../EnclosingCallable.expected | 48 +++++++++----------
1 file changed, 24 insertions(+), 24 deletions(-)
diff --git a/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected b/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected
index a1e3de562f5..9ab214dc672 100644
--- a/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected
+++ b/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected
@@ -1,24 +1,24 @@
-| file://:0:0:0:0 | Function generator_func | generator.py:1:20:1:21 | ControlFlowNode for xs |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for ListComp |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for Yield |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for x |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:19:2:19 | ControlFlowNode for x |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:24:2:25 | ControlFlowNode for xs |
-| file://:0:0:0:0 | Module class_example | class_example.py:1:1:1:3 | ControlFlowNode for wat |
-| file://:0:0:0:0 | Module class_example | class_example.py:1:7:1:7 | ControlFlowNode for IntegerLiteral |
-| file://:0:0:0:0 | Module class_example | class_example.py:3:1:3:10 | ControlFlowNode for ClassExpr |
-| file://:0:0:0:0 | Module class_example | class_example.py:3:7:3:9 | ControlFlowNode for Wat |
-| file://:0:0:0:0 | Module class_example | class_example.py:4:5:4:7 | ControlFlowNode for wat |
-| file://:0:0:0:0 | Module class_example | class_example.py:4:11:4:11 | ControlFlowNode for IntegerLiteral |
-| file://:0:0:0:0 | Module class_example | class_example.py:5:5:5:9 | ControlFlowNode for print |
-| file://:0:0:0:0 | Module class_example | class_example.py:5:5:5:26 | ControlFlowNode for print() |
-| file://:0:0:0:0 | Module class_example | class_example.py:5:11:5:20 | ControlFlowNode for Str |
-| file://:0:0:0:0 | Module class_example | class_example.py:5:23:5:25 | ControlFlowNode for wat |
-| file://:0:0:0:0 | Module class_example | class_example.py:7:1:7:5 | ControlFlowNode for print |
-| file://:0:0:0:0 | Module class_example | class_example.py:7:1:7:23 | ControlFlowNode for print() |
-| file://:0:0:0:0 | Module class_example | class_example.py:7:7:7:17 | ControlFlowNode for Str |
-| file://:0:0:0:0 | Module class_example | class_example.py:7:20:7:22 | ControlFlowNode for wat |
-| file://:0:0:0:0 | Module generator | generator.py:1:1:1:23 | ControlFlowNode for FunctionExpr |
-| file://:0:0:0:0 | Module generator | generator.py:1:5:1:18 | ControlFlowNode for generator_func |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:1:1:1:3 | ControlFlowNode for wat |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:1:7:1:7 | ControlFlowNode for IntegerLiteral |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:3:1:3:10 | ControlFlowNode for ClassExpr |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:3:7:3:9 | ControlFlowNode for Wat |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:4:5:4:7 | ControlFlowNode for wat |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:4:11:4:11 | ControlFlowNode for IntegerLiteral |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:5:5:9 | ControlFlowNode for print |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:5:5:26 | ControlFlowNode for print() |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:11:5:20 | ControlFlowNode for Str |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:23:5:25 | ControlFlowNode for wat |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:1:7:5 | ControlFlowNode for print |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:1:7:23 | ControlFlowNode for print() |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:7:7:17 | ControlFlowNode for Str |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:20:7:22 | ControlFlowNode for wat |
+| generator.py:0:0:0:0 | Module generator | generator.py:1:1:1:23 | ControlFlowNode for FunctionExpr |
+| generator.py:0:0:0:0 | Module generator | generator.py:1:5:1:18 | ControlFlowNode for generator_func |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:1:20:1:21 | ControlFlowNode for xs |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for ListComp |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:24:2:25 | ControlFlowNode for xs |
+| generator.py:2:12:2:26 | Function listcomp | generator.py:2:12:2:26 | ControlFlowNode for .0 |
+| generator.py:2:12:2:26 | Function listcomp | generator.py:2:12:2:26 | ControlFlowNode for .0 |
+| generator.py:2:12:2:26 | Function listcomp | generator.py:2:13:2:13 | ControlFlowNode for Yield |
+| generator.py:2:12:2:26 | Function listcomp | generator.py:2:13:2:13 | ControlFlowNode for x |
+| generator.py:2:12:2:26 | Function listcomp | generator.py:2:19:2:19 | ControlFlowNode for x |
From c0ad870949f91bc6df5c002ba517837853d8e018 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Sun, 13 Nov 2022 21:08:02 +0100
Subject: [PATCH 070/415] Python: Exclude synthetic generator functions from
DataFlowCallable
---
.../python/dataflow/new/internal/DataFlowDispatch.qll | 9 ++++++++-
.../experimental/dataflow/coverage/localFlow.expected | 6 ++++++
.../enclosing-callable/EnclosingCallable.expected | 10 +++++-----
.../tainttracking/generator-flow/test_taint.py | 2 +-
4 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index bf5da859c90..ccef7dd161d 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -254,7 +254,14 @@ abstract class LibraryCallable extends string {
}
newtype TDataFlowCallable =
- TFunction(Function func) or
+ TFunction(Function func) {
+ // For generators/list-comprehensions we create a synthetic function. In the
+ // points-to call-graph these were not considered callable, and instead we added
+ // data-flow steps (read/write) for these. As an easy solution for now, we do the
+ // same to keep things easy to reason about (and therefore exclude things that do
+ // not have a definition)
+ exists(func.getDefinition())
+ } or
/** see QLDoc for `DataFlowModuleScope` for why we need this. */
TModule(Module m) or
TLibraryCallable(LibraryCallable callable)
diff --git a/python/ql/test/experimental/dataflow/coverage/localFlow.expected b/python/ql/test/experimental/dataflow/coverage/localFlow.expected
index 7ca11daba51..30b25979df3 100644
--- a/python/ql/test/experimental/dataflow/coverage/localFlow.expected
+++ b/python/ql/test/experimental/dataflow/coverage/localFlow.expected
@@ -8,4 +8,10 @@
| test.py:187:1:187:53 | GSSA Variable SINK | test.py:189:5:189:8 | ControlFlowNode for SINK |
| test.py:187:1:187:53 | GSSA Variable SOURCE | test.py:188:25:188:30 | ControlFlowNode for SOURCE |
| test.py:188:5:188:5 | SSA variable x | test.py:189:10:189:10 | ControlFlowNode for x |
+| test.py:188:9:188:68 | ControlFlowNode for .0 | test.py:188:9:188:68 | SSA variable .0 |
| test.py:188:9:188:68 | ControlFlowNode for ListComp | test.py:188:5:188:5 | SSA variable x |
+| test.py:188:9:188:68 | SSA variable .0 | test.py:188:9:188:68 | ControlFlowNode for .0 |
+| test.py:188:16:188:16 | SSA variable v | test.py:188:45:188:45 | ControlFlowNode for v |
+| test.py:188:40:188:40 | SSA variable u | test.py:188:56:188:56 | ControlFlowNode for u |
+| test.py:188:51:188:51 | SSA variable z | test.py:188:67:188:67 | ControlFlowNode for z |
+| test.py:188:62:188:62 | SSA variable y | test.py:188:10:188:10 | ControlFlowNode for y |
diff --git a/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected b/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected
index 9ab214dc672..3bd4cd81d54 100644
--- a/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected
+++ b/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected
@@ -15,10 +15,10 @@
| generator.py:0:0:0:0 | Module generator | generator.py:1:1:1:23 | ControlFlowNode for FunctionExpr |
| generator.py:0:0:0:0 | Module generator | generator.py:1:5:1:18 | ControlFlowNode for generator_func |
| generator.py:1:1:1:23 | Function generator_func | generator.py:1:20:1:21 | ControlFlowNode for xs |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for ListComp |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for Yield |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for x |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:19:2:19 | ControlFlowNode for x |
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:24:2:25 | ControlFlowNode for xs |
-| generator.py:2:12:2:26 | Function listcomp | generator.py:2:12:2:26 | ControlFlowNode for .0 |
-| generator.py:2:12:2:26 | Function listcomp | generator.py:2:12:2:26 | ControlFlowNode for .0 |
-| generator.py:2:12:2:26 | Function listcomp | generator.py:2:13:2:13 | ControlFlowNode for Yield |
-| generator.py:2:12:2:26 | Function listcomp | generator.py:2:13:2:13 | ControlFlowNode for x |
-| generator.py:2:12:2:26 | Function listcomp | generator.py:2:19:2:19 | ControlFlowNode for x |
diff --git a/python/ql/test/experimental/dataflow/tainttracking/generator-flow/test_taint.py b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/test_taint.py
index 858a23bcfb8..4ec13583dcc 100644
--- a/python/ql/test/experimental/dataflow/tainttracking/generator-flow/test_taint.py
+++ b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/test_taint.py
@@ -31,7 +31,7 @@ def test_non_source():
ensure_not_tainted(x)
x = generator_helper(NONSOURCE)
- ensure_not_tainted(x) # $ SPURIOUS: tainted
+ ensure_not_tainted(x)
x = generator_helper_wo_source_use(NONSOURCE)
ensure_not_tainted(x)
From 8de5cfef43c6a2a83a0af969d55787ba85cff112 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 22 Nov 2022 14:41:48 +0100
Subject: [PATCH 071/415] Python: Update `dataflow-consistency.expected`
After merging in main
---
.../dataflow/callgraph_crosstalk/dataflow-consistency.expected | 2 ++
.../library-tests/CallGraph/dataflow-consistency.expected | 2 ++
2 files changed, 4 insertions(+)
diff --git a/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected
index 9fedaf9f663..8f4dbd04742 100644
--- a/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected
@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
+readStepIsLocal
+storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
diff --git a/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.expected b/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.expected
index 9fedaf9f663..8f4dbd04742 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.expected
@@ -6,6 +6,8 @@ uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
+readStepIsLocal
+storeStepIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
From ee2f7401e82e8c4f7b156cb874789c2327db7dd8 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 22 Nov 2022 14:42:10 +0100
Subject: [PATCH 072/415] Python: Add `generator-flow/dataflow-consistency.ql`
---
.../dataflow-consistency.expected | 21 +++++++++++++++++++
.../generator-flow/dataflow-consistency.ql | 2 ++
2 files changed, 23 insertions(+)
create mode 100644 python/ql/test/experimental/dataflow/tainttracking/generator-flow/dataflow-consistency.expected
create mode 100644 python/ql/test/experimental/dataflow/tainttracking/generator-flow/dataflow-consistency.ql
diff --git a/python/ql/test/experimental/dataflow/tainttracking/generator-flow/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/dataflow-consistency.expected
new file mode 100644
index 00000000000..8f4dbd04742
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/dataflow-consistency.expected
@@ -0,0 +1,21 @@
+uniqueEnclosingCallable
+uniqueType
+uniqueNodeLocation
+missingLocation
+uniqueNodeToString
+missingToString
+parameterCallable
+localFlowIsLocal
+readStepIsLocal
+storeStepIsLocal
+compatibleTypesReflexive
+unreachableNodeCCtx
+localCallNodes
+postIsNotPre
+postHasUniquePre
+uniquePostUpdate
+postIsInSameCallable
+reverseRead
+argHasPostUpdate
+postWithInFlow
+viableImplInCallContextTooLarge
diff --git a/python/ql/test/experimental/dataflow/tainttracking/generator-flow/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/dataflow-consistency.ql
new file mode 100644
index 00000000000..3dda6701a83
--- /dev/null
+++ b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/dataflow-consistency.ql
@@ -0,0 +1,2 @@
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
From 00ec3a23ba61e9406c57f04735c31654be69191c Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 22 Nov 2022 14:43:04 +0100
Subject: [PATCH 073/415] Python: Accept fix from module-resolution PR
---
.../CallGraph-imports/InlineCallGraphTest.expected | 1 -
.../experimental/library-tests/CallGraph-imports/pkg/use.py | 2 +-
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.expected
index 7bba932e8f4..d5ed453c51a 100644
--- a/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/InlineCallGraphTest.expected
@@ -1,5 +1,4 @@
failures
debug_callableNotUnique
pointsTo_found_typeTracker_notFound
-| pkg/use.py:10:5:10:10 | ControlFlowNode for func() | "pkg/func_def.py:func" |
typeTracker_found_pointsTo_notFound
diff --git a/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/use.py b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/use.py
index 861359b5d91..fd1d957ba81 100644
--- a/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/use.py
+++ b/python/ql/test/experimental/library-tests/CallGraph-imports/pkg/use.py
@@ -7,7 +7,7 @@ test_direct_import() # $ pt,tt=test_direct_import
def test_alias_problem():
from .alias_problem import func
- func() # $ pt="pkg/func_def.py:func" MISSING: tt="pkg/func_def.py:func"
+ func() # $ pt,tt="pkg/func_def.py:func"
test_alias_problem() # $ pt,tt=test_alias_problem
From 69b43f147aceeed864dc4444530d36621ad2882c Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 22 Nov 2022 16:24:47 +0100
Subject: [PATCH 074/415] Python: Fix ql4ql alerts
The rest will be ignored.
---
.../python/dataflow/new/internal/DataFlowDispatch.qll | 6 +++---
.../ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll | 7 +++----
python/ql/src/meta/analysis-quality/CallGraphQuality.qll | 4 ++--
3 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index ccef7dd161d..f2e2ace7fda 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -478,7 +478,7 @@ private TypeTrackingNode classTracker(TypeTracker t, Class cls) {
or
// when a class is decorated, it's the result of the (last) decorator call that
// is used
- result.asExpr() = cls.getParent().(ClassExpr).getADecoratorCall()
+ result.asExpr() = cls.getParent().getADecoratorCall()
or
// `type(obj)`, where obj is an instance of this class
result = getTypeCall() and
@@ -1102,8 +1102,8 @@ predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
}
/**
- * Gets the argument of `call` at position `apos`, if any, where we can resolve `call`
- * to `target` with CallType `type`.
+ * Gets the argument `arg` of `call` at position `apos`, if any. Requires that we can
+ * resolve `call` to `target` with CallType `type`.
*
* It might seem like it's enough to know the CallType to resolve arguments. The reason
* we also need the `target`, is to avoid cross-talk. In the example below, assuming
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
index dbc92f9d151..e01cd33aa5c 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
@@ -157,11 +157,10 @@ class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
/** A node representing data being passed to an external API through a call. */
class ExternalApiDataNode extends DataFlow::Node {
- InterestingExternalApiCall call;
- DataFlowPrivate::ArgumentPosition apos;
-
ExternalApiDataNode() {
- this = call.getArgument(apos) and
+ exists(InterestingExternalApiCall call, DataFlowPrivate::ArgumentPosition apos |
+ this = call.getArgument(apos)
+ ) and
// Not already modeled as a taint step
not exists(DataFlow::Node next | TaintTrackingPrivate::defaultAdditionalTaintStep(this, next)) and
// for `list.append(x)`, we have a additional taint step from x -> [post] list.
diff --git a/python/ql/src/meta/analysis-quality/CallGraphQuality.qll b/python/ql/src/meta/analysis-quality/CallGraphQuality.qll
index cdb143017db..b1f29b52cc7 100644
--- a/python/ql/src/meta/analysis-quality/CallGraphQuality.qll
+++ b/python/ql/src/meta/analysis-quality/CallGraphQuality.qll
@@ -81,7 +81,7 @@ module PointsToBasedCallGraph {
*/
class ResolvableCallRelevantTarget extends ResolvableCall {
ResolvableCallRelevantTarget() {
- exists(Target target | target = getTarget() |
+ exists(Target target | target = this.getTarget() |
exists(target.getLocation().getFile().getRelativePath())
)
}
@@ -137,7 +137,7 @@ module TypeTrackingBasedCallGraph {
*/
class ResolvableCallRelevantTarget extends ResolvableCall {
ResolvableCallRelevantTarget() {
- exists(Target target | target = getTarget() |
+ exists(Target target | target = this.getTarget() |
exists(target.getLocation().getFile().getRelativePath())
)
}
From d151e21f15c38c4a0c1d8aff3a0b61d547ce2150 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Thu, 24 Nov 2022 10:14:39 +0100
Subject: [PATCH 075/415] Python: Move `ControlFlowNode.toString()` to AST
cached stage
This means points-to is no longer evaluated for sql injection :tada:
Thanks @asgerf :muscle:
---
python/ql/lib/semmle/python/Flow.qll | 2 +-
python/ql/lib/semmle/python/internal/CachedStages.qll | 5 ++---
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/python/ql/lib/semmle/python/Flow.qll b/python/ql/lib/semmle/python/Flow.qll
index a26a7ac7d8a..d5f25fd7b9f 100644
--- a/python/ql/lib/semmle/python/Flow.qll
+++ b/python/ql/lib/semmle/python/Flow.qll
@@ -125,7 +125,7 @@ class ControlFlowNode extends @py_flow_node {
/** Gets a textual representation of this element. */
cached
string toString() {
- Stages::PointsTo::ref() and
+ Stages::AST::ref() and
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
or
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
diff --git a/python/ql/lib/semmle/python/internal/CachedStages.qll b/python/ql/lib/semmle/python/internal/CachedStages.qll
index 58bb9716195..40dda556caa 100644
--- a/python/ql/lib/semmle/python/internal/CachedStages.qll
+++ b/python/ql/lib/semmle/python/internal/CachedStages.qll
@@ -93,6 +93,8 @@ module Stages {
exists(PyFlow::DefinitionNode b)
or
exists(any(PyFlow::SequenceNode n).getElement(_))
+ or
+ exists(any(PyFlow::ControlFlowNode c).toString())
}
}
@@ -140,7 +142,6 @@ module Stages {
private import semmle.python.pointsto.Base as PointsToBase
private import semmle.python.types.Object as TypeObject
private import semmle.python.objects.TObject as TObject
- private import semmle.python.Flow as Flow
private import semmle.python.objects.ObjectInternal as ObjectInternal
// have to alias since this module is also called PointsTo
private import semmle.python.pointsto.PointsTo as RealPointsTo
@@ -159,8 +160,6 @@ module Stages {
or
exists(TObject::TObject f)
or
- exists(any(Flow::ControlFlowNode c).toString())
- or
exists(any(ObjectInternal::ObjectInternal o).toString())
or
RealPointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
From 68fd75ca34dd9d792cdce7f400a72acd8b473b54 Mon Sep 17 00:00:00 2001
From: ALJI Mohamed
Date: Mon, 5 Dec 2022 17:20:22 +0100
Subject: [PATCH 076/415] UnpackUnsafe query and tests
---
.../Security/CWE-022bis/UnsafeUnpack.qhelp | 56 +++++++++++++++++++
.../Security/CWE-022bis/UnsafeUnpack.ql | 56 +++++++++++++++++++
.../CWE-022bis/examples/HIT_UnsafeUnpack.py | 12 ++++
.../CWE-022bis/examples/NoHIT_UnsafeUnpack.py | 17 ++++++
.../Security/CWE-022/UnsafeUnpack.expected | 10 ++++
.../Security/CWE-022/UnsafeUnpack.py | 12 ++++
.../Security/CWE-022/UnsafeUnpack.qlref | 1 +
7 files changed, 164 insertions(+)
create mode 100644 python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
create mode 100644 python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
create mode 100644 python/ql/src/experimental/Security/CWE-022bis/examples/HIT_UnsafeUnpack.py
create mode 100644 python/ql/src/experimental/Security/CWE-022bis/examples/NoHIT_UnsafeUnpack.py
create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.qlref
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
new file mode 100644
index 00000000000..c1115e819b9
--- /dev/null
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
@@ -0,0 +1,56 @@
+
+
+
+
+Using `shutil.unpack_archive()` to extract files from a malicious tarball without validating that
+the destination file path is within the destination directory can cause files outside the
+destination directory to be overwritten, due to the possible presence of directory traversal elements
+ (..) in archive path names.
+
+Tarballs contain archive entries representing each file in the archive. These entries
+include a file path for the entry, but these file paths are not restricted and may contain
+unexpected special elements such as the directory traversal element (..). If these
+file paths are used to determine an output file to write the contents of the archive item to, then
+the file may be written to an unexpected location. This can result in sensitive information being
+revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
+files.
+
+For example, if a tarball contains a file entry ../sim4n6.txt, and the tarball
+is extracted to the directory /tmp/tmp123, then naively combining the paths would result
+in an output file path of /tmp/tmp123/../sim4n6.txt, which would cause the file to be
+written to /tmp/.
+
+
+
+
+Ensure that output paths constructed from tarball entries are validated
+to prevent writing files to unexpected locations.
+
+Consider using a safer module, such as: zipfile
+
+
+
+
+
+In this example an archive is extracted without validating file paths.
+
+
+
+
+To fix this vulnerability, extract each member individually with TarFile.extract()
+ only after verifying that its name neither contains `..` nor starts with `/`.
+
+
+
+
+
+
+
+
+ Shutil official documentation
+ shutil.unpack_archive() warning.
+
+
+
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
new file mode 100644
index 00000000000..eb78b13cd0a
--- /dev/null
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -0,0 +1,56 @@
+/**
+ * @name Arbitrary file write during a remotely downloaded tarball extraction
+ * @description Extracting files from a malicious tarball using `shutil.unpack_archive()` without validating
+ * that the destination file path is within the destination directory can cause files outside
+ * the destination directory to be overwritten. This query flags tarballs that come from a remote location.
+ * @kind path-problem
+ * @id py/unsafe-unpacking
+ * @problem.severity error
+ * @security-severity 7.5
+ * @precision high
+ * @tags security
+ * external/cwe/cwe-022bis
+ */
+
+import python
+import experimental.semmle.python.Concepts
+import DataFlow::PathGraph
+import semmle.python.ApiGraphs
+import semmle.python.dataflow.new.internal.Attributes
+import semmle.python.dataflow.new.DataFlow
+import semmle.python.ApiGraphs
+import semmle.python.dataflow.new.TaintTracking
+import semmle.python.Concepts
+
+class UnsafeUnpackingConfig extends TaintTracking::Configuration {
+ UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" }
+
+ override predicate isSource(DataFlow::Node source) {
+ // A source coming from a remote location
+ exists(Http::Client::Request request | source = request) and
+ not source.getScope().getLocation().getFile().inStdlib()
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ // A sink capturing method calls to `unpack_archive`.
+ sink =
+ API::moduleImport("shutil").getMember("unpack_archive").getACall().getParameter(0).asSink() and
+ not sink.getScope().getLocation().getFile().inStdlib()
+ }
+
+ override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+ // Writing the response data to the archive
+ exists(API::CallNode call, MethodCallNode w |
+ nodeTo = call.getArg(0) and
+ call = API::builtin("open").getACall() and
+ w.getMethodName() = "write" and
+ w.getObject() = call.getReturn().getAValueReachableFromSource() and
+ nodeFrom = w.getArg(0)
+ )
+ }
+}
+
+from UnsafeUnpackingConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select source.getNode(), source, sink, "Unsafe extraction from a malicious tarball, is used in a $@",
+ source.getAQlClass(), "during archive unpacking."
\ No newline at end of file
diff --git a/python/ql/src/experimental/Security/CWE-022bis/examples/HIT_UnsafeUnpack.py b/python/ql/src/experimental/Security/CWE-022bis/examples/HIT_UnsafeUnpack.py
new file mode 100644
index 00000000000..cc0f857f8c0
--- /dev/null
+++ b/python/ql/src/experimental/Security/CWE-022bis/examples/HIT_UnsafeUnpack.py
@@ -0,0 +1,12 @@
+import requests
+import shutil
+
+url = "https://www.someremote.location/tarball.tar.gz"
+response = requests.get(url, stream=True)
+
+tarpath = "/tmp/tmp456/tarball.tar.gz"
+with open(tarpath, "wb") as f:
+ f.write(response.raw.read())
+
+untarredpath = "/tmp/tmp123"
+shutil.unpack_archive(tarpath, untarredpath)
\ No newline at end of file
diff --git a/python/ql/src/experimental/Security/CWE-022bis/examples/NoHIT_UnsafeUnpack.py b/python/ql/src/experimental/Security/CWE-022bis/examples/NoHIT_UnsafeUnpack.py
new file mode 100644
index 00000000000..426bcd71481
--- /dev/null
+++ b/python/ql/src/experimental/Security/CWE-022bis/examples/NoHIT_UnsafeUnpack.py
@@ -0,0 +1,17 @@
+import requests
+import tarfile
+
+url = "https://www.someremote.location/tarball.tar.gz"
+response = requests.get(url, stream=True)
+# Save the downloaded archive to a local file.
+tarpath = "/tmp/tmp456/tarball.tar.gz"
+with open(tarpath, "wb") as f:
+    f.write(response.raw.read())
+# Extract member by member so each entry name is checked before it is written.
+untarredpath = "/tmp/tmp123"
+with tarfile.open(tarpath) as tar:
+    for member in tar.getmembers():
+        if member.name.startswith("/") or ".." in member.name:
+            raise Exception("Path traversal identified in tarball")
+        # TarFile.extract() takes the member first, then the destination directory.
+        tar.extract(member, untarredpath)
\ No newline at end of file
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
new file mode 100644
index 00000000000..96d25c65a30
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
@@ -0,0 +1,10 @@
+edges
+| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath |
+nodes
+| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
+subpaths
+#select
+| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball, is used in a $@ | PathNode | during archive unpacking. |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
new file mode 100644
index 00000000000..cc0f857f8c0
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
@@ -0,0 +1,12 @@
+import requests
+import shutil
+
+url = "https://www.someremote.location/tarball.tar.gz"
+response = requests.get(url, stream=True)
+
+tarpath = "/tmp/tmp456/tarball.tar.gz"
+with open(tarpath, "wb") as f:
+ f.write(response.raw.read())
+
+untarredpath = "/tmp/tmp123"
+shutil.unpack_archive(tarpath, untarredpath)
\ No newline at end of file
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.qlref b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.qlref
new file mode 100644
index 00000000000..90e5db651a0
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.qlref
@@ -0,0 +1 @@
+experimental/Security/CWE-022bis/UnsafeUnpack.ql
\ No newline at end of file
From 054c06be6579ef2338f900df58539a29c8a3fc75 Mon Sep 17 00:00:00 2001
From: ALJI Mohamed
Date: Tue, 6 Dec 2022 02:51:07 +0100
Subject: [PATCH 077/415] Update UnsafeUnpack.ql
---
.../Security/CWE-022bis/UnsafeUnpack.ql | 45 ++++++++++---------
1 file changed, 25 insertions(+), 20 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index eb78b13cd0a..c80dda11cea 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -8,49 +8,54 @@
* @problem.severity error
* @security-severity 7.5
* @precision high
- * @tags security
+ * @tags security
* external/cwe/cwe-022bis
*/
import python
-import experimental.semmle.python.Concepts
-import DataFlow::PathGraph
-import semmle.python.ApiGraphs
-import semmle.python.dataflow.new.internal.Attributes
-import semmle.python.dataflow.new.DataFlow
-import semmle.python.ApiGraphs
-import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
+import semmle.python.dataflow.new.internal.DataFlowPublic
+import semmle.python.ApiGraphs
+import DataFlow::PathGraph
+import semmle.python.dataflow.new.TaintTracking
class UnsafeUnpackingConfig extends TaintTracking::Configuration {
UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" }
override predicate isSource(DataFlow::Node source) {
// A source coming from a remote location
- exists(Http::Client::Request request | source = request) and
- not source.getScope().getLocation().getFile().inStdlib()
+ exists(Http::Client::Request request | source = request)
}
override predicate isSink(DataFlow::Node sink) {
// A sink capturing method calls to `unpack_archive`.
- sink =
- API::moduleImport("shutil").getMember("unpack_archive").getACall().getParameter(0).asSink() and
- not sink.getScope().getLocation().getFile().inStdlib()
+ sink = API::moduleImport("shutil").getMember("unpack_archive").getACall().getArg(0)
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Writing the response data to the archive
- exists(API::CallNode call, MethodCallNode w |
- nodeTo = call.getArg(0) and
+ (exists(API::CallNode call, MethodCallNode mc, Node f |
+ mc.getMethodName() = "write" and
+ f = mc.getObject() and
+ nodeTo = mc.getArg(0) and
call = API::builtin("open").getACall() and
- w.getMethodName() = "write" and
- w.getObject() = call.getReturn().getAValueReachableFromSource() and
- nodeFrom = w.getArg(0)
+ call.flowsTo(f) and
+ nodeFrom = call.getArg(0)
)
+ or
+ // Reading the response
+ exists(MethodCallNode mc |
+ nodeFrom = mc.getObject() and
+ mc.getMethodName() = "read" and
+ nodeTo = mc
+ )
+ or
+ // Accessing the name
+ exists(AttrRead ar | ar.accesses(nodeFrom, "name") and nodeTo = ar))
}
}
from UnsafeUnpackingConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
-select source.getNode(), source, sink, "Unsafe extraction from a malicious tarball, is used in a $@",
- source.getAQlClass(), "during archive unpacking."
\ No newline at end of file
+select sink.getNode(), source, sink,
+ "Unsafe extraction from a malicious tarball retrieved from a remote location."
From a5849eb9b0cd434a14dfb849adad7c7ca194ba52 Mon Sep 17 00:00:00 2001
From: ALJI Mohamed
Date: Tue, 6 Dec 2022 14:00:08 +0100
Subject: [PATCH 078/415] Improved the additional taint step using
InstanceSource
---
.../Security/CWE-022bis/UnsafeUnpack.ql | 36 ++++++++++---------
1 file changed, 19 insertions(+), 17 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index c80dda11cea..bd3d17ff71b 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -18,6 +18,7 @@ import semmle.python.dataflow.new.internal.DataFlowPublic
import semmle.python.ApiGraphs
import DataFlow::PathGraph
import semmle.python.dataflow.new.TaintTracking
+import semmle.python.frameworks.Stdlib
class UnsafeUnpackingConfig extends TaintTracking::Configuration {
UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" }
@@ -34,24 +35,25 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Writing the response data to the archive
- (exists(API::CallNode call, MethodCallNode mc, Node f |
- mc.getMethodName() = "write" and
- f = mc.getObject() and
- nodeTo = mc.getArg(0) and
- call = API::builtin("open").getACall() and
- call.flowsTo(f) and
- nodeFrom = call.getArg(0)
+ (
+ exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
+ is.flowsTo(f) and
+ mc.getMethodName() = "write" and
+ f = mc.getObject() and
+ nodeFrom = mc.getArg(0) and
+ nodeTo = is.(CallCfgNode).getArg(0)
+ )
+ or
+ // Reading the response
+ exists(MethodCallNode mc |
+ nodeFrom = mc.getObject() and
+ mc.getMethodName() = "read" and
+ nodeTo = mc
+ )
+ or
+ // Accessing the name
+ exists(AttrRead ar | ar.accesses(nodeFrom, "name") and nodeTo = ar)
)
- or
- // Reading the response
- exists(MethodCallNode mc |
- nodeFrom = mc.getObject() and
- mc.getMethodName() = "read" and
- nodeTo = mc
- )
- or
- // Accessing the name
- exists(AttrRead ar | ar.accesses(nodeFrom, "name") and nodeTo = ar))
}
}
From c22c0b502940478b5434eab1c43895a691c22f59 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Tue, 6 Dec 2022 14:39:16 +0100
Subject: [PATCH 079/415] Update
python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
Co-authored-by: intrigus-lgtm <60750685+intrigus-lgtm@users.noreply.github.com>
---
.../ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
index c1115e819b9..d1fd1f4f414 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
@@ -40,7 +40,7 @@ In this example an archive is extracted without validating file paths.
To fix this vulnerability, we need to call the function tarfile.extract()
- on each member after verifying that it does not contain either `..` or startswith `/`.
+ on each member after verifying that it does not contain either .. or startswith /.
From 9a60202de62da63e842b35eddbef838c4de08ca1 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Tue, 6 Dec 2022 14:40:35 +0100
Subject: [PATCH 080/415] Update
python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
Co-authored-by: intrigus-lgtm <60750685+intrigus-lgtm@users.noreply.github.com>
---
.../ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
index d1fd1f4f414..cc42d88de1d 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
@@ -5,7 +5,7 @@
Extracting files from a malicious tarball without validating that the destination file path
-is within the destination directory using `shutil.unpack_archive()` can cause files outside the
+is within the destination directory using shutil.unpack_archive() can cause files outside the
destination directory to be overwritten, due to the possible presence of directory traversal elements
(..) in archive path names.
From 58570b4d2c2b09dae1558b1f6bee2c77d0ffcdbb Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Tue, 6 Dec 2022 14:40:48 +0100
Subject: [PATCH 081/415] Update
python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
Co-authored-by: intrigus-lgtm <60750685+intrigus-lgtm@users.noreply.github.com>
---
python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index bd3d17ff71b..4c57e087dd3 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -8,7 +8,7 @@
* @problem.severity error
* @security-severity 7.5
* @precision high
- * @tags securityimport semmle.python.dataflow.TaintTracking
+ * @tags security
* external/cwe/cwe-022bis
*/
From 4896e62117c20dd2f5737a80f0e1cdd21401f8c4 Mon Sep 17 00:00:00 2001
From: ALJI Mohamed
Date: Tue, 6 Dec 2022 14:44:52 +0100
Subject: [PATCH 082/415] Use of more generic terms
---
.../src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
index cc42d88de1d..1219bbe43bc 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
@@ -17,9 +17,9 @@ the file may be written to an unexpected location. This can result in sensitive
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
files.
-For example, if a tarball contains a file entry ../sim4n6.txt, and the tarball
+
For example, if a tarball contains a file entry ../sneaky-file.txt, and the tarball
is extracted to the directory /tmp/tmp123, then naively combining the paths would result
-in an output file path of /tmp/tmp123/../sim4n6.txt, which would cause the file to be
+in an output file path of /tmp/tmp123/../sneaky-file.txt, which would cause the file to be
written to /tmp/.
From 2801b8495aefcdcf7710bfcd5a5a3748c6da0145 Mon Sep 17 00:00:00 2001
From: ALJI Mohamed
Date: Tue, 6 Dec 2022 14:50:47 +0100
Subject: [PATCH 083/415] A fix of the tag name
---
python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index 4c57e087dd3..d550b2a4a37 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -9,7 +9,7 @@
* @security-severity 7.5
* @precision high
* @tags security
- * external/cwe/cwe-022bis
+ * external/cwe/cwe-022
*/
import python
From 9336f4f1a2db7d557f398a229985505870697a91 Mon Sep 17 00:00:00 2001
From: ALJI Mohamed
Date: Thu, 8 Dec 2022 12:26:59 +0100
Subject: [PATCH 084/415] Considering the use of contextlib.closing() method
---
.../Security/CWE-022bis/UnsafeUnpack.ql | 11 ++++-
.../Security/CWE-022/UnsafeUnpack.expected | 6 ++-
.../Security/CWE-022/UnsafeUnpack.py | 46 ++++++++++++++++++-
3 files changed, 59 insertions(+), 4 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index d550b2a4a37..e793e670771 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -34,8 +34,8 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
- // Writing the response data to the archive
(
+ // Writing the response data to the archive
exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
is.flowsTo(f) and
mc.getMethodName() = "write" and
@@ -48,11 +48,18 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
exists(MethodCallNode mc |
nodeFrom = mc.getObject() and
mc.getMethodName() = "read" and
- nodeTo = mc
+ mc.flowsTo(nodeTo)
)
or
// Accessing the name
exists(AttrRead ar | ar.accesses(nodeFrom, "name") and nodeTo = ar)
+ or
+ // Considering closing use
+ exists(API::Node closing |
+ closing = API::moduleImport("contextlib").getMember("closing") and
+ closing.getACall().flowsTo(nodeTo) and
+ nodeFrom = closing.getACall().getArg(0)
+ )
)
}
}
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
index 96d25c65a30..b1e93bf3ab2 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
@@ -1,10 +1,14 @@
edges
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute |
| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath |
+| UnsafeUnpack.py:36:24:36:43 | ControlFlowNode for Attribute() | UnsafeUnpack.py:55:31:55:37 | ControlFlowNode for to_path |
nodes
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
+| UnsafeUnpack.py:36:24:36:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| UnsafeUnpack.py:55:31:55:37 | ControlFlowNode for to_path | semmle.label | ControlFlowNode for to_path |
subpaths
#select
-| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball, is used in a $@ | PathNode | during archive unpacking. |
+| UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:55:31:55:37 | ControlFlowNode for to_path | UnsafeUnpack.py:36:24:36:43 | ControlFlowNode for Attribute() | UnsafeUnpack.py:55:31:55:37 | ControlFlowNode for to_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
index cc0f857f8c0..c7820e52b04 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
@@ -9,4 +9,48 @@ with open(tarpath, "wb") as f:
f.write(response.raw.read())
untarredpath = "/tmp/tmp123"
-shutil.unpack_archive(tarpath, untarredpath)
\ No newline at end of file
+shutil.unpack_archive(tarpath, untarredpath)
+
+
+import tempfile
+import os
+from urllib import request
+import contextlib
+import shutil
+
+unpack = True
+to_path = "/tmp/tmp123"
+uri = "https://www.goog.com/zzz.tar.gz"
+scheme = "https"
+
+with tempfile.TemporaryDirectory() as temp_dir:
+ if unpack and (str(uri).endswith("zip") or str(uri).endswith("tar.gz")):
+ unpack_path = to_path
+ to_path = temp_dir
+ else:
+ unpack_path = None
+ if scheme in ["http", "https", "ftp"]:
+ if os.path.isdir(to_path):
+ to_path = os.path.join(to_path, os.path.basename(uri))
+ url = uri
+ url_response = request.urlopen(url)
+ with contextlib.closing(url_response) as fp:
+ with open(to_path, "wb") as out_file:
+ block_size = DEFAULT_BUFFER_SIZE * 8
+ while True:
+ block = fp.read(block_size)
+ if not block:
+ break
+ out_file.write(block)
+ else:
+ if scheme == "oci" and not storage_options:
+ storage_options = default_signer()
+ fs = fsspec.filesystem(scheme, **storage_options)
+ if os.path.isdir(to_path):
+ to_path = os.path.join(
+ to_path, os.path.basename(str(uri).rstrip("/"))
+ )
+ fs.get(uri, to_path, recursive=True)
+ if unpack_path:
+ shutil.unpack_archive(to_path, unpack_path)
+ to_path = unpack_path
From 545aab0e07d5ee9cbfdb56b16e057db59fecd9e7 Mon Sep 17 00:00:00 2001
From: ALJI Mohamed
Date: Fri, 9 Dec 2022 15:54:43 +0100
Subject: [PATCH 085/415] tarball path provided using CLI argument (source)
---
.../Security/CWE-022bis/UnsafeUnpack.ql | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index e793e670771..33ecb3d5dc2 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -1,8 +1,9 @@
/**
- * @name Arbitrary file write during a remotely downloaded tarball extraction
- * @description Extracting files from a malicious tarball using `shutil.unpack_archive()` without validating
+ * @name Arbitrary file write during a tarball extraction from user controlled source
+ * @description Extracting files from a potentially malicious tarball using `shutil.unpack_archive()` without validating
* that the destination file path is within the destination directory can cause files outside
- * the destination directory to be overwritten. More precisely, if the tarball comes from a remote location.
+ * the destination directory to be overwritten. More precisely, if the tarball comes from a user controlled
+ * location either a remote one or cli argument.
* @kind path-problem
* @id py/unsafe-unpacking
* @problem.severity error
@@ -26,6 +27,14 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
override predicate isSource(DataFlow::Node source) {
// A source coming from a remote location
exists(Http::Client::Request request | source = request)
+ or
+ // A source coming from a CLI argparse module
+ exists(Node o, API::Node ap, MethodCallNode args |
+ ap = API::moduleImport("argparse").getMember("ArgumentParser").getACall().getReturn() and
+ args = ap.getMember("parse_args").getACall() and
+ args.flowsTo(o) and
+ source.(AttrRead).accesses(o, any(string s))
+ )
}
override predicate isSink(DataFlow::Node sink) {
@@ -54,7 +63,7 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
// Accessing the name
exists(AttrRead ar | ar.accesses(nodeFrom, "name") and nodeTo = ar)
or
- // Considering closing use
+ // Considering the use of closing()
exists(API::Node closing |
closing = API::moduleImport("contextlib").getMember("closing") and
closing.getACall().flowsTo(nodeTo) and
From eff132512c3bae3140c87e477d5d62c89d0a7db8 Mon Sep 17 00:00:00 2001
From: ALJI Mohamed
Date: Sat, 10 Dec 2022 08:15:42 +0100
Subject: [PATCH 086/415] Copying the response data to the archive
---
.../Security/CWE-022bis/UnsafeUnpack.ql | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index 33ecb3d5dc2..cb87b16432e 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -53,6 +53,15 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
nodeTo = is.(CallCfgNode).getArg(0)
)
or
+ // Copying the response data to the archive
+ exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
+ is.flowsTo(f) and
+ mc = API::moduleImport("shutil").getMember("copyfileobj").getACall() and
+ f = mc.getArg(1) and
+ nodeFrom = mc.getArg(0) and
+ nodeTo = is.(CallCfgNode).getArg(0)
+ )
+ or
// Reading the response
exists(MethodCallNode mc |
nodeFrom = mc.getObject() and
@@ -60,8 +69,8 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
mc.flowsTo(nodeTo)
)
or
- // Accessing the name
- exists(AttrRead ar | ar.accesses(nodeFrom, "name") and nodeTo = ar)
+ // Accessing the name or raw content
+ exists(AttrRead ar | ar.accesses(nodeFrom, ["name","raw"]) and nodeTo = ar)
or
// Considering the use of closing()
exists(API::Node closing |
From b19452467df781552790be5fc5bdcce2acacc5f7 Mon Sep 17 00:00:00 2001
From: ALJI Mohamed
Date: Sat, 10 Dec 2022 21:59:14 +0100
Subject: [PATCH 087/415] read by chunks as additional step
---
.../Security/CWE-022bis/UnsafeUnpack.ql | 32 +++++++++++++------
1 file changed, 22 insertions(+), 10 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index cb87b16432e..159a19b4bee 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -1,8 +1,8 @@
/**
- * @name Arbitrary file write during a tarball extraction from user controlled source
+ * @name Arbitrary file write during a tarball extraction from a user controlled source
* @description Extracting files from a potentially malicious tarball using `shutil.unpack_archive()` without validating
* that the destination file path is within the destination directory can cause files outside
- * the destination directory to be overwritten. More precisely, if the tarball comes from a user controlled
+ * the destination directory to be overwritten. More precisely, if the tarball comes from a user controlled
* location either a remote one or cli argument.
* @kind path-problem
* @id py/unsafe-unpacking
@@ -28,11 +28,11 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
// A source coming from a remote location
exists(Http::Client::Request request | source = request)
or
- // A source coming from a CLI argparse module
+ //A source coming from a CLI argparse module
exists(Node o, API::Node ap, MethodCallNode args |
ap = API::moduleImport("argparse").getMember("ArgumentParser").getACall().getReturn() and
args = ap.getMember("parse_args").getACall() and
- args.flowsTo(o) and
+ args.flowsTo(o) and
source.(AttrRead).accesses(o, any(string s))
)
}
@@ -57,7 +57,7 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
is.flowsTo(f) and
mc = API::moduleImport("shutil").getMember("copyfileobj").getACall() and
- f = mc.getArg(1) and
+ f = mc.getArg(1) and
nodeFrom = mc.getArg(0) and
nodeTo = is.(CallCfgNode).getArg(0)
)
@@ -70,13 +70,25 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
)
or
// Accessing the name or raw content
- exists(AttrRead ar | ar.accesses(nodeFrom, ["name","raw"]) and nodeTo = ar)
+ exists(AttrRead ar | ar.accesses(nodeFrom, ["name", "raw"]) and ar.flowsTo(nodeTo))
+ or
+ //Use of join of filename
+ exists(API::CallNode mcn |
+ mcn = API::moduleImport("os").getMember("path").getMember("join").getACall() and
+ nodeFrom = mcn.getArg(1) and
+ mcn.flowsTo(nodeTo)
+ )
+ or
+ // Read by chunks
+ exists(MethodCallNode mc |
+ nodeFrom = mc.getObject() and mc.getMethodName() = "chunks" and mc.flowsTo(nodeTo)
+ )
or
// Considering the use of closing()
- exists(API::Node closing |
- closing = API::moduleImport("contextlib").getMember("closing") and
- closing.getACall().flowsTo(nodeTo) and
- nodeFrom = closing.getACall().getArg(0)
+ exists(API::CallNode closing |
+ closing = API::moduleImport("contextlib").getMember("closing").getACall() and
+ closing.flowsTo(nodeTo) and
+ nodeFrom = closing.getArg(0)
)
)
}
From 2f68b54b27a6e6ba51e27351715823bde362f176 Mon Sep 17 00:00:00 2001
From: ALJI Mohamed
Date: Mon, 12 Dec 2022 19:46:34 +0100
Subject: [PATCH 088/415] A simple download_file() call from maybe boto3
---
python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql | 3 +++
1 file changed, 3 insertions(+)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index 159a19b4bee..17604c00426 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -35,6 +35,9 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
args.flowsTo(o) and
source.(AttrRead).accesses(o, any(string s))
)
+ or
+ // A source catching a S3 filename download
+ exists(API::Node s3 | source = s3.getMember("download_file").getACall().getArg(2))
}
override predicate isSink(DataFlow::Node sink) {
From 54109b8ea7ce7b413f2d672b52e2ccfde8795e72 Mon Sep 17 00:00:00 2001
From: ALJI Mohamed
Date: Tue, 13 Dec 2022 15:34:01 +0100
Subject: [PATCH 089/415] Add source wget.download
---
.../ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index 17604c00426..f8dfb78fced 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -38,6 +38,11 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
or
// A source catching a S3 filename download
exists(API::Node s3 | source = s3.getMember("download_file").getACall().getArg(2))
+ or
+ // A source download a file using wget
+ exists(MethodCallNode mcn |
+ mcn = API::moduleImport("wget").getMember("download").getACall() and source = mcn.getArg(1)
+ )
}
override predicate isSink(DataFlow::Node sink) {
From 4376870a518262146c3c2446cd2512be2038b577 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Thu, 15 Dec 2022 23:39:02 +0100
Subject: [PATCH 090/415] An uploaded file is considered a source
---
.../Security/CWE-022bis/UnsafeUnpack.ql | 122 +++++++++++-------
1 file changed, 75 insertions(+), 47 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index f8dfb78fced..1ebdf48397c 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -43,6 +43,21 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
exists(MethodCallNode mcn |
mcn = API::moduleImport("wget").getMember("download").getACall() and source = mcn.getArg(1)
)
+ or
+ // catch the uploaded files as a source
+ exists(Subscript s, Attribute at |
+ at = s.getObject() and at.getAttr() = "FILES" and source.asExpr() = s
+ )
+ or
+ exists(Node obj, AttrRead ar |
+ ar.getAMethodCall("get").flowsTo(source) and
+ ar.accesses(obj, "FILES")
+ )
+ or
+ exists(Node obj, AttrRead ar |
+ ar.getAMethodCall("getlist").flowsTo(source) and
+ ar.accesses(obj, "FILES")
+ )
}
override predicate isSink(DataFlow::Node sink) {
@@ -51,53 +66,66 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
- (
- // Writing the response data to the archive
- exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
- is.flowsTo(f) and
- mc.getMethodName() = "write" and
- f = mc.getObject() and
- nodeFrom = mc.getArg(0) and
- nodeTo = is.(CallCfgNode).getArg(0)
- )
- or
- // Copying the response data to the archive
- exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
- is.flowsTo(f) and
- mc = API::moduleImport("shutil").getMember("copyfileobj").getACall() and
- f = mc.getArg(1) and
- nodeFrom = mc.getArg(0) and
- nodeTo = is.(CallCfgNode).getArg(0)
- )
- or
- // Reading the response
- exists(MethodCallNode mc |
- nodeFrom = mc.getObject() and
- mc.getMethodName() = "read" and
- mc.flowsTo(nodeTo)
- )
- or
- // Accessing the name or raw content
- exists(AttrRead ar | ar.accesses(nodeFrom, ["name", "raw"]) and ar.flowsTo(nodeTo))
- or
- //Use of join of filename
- exists(API::CallNode mcn |
- mcn = API::moduleImport("os").getMember("path").getMember("join").getACall() and
- nodeFrom = mcn.getArg(1) and
- mcn.flowsTo(nodeTo)
- )
- or
- // Read by chunks
- exists(MethodCallNode mc |
- nodeFrom = mc.getObject() and mc.getMethodName() = "chunks" and mc.flowsTo(nodeTo)
- )
- or
- // Considering the use of closing()
- exists(API::CallNode closing |
- closing = API::moduleImport("contextlib").getMember("closing").getACall() and
- closing.flowsTo(nodeTo) and
- nodeFrom = closing.getArg(0)
- )
+ // Writing the response data to the archive
+ exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
+ is.flowsTo(f) and
+ mc.getMethodName() = "write" and
+ f = mc.getObject() and
+ nodeFrom = mc.getArg(0) and
+ nodeTo = is.(CallCfgNode).getArg(0)
+ )
+ or
+ // Copying the response data to the archive
+ exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
+ is.flowsTo(f) and
+ mc = API::moduleImport("shutil").getMember("copyfileobj").getACall() and
+ f = mc.getArg(1) and
+ nodeFrom = mc.getArg(0) and
+ nodeTo = is.(CallCfgNode).getArg(0)
+ )
+ or
+ // Reading the response
+ exists(MethodCallNode mc |
+ nodeFrom = mc.getObject() and
+ mc.getMethodName() = "read" and
+ mc.flowsTo(nodeTo)
+ )
+ or
+ // Accessing the name or raw content
+ exists(AttrRead ar | ar.accesses(nodeFrom, ["name", "raw"]) and ar.flowsTo(nodeTo))
+ or
+ //Use of join of filename
+ exists(API::CallNode mcn |
+ mcn = API::moduleImport("os").getMember("path").getMember("join").getACall() and
+ nodeFrom = mcn.getArg(1) and
+ mcn.flowsTo(nodeTo)
+ )
+ or
+ // Read by chunks
+ exists(MethodCallNode mc |
+ nodeFrom = mc.getObject() and mc.getMethodName() = "chunks" and mc.flowsTo(nodeTo)
+ )
+ or
+ // Considering the use of closing()
+ exists(API::CallNode closing |
+ closing = API::moduleImport("contextlib").getMember("closing").getACall() and
+ closing.flowsTo(nodeTo) and
+ nodeFrom = closing.getArg(0)
+ )
+ or
+ // Considering the use of "fs"
+ exists(API::CallNode fs, MethodCallNode mcn |
+ fs =
+ API::moduleImport("django")
+ .getMember("core")
+ .getMember("files")
+ .getMember("storage")
+ .getMember("FileSystemStorage")
+ .getACall() and
+ fs.flowsTo(mcn.getObject()) and
+ mcn.getMethodName() = ["save", "path"] and
+ nodeFrom = mcn.getArg(0) and
+ nodeTo = mcn
)
}
}
From e5e5d84361a8bef27fadbf4660271cf43af2c6be Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 16 Jan 2023 13:44:24 +0100
Subject: [PATCH 091/415] Python: Add change-note
---
python/ql/lib/change-notes/2023-01-16-new-call-graph.md | 4 ++++
1 file changed, 4 insertions(+)
create mode 100644 python/ql/lib/change-notes/2023-01-16-new-call-graph.md
diff --git a/python/ql/lib/change-notes/2023-01-16-new-call-graph.md b/python/ql/lib/change-notes/2023-01-16-new-call-graph.md
new file mode 100644
index 00000000000..1dbfd05a80f
--- /dev/null
+++ b/python/ql/lib/change-notes/2023-01-16-new-call-graph.md
@@ -0,0 +1,4 @@
+---
+category: majorAnalysis
+---
+* We use a new analysis for the call-graph (determining which function is called). This can lead to changed results. In most cases this is much more accurate than the old call-graph that was based on points-to, but we do lose a few valid edges in the call-graph, especially around methods that are not defined inside their class.
From dfbb744a7a0c4045400e2e8e0e04413831758bff Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 16 Jan 2023 14:04:25 +0100
Subject: [PATCH 092/415] Python: Add comment on `*args` argument handling
---
.../semmle/python/dataflow/new/internal/DataFlowDispatch.qll | 3 +++
1 file changed, 3 insertions(+)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index f2e2ace7fda..7314d5dad14 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -1090,6 +1090,9 @@ predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
exists(int index |
apos.isStarArgs(index) and
arg.asCfgNode() = call.getStarArg() and
+ // since `CallNode.getArg` doesn't include `*args`, we need to drop to the AST level
+ // to get the index. Notice that we only use the AST for getting the index, so we
+ // don't need to check for dominance in regards to splitting.
call.getStarArg().getNode() = call.getNode().getPositionalArg(index).(Starred).getValue()
)
or
From a1513cc1d39d73dd45af6a7be8d0c9671b208f95 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 16 Jan 2023 14:07:11 +0100
Subject: [PATCH 093/415] Python: Minor QLDoc fix
---
.../semmle/python/dataflow/new/internal/DataFlowDispatch.qll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 7314d5dad14..96272a85791 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -350,7 +350,7 @@ class DataFlowPlainFunction extends DataFlowFunction {
DataFlowPlainFunction() { not this instanceof DataFlowMethod }
}
-/** A method, except staticmethods. */
+/** A method. */
class DataFlowMethod extends DataFlowFunction {
Class cls;
From 3fcb8f3f4b26595b8b15db0a9a250da2d8815fce Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 16 Jan 2023 14:11:13 +0100
Subject: [PATCH 094/415] Python: Accept suggestions from code-review
---
.../new/internal/DataFlowDispatch.qll | 36 +++++++++----------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 96272a85791..b01f3453201 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -42,13 +42,13 @@ private import semmle.python.internal.CachedStages
newtype TParameterPosition =
/** Used for `self` in methods, and `cls` in classmethods. */
TSelfParameterPosition() or
- TPositionalParameterPosition(int pos) {
- pos = any(Parameter p).getPosition()
+ TPositionalParameterPosition(int index) {
+ index = any(Parameter p).getPosition()
or
// since synthetic parameters are made for a synthetic summary callable, based on
// what Argument positions they have flow for, we need to make sure we have such
// parameter positions available.
- FlowSummaryImplSpecific::ParsePositions::isParsedPositionalArgumentPosition(_, pos)
+ FlowSummaryImplSpecific::ParsePositions::isParsedPositionalArgumentPosition(_, index)
} or
TKeywordParameterPosition(string name) {
name = any(Parameter p).getName()
@@ -56,15 +56,15 @@ newtype TParameterPosition =
// see comment for TPositionalParameterPosition
FlowSummaryImplSpecific::ParsePositions::isParsedKeywordArgumentPosition(_, name)
} or
- TStarArgsParameterPosition(int pos) {
+ TStarArgsParameterPosition(int index) {
// since `.getPosition` does not work for `*args`, we need *args parameter positions
// at index 1 larger than the largest positional parameter position (and 0 must be
// included as well). This is a bit of an over-approximation.
- pos = 0 or
- pos = any(Parameter p).getPosition() + 1
+ index = 0 or
+ index = any(Parameter p).getPosition() + 1
} or
- TSynthStarArgsElementParameterPosition(int pos) { exists(TStarArgsParameterPosition(pos)) } or
- TSynthLateStarArgsParameterPosition(int pos) { exists(TStarArgsParameterPosition(pos)) } or
+ TSynthStarArgsElementParameterPosition(int index) { exists(TStarArgsParameterPosition(index)) } or
+ TSynthLateStarArgsParameterPosition(int index) { exists(TStarArgsParameterPosition(index)) } or
TDictSplatParameterPosition()
/** A parameter position. */
@@ -128,13 +128,13 @@ class ParameterPosition extends TParameterPosition {
newtype TArgumentPosition =
/** Used for `self` in methods, and `cls` in classmethods. */
TSelfArgumentPosition() or
- TPositionalArgumentPosition(int pos) {
- exists(any(CallNode c).getArg(pos))
+ TPositionalArgumentPosition(int index) {
+ exists(any(CallNode c).getArg(index))
or
// since synthetic calls within a summarized callable could use a unique argument
// position, we need to ensure we make these available (these are specified as
// parameters in the flow-summary spec)
- FlowSummaryImplSpecific::ParsePositions::isParsedPositionalParameterPosition(_, pos)
+ FlowSummaryImplSpecific::ParsePositions::isParsedPositionalParameterPosition(_, index)
} or
TKeywordArgumentPosition(string name) {
exists(any(CallNode c).getArgByName(name))
@@ -142,7 +142,9 @@ newtype TArgumentPosition =
// see comment for TPositionalArgumentPosition
FlowSummaryImplSpecific::ParsePositions::isParsedKeywordParameterPosition(_, name)
} or
- TStarArgsArgumentPosition(int pos) { exists(Call c | c.getPositionalArg(pos) instanceof Starred) } or
+ TStarArgsArgumentPosition(int index) {
+ exists(Call c | c.getPositionalArg(index) instanceof Starred)
+ } or
TDictSplatArgumentPosition()
/** An argument position. */
@@ -329,11 +331,9 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
|
// a `*args` parameter comes after the last positional parameter. We need to take
// self parameter into account, so for
- // `def func(foo, bar, *args)` it should be index 2 (1 + max-index == 1 + 1)
- // `class A: def func(self, foo, bar, *args)` it should be index 2 (1 + max-index - 1 == 1 + 2 - 1)
- index =
- 1 + max(int positionalIndex | exists(func.getArg(positionalIndex)) | positionalIndex) -
- this.positionalOffset()
+ // `def func(foo, bar, *args)` it should be index 2 (pos-param-count == 2)
+ // `class A: def func(self, foo, bar, *args)` it should be index 2 (pos-param-count - 1 == 3 - 1)
+ index = func.getPositionalParameterCount() - this.positionalOffset()
or
// no positional argument
not exists(func.getArg(_)) and index = 0
@@ -579,8 +579,8 @@ Node clsTracker(Class classWithMethod) {
* call happened in the method `func` (either a method or a classmethod).
*/
private TypeTrackingNode superCallNoArgumentTracker(TypeTracker t, Function func) {
- not isStaticmethod(func) and
t.start() and
+ not isStaticmethod(func) and
exists(CallCfgNode call | result = call |
call = getSuperCall() and
not exists(call.getArg(_)) and
From a3b7273844807575fce4709d337137307bc716fd Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 16 Jan 2023 14:19:18 +0100
Subject: [PATCH 095/415] Python: Fix duplicated meta query id
---
python/ql/src/meta/analysis-quality/TTCallGraph.ql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/src/meta/analysis-quality/TTCallGraph.ql b/python/ql/src/meta/analysis-quality/TTCallGraph.ql
index 67faca55893..d6383a32eb1 100644
--- a/python/ql/src/meta/analysis-quality/TTCallGraph.ql
+++ b/python/ql/src/meta/analysis-quality/TTCallGraph.ql
@@ -2,7 +2,7 @@
* @name New call graph edge from using type-tracking instead of points-to
* @kind problem
* @problem.severity recommendation
- * @id py/meta/call-graph-new
+ * @id py/meta/type-tracking-call-graph
* @tags meta
* @precision very-low
*/
From 690a09d9b640b30d0ef293913dea906639d2fe10 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 16 Jan 2023 20:45:44 +0100
Subject: [PATCH 096/415] Python: new-call-graph: `pragma[noinline]` =>
`pragma[nomagic]`
As suggested by @tausbn. Obviously, this needs to be performance tested.
---
.../dataflow/new/internal/DataFlowDispatch.qll | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index b01f3453201..32b319c43d6 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -841,7 +841,7 @@ private module MethodCalls {
* reference to the class `cls`, or to an instance of the class `cls`. The reference the
* attribute-read is made on is `self`.
*/
- pragma[noinline]
+ pragma[nomagic]
private predicate directCall(
CallNode call, Function target, string functionName, Class cls, AttrRead attr, Node self
) {
@@ -850,7 +850,7 @@ private module MethodCalls {
}
/** Extracted to give good join order */
- pragma[noinline]
+ pragma[nomagic]
private predicate directCall_join(
CallNode call, string functionName, Class cls, AttrRead attr, Node self
) {
@@ -872,7 +872,7 @@ private module MethodCalls {
* reference to an implicit `self`/`cls` argument. The reference the attribute-read is
* made on is `self`.
*/
- pragma[noinline]
+ pragma[nomagic]
private predicate callWithinMethodImplicitSelfOrCls(
CallNode call, Function target, string functionName, Class classWithMethod, AttrRead attr,
Node self
@@ -882,7 +882,7 @@ private module MethodCalls {
}
/** Extracted to give good join order */
- pragma[noinline]
+ pragma[nomagic]
private predicate callWithinMethodImplicitSelfOrCls_join(
CallNode call, string functionName, Class classWithMethod, AttrRead attr, Node self
) {
@@ -921,7 +921,7 @@ private module MethodCalls {
* The method call is found by making an attribute read `attr` with the name
* `functionName` on the return value from the `super` call.
*/
- pragma[noinline]
+ pragma[nomagic]
predicate fromSuper(
CallNode call, Function target, string functionName, Class classUsedInSuper, AttrRead attr,
Node self
@@ -931,7 +931,7 @@ private module MethodCalls {
}
/** Extracted to give good join order */
- pragma[noinline]
+ pragma[nomagic]
private predicate fromSuper_join(
CallNode call, string functionName, Class classUsedInSuper, AttrRead attr, Node self
) {
From 7c242b1409ae92d45617e93d7a170bf9dfedc490 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 10:25:19 +0100
Subject: [PATCH 097/415] Python: Minor QLDoc fix
Co-authored-by: Taus
---
.../semmle/python/dataflow/new/internal/DataFlowDispatch.qll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 32b319c43d6..35a01380364 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -24,7 +24,7 @@
* any special logic that requires an AST call to be made before we care to figure out
* what callable this call might end up targeting.
*
- * Specifically this means that we cannot use type-backtrackes from the function of a
+ * Specifically this means that we cannot use type-backtrackers from the function of a
* `CallNode`, since there is no `CallNode` to backtrack from for `func` in the example
* above.
*
From 4f3876f18411d43dc655411de3ea887303d60168 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 10:30:29 +0100
Subject: [PATCH 098/415] Python: Accept rewrite for `_join(` predicates
Co-authored-by: yoff
---
.../new/internal/DataFlowDispatch.qll | 22 +++++--------------
1 file changed, 6 insertions(+), 16 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 35a01380364..81950a016ad 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -854,14 +854,9 @@ private module MethodCalls {
private predicate directCall_join(
CallNode call, string functionName, Class cls, AttrRead attr, Node self
) {
- (
- call.getFunction() = attrReadTracker(attr).asCfgNode() and
- attr.accesses(classTracker(cls), functionName)
- or
- call.getFunction() = attrReadTracker(attr).asCfgNode() and
- attr.accesses(classInstanceTracker(cls), functionName)
- ) and
- attr.accesses(self, functionName)
+ call.getFunction() = attrReadTracker(attr).asCfgNode() and
+ attr.accesses(self, functionName) and
+ self in [classTracker(cls), classInstanceTracker(cls)]
}
/**
@@ -886,14 +881,9 @@ private module MethodCalls {
private predicate callWithinMethodImplicitSelfOrCls_join(
CallNode call, string functionName, Class classWithMethod, AttrRead attr, Node self
) {
- (
- call.getFunction() = attrReadTracker(attr).asCfgNode() and
- attr.accesses(clsTracker(classWithMethod), functionName)
- or
- call.getFunction() = attrReadTracker(attr).asCfgNode() and
- attr.accesses(selfTracker(classWithMethod), functionName)
- ) and
- attr.accesses(self, functionName)
+ call.getFunction() = attrReadTracker(attr).asCfgNode() and
+ attr.accesses(self, functionName) and
+ self in [clsTracker(classWithMethod), selfTracker(classWithMethod)]
}
/**
From 700e40b11bbe6f4dfe16d4df98db2603a9c455f0 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 10:35:20 +0100
Subject: [PATCH 099/415] Python: Fix ql4ql
---
python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
index 285c351ccdc..9d7c153e15a 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
@@ -158,9 +158,7 @@ class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
/** A node representing data being passed to an external API through a call. */
class ExternalApiDataNode extends DataFlow::Node {
ExternalApiDataNode() {
- exists(InterestingExternalApiCall call, DataFlowPrivate::ArgumentPosition apos |
- this = call.getArgument(apos)
- ) and
+ exists(InterestingExternalApiCall call | this = call.getArgument(_)) and
// Not already modeled as a taint step
not TaintTrackingPrivate::defaultAdditionalTaintStep(this, _) and
// for `list.append(x)`, we have a additional taint step from x -> [post] list.
From 608b16c98a375e8e85456be744c13ff3f7d617ad Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 10:56:53 +0100
Subject: [PATCH 100/415] Python: Minor adjustment in QLDoc
Co-authored-by: yoff
---
python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
index 9d7c153e15a..30e68cd00c0 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
@@ -12,7 +12,7 @@ private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPr
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
/**
- * An external API that is considered a "safe" from a security perspective.
+ * An external API that is considered "safe" from a security perspective.
*/
class SafeExternalApi extends Unit {
/**
From 479f019eb09597771df2c5f87055c68c6fa11714 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 10:40:31 +0100
Subject: [PATCH 101/415] Python: Minor rewrite removing unnecessary `exists`
Co-authored-by: Taus
---
.../semmle/python/dataflow/new/internal/DataFlowDispatch.qll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 81950a016ad..644efbfbf8e 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -702,7 +702,7 @@ Function findFunctionAccordingToMro(Class cls, string name) {
result = cls.getAMethod() and
result.getName() = name
or
- not exists(Function f | f.getName() = name and f = cls.getAMethod()) and
+ not cls.getAMethod().getName() = name and
result = findFunctionAccordingToMro(getNextClassInMro(cls), name)
}
@@ -733,7 +733,7 @@ private Function findFunctionAccordingToMroKnownStartingClass(
result.getName() = name and
cls = getADirectSuperclass*(startingClass)
or
- not exists(Function f | f.getName() = name and f = cls.getAMethod()) and
+ not cls.getAMethod().getName() = name and
result =
findFunctionAccordingToMroKnownStartingClass(getNextClassInMroKnownStartingClass(cls,
startingClass), startingClass, name)
From b6f76d784ce70d04be52216d204c0b944669a103 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 10:51:01 +0100
Subject: [PATCH 102/415] Python: Remove accidentally committed files
---
python/ql/test/library-tests/fuck/options | 1 -
python/ql/test/library-tests/fuck/test.py | 17 -----------------
python/ql/test/library-tests/fuck/wat.expected | 1 -
3 files changed, 19 deletions(-)
delete mode 100644 python/ql/test/library-tests/fuck/options
delete mode 100644 python/ql/test/library-tests/fuck/test.py
delete mode 100644 python/ql/test/library-tests/fuck/wat.expected
diff --git a/python/ql/test/library-tests/fuck/options b/python/ql/test/library-tests/fuck/options
deleted file mode 100644
index efa237f03c4..00000000000
--- a/python/ql/test/library-tests/fuck/options
+++ /dev/null
@@ -1 +0,0 @@
-semmle-extractor-options: --max-import-depth=0
diff --git a/python/ql/test/library-tests/fuck/test.py b/python/ql/test/library-tests/fuck/test.py
deleted file mode 100644
index 3029c8be234..00000000000
--- a/python/ql/test/library-tests/fuck/test.py
+++ /dev/null
@@ -1,17 +0,0 @@
-def my_func(arg):
- print("my_func", arg)
-
-class Foo:
- def foo(self, arg=42):
- print("Foo.foo", self, arg)
-
-
-my_func(43)
-
-import random
-if random.choice([True, False]):
- func = my_func
-else:
- func = Foo.foo
-
-func(44)
diff --git a/python/ql/test/library-tests/fuck/wat.expected b/python/ql/test/library-tests/fuck/wat.expected
deleted file mode 100644
index 2a4f078a25f..00000000000
--- a/python/ql/test/library-tests/fuck/wat.expected
+++ /dev/null
@@ -1 +0,0 @@
-| 1 |
From ae1d4decc36841ae3987bf3a30305794f16ac5ab Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 11:01:47 +0100
Subject: [PATCH 103/415] Python: `ExternalAPIs.qll`: Swap order of classes
Co-authored-by: yoff
---
.../CWE-020-ExternalAPIs/ExternalAPIs.qll | 42 +++++++++----------
1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
index 30e68cd00c0..94762ace98c 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
@@ -111,27 +111,6 @@ abstract class InterestingExternalApiCall extends TInterestingExternalApiCall {
abstract string getApiName();
}
-class ResolvedCall extends InterestingExternalApiCall, TResolvedCall {
- DataFlowPrivate::DataFlowCall dfCall;
-
- ResolvedCall() { this = TResolvedCall(dfCall) }
-
- override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
- result = dfCall.getArgument(apos)
- }
-
- override string toString() {
- result = "ExternalAPI:ResolvedCall: " + dfCall.getNode().getNode().toString()
- }
-
- override string getApiName() {
- exists(DataFlow::CallCfgNode call, API::Node apiNode | dfCall.getNode() = call.getNode() |
- result = apiNodeToStringRepr(apiNode) and
- apiNode.getACall() = call
- )
- }
-}
-
class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
DataFlow::CallCfgNode call;
@@ -155,6 +134,27 @@ class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
}
}
+class ResolvedCall extends InterestingExternalApiCall, TResolvedCall {
+ DataFlowPrivate::DataFlowCall dfCall;
+
+ ResolvedCall() { this = TResolvedCall(dfCall) }
+
+ override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
+ result = dfCall.getArgument(apos)
+ }
+
+ override string toString() {
+ result = "ExternalAPI:ResolvedCall: " + dfCall.getNode().getNode().toString()
+ }
+
+ override string getApiName() {
+ exists(DataFlow::CallCfgNode call, API::Node apiNode | dfCall.getNode() = call.getNode() |
+ result = apiNodeToStringRepr(apiNode) and
+ apiNode.getACall() = call
+ )
+ }
+}
+
/** A node representing data being passed to an external API through a call. */
class ExternalApiDataNode extends DataFlow::Node {
ExternalApiDataNode() {
From f8d7a367adacc625f16d00baf02e3a17beec9019 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 11:45:57 +0100
Subject: [PATCH 104/415] Python: Rewrite test for __add__ special method
Co-authored-by: yoff
---
.../library-tests/CallGraph/code/class_special_methods.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py
index e765f155f3c..23dcefdb852 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py
+++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_special_methods.py
@@ -38,9 +38,9 @@ b.__call__(44) # $ pt,tt=Base.__call__
print("\n! b2")
b2 = Base(2) # $ tt=Base.__init__
-# __add__ is called
-b + b2
-b + 100
+
+b + b2 # $ MISSING: tt=Base.__add__
+b + 100 # $ MISSING: tt=Base.__add__
# ========
From e3fcfd0a661d51fb4239afd1e7e9c65c27467fa5 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 13:53:12 +0100
Subject: [PATCH 105/415] Python: Use configuration for dataflow consistency
checks in `dataflow/exceptions`
---
.../experimental/dataflow/exceptions/dataflow-consistency.ql | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/python/ql/test/experimental/dataflow/exceptions/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/exceptions/dataflow-consistency.ql
index 6743fa10d27..3dda6701a83 100644
--- a/python/ql/test/experimental/dataflow/exceptions/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/exceptions/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
From 7a423622f8afe1c6ba8140d2c57c85a16274e2c8 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 12:17:25 +0100
Subject: [PATCH 106/415] DataFlow: Add `uniqueParameterNodeAtPositionExclude`
---
.../cpp/ir/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
.../code/cpp/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
.../cpp/ir/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
.../csharp/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
.../code/java/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
.../dataflow/new/internal/DataFlowImplConsistency.qll | 6 ++++++
.../ruby/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
.../swift/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
8 files changed, 48 insertions(+)
diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
index 533899e8a85..245de04d3ce 100644
--- a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
+++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
@@ -45,6 +45,11 @@ module Consistency {
) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodeAtPosition`. */
+ predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -246,6 +251,7 @@ module Consistency {
query predicate uniqueParameterNodeAtPosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodeAtPositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(Node p0 | isParameterNode(p0, c, pos))) and
msg = "Parameters with overlapping positions."
diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplConsistency.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplConsistency.qll
index 533899e8a85..245de04d3ce 100644
--- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplConsistency.qll
+++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplConsistency.qll
@@ -45,6 +45,11 @@ module Consistency {
) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodeAtPosition`. */
+ predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -246,6 +251,7 @@ module Consistency {
query predicate uniqueParameterNodeAtPosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodeAtPositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(Node p0 | isParameterNode(p0, c, pos))) and
msg = "Parameters with overlapping positions."
diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
index 533899e8a85..245de04d3ce 100644
--- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
@@ -45,6 +45,11 @@ module Consistency {
) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodeAtPosition`. */
+ predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -246,6 +251,7 @@ module Consistency {
query predicate uniqueParameterNodeAtPosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodeAtPositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(Node p0 | isParameterNode(p0, c, pos))) and
msg = "Parameters with overlapping positions."
diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplConsistency.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplConsistency.qll
index 533899e8a85..245de04d3ce 100644
--- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplConsistency.qll
+++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplConsistency.qll
@@ -45,6 +45,11 @@ module Consistency {
) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodeAtPosition`. */
+ predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -246,6 +251,7 @@ module Consistency {
query predicate uniqueParameterNodeAtPosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodeAtPositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(Node p0 | isParameterNode(p0, c, pos))) and
msg = "Parameters with overlapping positions."
diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll
index 533899e8a85..245de04d3ce 100644
--- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll
+++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll
@@ -45,6 +45,11 @@ module Consistency {
) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodeAtPosition`. */
+ predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -246,6 +251,7 @@ module Consistency {
query predicate uniqueParameterNodeAtPosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodeAtPositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(Node p0 | isParameterNode(p0, c, pos))) and
msg = "Parameters with overlapping positions."
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll
index 533899e8a85..245de04d3ce 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll
@@ -45,6 +45,11 @@ module Consistency {
) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodeAtPosition`. */
+ predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -246,6 +251,7 @@ module Consistency {
query predicate uniqueParameterNodeAtPosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodeAtPositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(Node p0 | isParameterNode(p0, c, pos))) and
msg = "Parameters with overlapping positions."
diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll
index 533899e8a85..245de04d3ce 100644
--- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll
+++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll
@@ -45,6 +45,11 @@ module Consistency {
) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodeAtPosition`. */
+ predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -246,6 +251,7 @@ module Consistency {
query predicate uniqueParameterNodeAtPosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodeAtPositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(Node p0 | isParameterNode(p0, c, pos))) and
msg = "Parameters with overlapping positions."
diff --git a/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowImplConsistency.qll b/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowImplConsistency.qll
index 533899e8a85..245de04d3ce 100644
--- a/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowImplConsistency.qll
+++ b/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowImplConsistency.qll
@@ -45,6 +45,11 @@ module Consistency {
) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodeAtPosition`. */
+ predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -246,6 +251,7 @@ module Consistency {
query predicate uniqueParameterNodeAtPosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodeAtPositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(Node p0 | isParameterNode(p0, c, pos))) and
msg = "Parameters with overlapping positions."
From a6fd5b6e59431bafbd5cbc19d0401f3cba47b555 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 11:55:49 +0100
Subject: [PATCH 107/415] DataFlow: Add `uniqueParameterNodePositionExclude`
---
.../cpp/ir/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
.../code/cpp/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
.../cpp/ir/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
.../csharp/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
.../code/java/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
.../dataflow/new/internal/DataFlowImplConsistency.qll | 6 ++++++
.../ruby/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
.../swift/dataflow/internal/DataFlowImplConsistency.qll | 6 ++++++
8 files changed, 48 insertions(+)
diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
index 245de04d3ce..9bbc70fbdf9 100644
--- a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
+++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
@@ -50,6 +50,11 @@ module Consistency {
predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodePosition`. */
+ predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -260,6 +265,7 @@ module Consistency {
query predicate uniqueParameterNodePosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodePositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(ParameterPosition pos0 | isParameterNode(p, c, pos0))) and
msg = "Parameter node with multiple positions."
diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplConsistency.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplConsistency.qll
index 245de04d3ce..9bbc70fbdf9 100644
--- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplConsistency.qll
+++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplConsistency.qll
@@ -50,6 +50,11 @@ module Consistency {
predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodePosition`. */
+ predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -260,6 +265,7 @@ module Consistency {
query predicate uniqueParameterNodePosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodePositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(ParameterPosition pos0 | isParameterNode(p, c, pos0))) and
msg = "Parameter node with multiple positions."
diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
index 245de04d3ce..9bbc70fbdf9 100644
--- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll
@@ -50,6 +50,11 @@ module Consistency {
predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodePosition`. */
+ predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -260,6 +265,7 @@ module Consistency {
query predicate uniqueParameterNodePosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodePositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(ParameterPosition pos0 | isParameterNode(p, c, pos0))) and
msg = "Parameter node with multiple positions."
diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplConsistency.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplConsistency.qll
index 245de04d3ce..9bbc70fbdf9 100644
--- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplConsistency.qll
+++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplConsistency.qll
@@ -50,6 +50,11 @@ module Consistency {
predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodePosition`. */
+ predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -260,6 +265,7 @@ module Consistency {
query predicate uniqueParameterNodePosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodePositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(ParameterPosition pos0 | isParameterNode(p, c, pos0))) and
msg = "Parameter node with multiple positions."
diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll
index 245de04d3ce..9bbc70fbdf9 100644
--- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll
+++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll
@@ -50,6 +50,11 @@ module Consistency {
predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodePosition`. */
+ predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -260,6 +265,7 @@ module Consistency {
query predicate uniqueParameterNodePosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodePositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(ParameterPosition pos0 | isParameterNode(p, c, pos0))) and
msg = "Parameter node with multiple positions."
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll
index 245de04d3ce..9bbc70fbdf9 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll
@@ -50,6 +50,11 @@ module Consistency {
predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodePosition`. */
+ predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -260,6 +265,7 @@ module Consistency {
query predicate uniqueParameterNodePosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodePositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(ParameterPosition pos0 | isParameterNode(p, c, pos0))) and
msg = "Parameter node with multiple positions."
diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll
index 245de04d3ce..9bbc70fbdf9 100644
--- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll
+++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll
@@ -50,6 +50,11 @@ module Consistency {
predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodePosition`. */
+ predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -260,6 +265,7 @@ module Consistency {
query predicate uniqueParameterNodePosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodePositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(ParameterPosition pos0 | isParameterNode(p, c, pos0))) and
msg = "Parameter node with multiple positions."
diff --git a/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowImplConsistency.qll b/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowImplConsistency.qll
index 245de04d3ce..9bbc70fbdf9 100644
--- a/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowImplConsistency.qll
+++ b/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowImplConsistency.qll
@@ -50,6 +50,11 @@ module Consistency {
predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
none()
}
+
+ /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodePosition`. */
+ predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
+ none()
+ }
}
private class RelevantNode extends Node {
@@ -260,6 +265,7 @@ module Consistency {
query predicate uniqueParameterNodePosition(
DataFlowCallable c, ParameterPosition pos, Node p, string msg
) {
+ not any(ConsistencyConfiguration conf).uniqueParameterNodePositionExclude(c, pos, p) and
isParameterNode(p, c, pos) and
not exists(unique(ParameterPosition pos0 | isParameterNode(p, c, pos0))) and
msg = "Parameter node with multiple positions."
From b6272b383da0657d420079dac5df52ec6469810a Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 12:05:18 +0100
Subject: [PATCH 108/415] Python: Allow non-unique parameter positions for
normal parameters
---
.../dataflow/TestUtil/DataFlowConsistency.qll | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
index 8d85437b7d3..e4a6306dc5a 100644
--- a/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
+++ b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
@@ -1,3 +1,4 @@
+import python
import semmle.python.dataflow.new.DataFlow::DataFlow
import semmle.python.dataflow.new.internal.DataFlowPrivate
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
@@ -18,4 +19,20 @@ private class MyConsistencyConfiguration extends ConsistencyConfiguration {
// being a post-update node for the synthetic `**kwargs` parameter.
n instanceof SynthDictSplatParameterNode
}
+
+ override predicate uniqueParameterNodePositionExclude(
+ DataFlowCallable c, ParameterPosition pos, Node p
+ ) {
+ // For normal parameters that can be passed either as positional arguments or as keyword
+ // arguments, we currently have parameter positions for both cases.
+ //
+ // TODO: Figure out how bad breaking this consistency check is
+ exists(Function func, Parameter param |
+ c.getScope() = func and
+ p = parameterNode(param) and
+ c.getParameter(pos) = p and
+ param = func.getArg(_) and
+ param = func.getArgByName(_)
+ )
+ }
}
From 749e81367dbc67a04d4d4413884032a28fd82b5e Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 12:21:50 +0100
Subject: [PATCH 109/415] Python: Allow multiple `**kwargs` parameters outside
our test code
---
.../dataflow/TestUtil/DataFlowConsistency.qll | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
index e4a6306dc5a..456b18e2a87 100644
--- a/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
+++ b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
@@ -20,6 +20,15 @@ private class MyConsistencyConfiguration extends ConsistencyConfiguration {
n instanceof SynthDictSplatParameterNode
}
+ override predicate uniqueParameterNodeAtPositionExclude(
+ DataFlowCallable c, ParameterPosition pos, Node p
+ ) {
+ // TODO: This can be removed once we solve the overlap of dictionary splat parameters
+ c.getParameter(pos) = p and
+ pos.isDictSplat() and
+ not exists(p.getLocation().getFile().getRelativePath())
+ }
+
override predicate uniqueParameterNodePositionExclude(
DataFlowCallable c, ParameterPosition pos, Node p
) {
From dad6221b61ebe5983bb8308148c4778fa7d34c00 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 13:53:41 +0100
Subject: [PATCH 110/415] Python: Accept `dataflow-consistency.expected`
changes for now
As highlighted in the configuration file, there are some things to catch
up on, and we also need to apply the same fix as Ruby for **kwargs
handling.
---
.../callgraph_crosstalk/dataflow-consistency.expected | 3 +++
.../dataflow/consistency/dataflow-consistency.expected | 2 ++
.../dataflow/coverage/dataflow-consistency.expected | 10 ++++++++++
.../generator-flow/dataflow-consistency.expected | 3 +++
.../CallGraph/dataflow-consistency.expected | 3 +++
5 files changed, 21 insertions(+)
diff --git a/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected
index 8f4dbd04742..410b626ffff 100644
--- a/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected
@@ -19,3 +19,6 @@ reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
+uniqueParameterNodeAtPosition
+uniqueParameterNodePosition
+uniqueContentApprox
diff --git a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected
index 410b626ffff..ab832392cf5 100644
--- a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected
+++ b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected
@@ -20,5 +20,7 @@ argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
+| test.py:239:1:239:42 | Function overflowCallee | ** | test.py:239:1:239:42 | SynthDictSplatParameterNode | Parameters with overlapping positions. |
+| test.py:239:1:239:42 | Function overflowCallee | ** | test.py:239:35:239:40 | ControlFlowNode for kwargs | Parameters with overlapping positions. |
uniqueParameterNodePosition
uniqueContentApprox
diff --git a/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected
index 410b626ffff..4ee8d7f0fcc 100644
--- a/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected
+++ b/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected
@@ -20,5 +20,15 @@ argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
+| argumentPassing.py:50:1:60:2 | Function argument_passing | ** | argumentPassing.py:50:1:60:2 | SynthDictSplatParameterNode | Parameters with overlapping positions. |
+| argumentPassing.py:50:1:60:2 | Function argument_passing | ** | argumentPassing.py:59:7:59:7 | ControlFlowNode for g | Parameters with overlapping positions. |
+| argumentPassing.py:185:1:185:23 | Function mixed | ** | argumentPassing.py:185:1:185:23 | SynthDictSplatParameterNode | Parameters with overlapping positions. |
+| argumentPassing.py:185:1:185:23 | Function mixed | ** | argumentPassing.py:185:16:185:21 | ControlFlowNode for kwargs | Parameters with overlapping positions. |
+| classes.py:441:5:441:41 | Function __prepare__ | ** | classes.py:441:5:441:41 | SynthDictSplatParameterNode | Parameters with overlapping positions. |
+| classes.py:441:5:441:41 | Function __prepare__ | ** | classes.py:441:36:441:39 | ControlFlowNode for kwds | Parameters with overlapping positions. |
+| test.py:407:1:407:28 | Function f_extra_keyword | ** | test.py:407:1:407:28 | SynthDictSplatParameterNode | Parameters with overlapping positions. |
+| test.py:407:1:407:28 | Function f_extra_keyword | ** | test.py:407:26:407:26 | ControlFlowNode for b | Parameters with overlapping positions. |
+| test.py:521:23:521:43 | Function lambda | ** | test.py:521:23:521:43 | SynthDictSplatParameterNode | Parameters with overlapping positions. |
+| test.py:521:23:521:43 | Function lambda | ** | test.py:521:35:521:35 | ControlFlowNode for b | Parameters with overlapping positions. |
uniqueParameterNodePosition
uniqueContentApprox
diff --git a/python/ql/test/experimental/dataflow/tainttracking/generator-flow/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/dataflow-consistency.expected
index 8f4dbd04742..410b626ffff 100644
--- a/python/ql/test/experimental/dataflow/tainttracking/generator-flow/dataflow-consistency.expected
+++ b/python/ql/test/experimental/dataflow/tainttracking/generator-flow/dataflow-consistency.expected
@@ -19,3 +19,6 @@ reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
+uniqueParameterNodeAtPosition
+uniqueParameterNodePosition
+uniqueContentApprox
diff --git a/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.expected b/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.expected
index 8f4dbd04742..410b626ffff 100644
--- a/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.expected
+++ b/python/ql/test/experimental/library-tests/CallGraph/dataflow-consistency.expected
@@ -19,3 +19,6 @@ reverseRead
argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
+uniqueParameterNodeAtPosition
+uniqueParameterNodePosition
+uniqueContentApprox
From 1c8cc6a32a863681d40a72309cbc25835e0f18c8 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 14:14:05 +0100
Subject: [PATCH 111/415] Python: Add QLDoc for TFunction
---
.../semmle/python/dataflow/new/internal/DataFlowDispatch.qll | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 644efbfbf8e..cf610428e4f 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -256,6 +256,10 @@ abstract class LibraryCallable extends string {
}
newtype TDataFlowCallable =
+ /**
+ * Is used as the target for all calls: plain functions, lambdas, methods on classes,
+ * class instantiations, and (in the future) special methods.
+ */
TFunction(Function func) {
// For generators/list-comprehensions we create a synthetic function. In the
// points-to call-graph these were not considered callable, and instead we added
From 24892801ec2034ccf63d48d36264f8c40e4fa18e Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 14:16:56 +0100
Subject: [PATCH 112/415] Python: `clsTracker` => `clsArgumentTracker`
Co-authored-by: Taus
---
.../dataflow/new/internal/DataFlowDispatch.qll | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index cf610428e4f..ebe2737ec09 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -552,7 +552,7 @@ Node selfTracker(Class classWithMethod) {
/**
* Gets a reference to the `cls` argument of a classmethod on class `classWithMethod`.
*/
-private TypeTrackingNode clsTracker(TypeTracker t, Class classWithMethod) {
+private TypeTrackingNode clsArgumentTracker(TypeTracker t, Class classWithMethod) {
t.start() and
(
exists(Function func |
@@ -567,15 +567,15 @@ private TypeTrackingNode clsTracker(TypeTracker t, Class classWithMethod) {
result.(CallCfgNode).getArg(0) = selfTracker(classWithMethod)
)
or
- exists(TypeTracker t2 | result = clsTracker(t2, classWithMethod).track(t2, t)) and
+ exists(TypeTracker t2 | result = clsArgumentTracker(t2, classWithMethod).track(t2, t)) and
not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
* Gets a reference to the `cls` argument of a classmethod on class `classWithMethod`.
*/
-Node clsTracker(Class classWithMethod) {
- clsTracker(TypeTracker::end(), classWithMethod).flowsTo(result)
+Node clsArgumentTracker(Class classWithMethod) {
+ clsArgumentTracker(TypeTracker::end(), classWithMethod).flowsTo(result)
}
/**
@@ -763,7 +763,7 @@ private TypeTrackingNode attrReadTracker(TypeTracker t, AttrRead attr) {
t.start() and
result = attr and
attr.getObject() in [
- classTracker(_), classInstanceTracker(_), selfTracker(_), clsTracker(_),
+ classTracker(_), classInstanceTracker(_), selfTracker(_), clsArgumentTracker(_),
superCallNoArgumentTracker(_), superCallTwoArgumentTracker(_, _)
]
or
@@ -887,7 +887,7 @@ private module MethodCalls {
) {
call.getFunction() = attrReadTracker(attr).asCfgNode() and
attr.accesses(self, functionName) and
- self in [clsTracker(classWithMethod), selfTracker(classWithMethod)]
+ self in [clsArgumentTracker(classWithMethod), selfTracker(classWithMethod)]
}
/**
@@ -897,7 +897,7 @@ private module MethodCalls {
*/
predicate fromSuperNewCall(CallNode call, Class classUsedInSuper, AttrRead attr, Node self) {
fromSuper_join(call, "__new__", classUsedInSuper, attr, self) and
- self in [classTracker(_), clsTracker(_)]
+ self in [classTracker(_), clsArgumentTracker(_)]
}
/**
@@ -998,7 +998,7 @@ predicate resolveClassCall(CallNode call, Class cls) {
or
// `cls()` inside a classmethod (which also contains `type(self)()` inside a method)
exists(Class classWithMethod |
- call.getFunction() = clsTracker(classWithMethod).asCfgNode() and
+ call.getFunction() = clsArgumentTracker(classWithMethod).asCfgNode() and
getADirectSuperclass*(cls) = classWithMethod
)
}
@@ -1149,7 +1149,7 @@ predicate getCallArg(
type instanceof CallTypeClassMethod and
apos.isSelf() and
resolveMethodCall(call, target, type, arg) and
- (arg = classTracker(_) or arg = clsTracker(_)) and
+ (arg = classTracker(_) or arg = clsArgumentTracker(_)) and
// dataflow lib has requirement that arguments and calls are in same enclosing callable.
exists(CfgNode cfgNode | cfgNode.getNode() = call |
cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
From b83fc3b6eb8a0a51a9617af41ee5afc775789692 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Tue, 17 Jan 2023 14:38:56 +0100
Subject: [PATCH 113/415] Python: Update QLDoc for `clsArgumentTracker`
---
.../python/dataflow/new/internal/DataFlowDispatch.qll | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index ebe2737ec09..74fb1531aef 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -550,7 +550,9 @@ Node selfTracker(Class classWithMethod) {
}
/**
- * Gets a reference to the `cls` argument of a classmethod on class `classWithMethod`.
+ * Gets a reference to the enclosing class `classWithMethod` from within one of its
+ * methods, either through the `cls` argument from a `classmethod` or from `type(self)`
+ * from a normal method.
*/
private TypeTrackingNode clsArgumentTracker(TypeTracker t, Class classWithMethod) {
t.start() and
@@ -572,7 +574,9 @@ private TypeTrackingNode clsArgumentTracker(TypeTracker t, Class classWithMethod
}
/**
- * Gets a reference to the `cls` argument of a classmethod on class `classWithMethod`.
+ * Gets a reference to the enclosing class `classWithMethod` from within one of its
+ * methods, either through the `cls` argument from a `classmethod` or from `type(self)`
+ * from a normal method.
*/
Node clsArgumentTracker(Class classWithMethod) {
clsArgumentTracker(TypeTracker::end(), classWithMethod).flowsTo(result)
From 4df946b16158cdadaa02ad0afeabbf14cda08c9a Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 18 Jan 2023 12:29:17 +0100
Subject: [PATCH 114/415] Python: call-graph: Don't design for special method
calls yet
The `call` arguments were not `CallNode`s before, to allow for easier
support of special method calls, such as `a + b` going to `__add__`.
However, this is not implemented yet, so for now we can keep things
simple.
Co-authored-by: Taus
---
.../new/internal/DataFlowDispatch.qll | 138 +++++++++---------
1 file changed, 66 insertions(+), 72 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 74fb1531aef..a25c5994ada 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -950,7 +950,7 @@ private module MethodCalls {
)
}
- predicate resolveMethodCall(ControlFlowNode call, Function target, CallType type, Node self) {
+ predicate resolveMethodCall(CallNode call, Function target, CallType type, Node self) {
(
directCall(call, target, _, _, _, self)
or
@@ -1046,11 +1046,11 @@ predicate resolveClassInstanceCall(CallNode call, Function target, Node self) {
* Holds if `call` is a call to the `target`, with call-type `type`.
*/
cached
-predicate resolveCall(ControlFlowNode call, Function target, CallType type) {
+predicate resolveCall(CallNode call, Function target, CallType type) {
Stages::DataFlow::ref() and
(
type instanceof CallTypePlainFunction and
- call.(CallNode).getFunction() = functionTracker(target).asCfgNode() and
+ call.getFunction() = functionTracker(target).asCfgNode() and
not exists(Class cls | cls.getAMethod() = target)
or
resolveMethodCall(call, target, type, _)
@@ -1128,83 +1128,77 @@ predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
* sending both `self` arguments to that function, which is by definition the right thing to do.
*/
cached
-predicate getCallArg(
- ControlFlowNode call, Function target, CallType type, Node arg, ArgumentPosition apos
-) {
+predicate getCallArg(CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) {
Stages::DataFlow::ref() and
+ resolveCall(call, target, type) and
(
- // normal calls with a real call node
- resolveCall(call, target, type) and
- call instanceof CallNode and
+ type instanceof CallTypePlainFunction and
+ normalCallArg(call, arg, apos)
+ or
+ // self argument for normal method calls
+ type instanceof CallTypeNormalMethod and
+ apos.isSelf() and
+ resolveMethodCall(call, target, type, arg) and
+ // dataflow lib has requirement that arguments and calls are in same enclosing callable.
+ exists(CfgNode cfgNode | cfgNode.getNode() = call |
+ cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
+ )
+ or
+ // cls argument for classmethod calls
+ type instanceof CallTypeClassMethod and
+ apos.isSelf() and
+ resolveMethodCall(call, target, type, arg) and
+ (arg = classTracker(_) or arg = clsArgumentTracker(_)) and
+ // dataflow lib has requirement that arguments and calls are in same enclosing callable.
+ exists(CfgNode cfgNode | cfgNode.getNode() = call |
+ cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
+ )
+ or
+ // normal arguments for method calls
(
- type instanceof CallTypePlainFunction and
- normalCallArg(call, arg, apos)
+ type instanceof CallTypeNormalMethod or
+ type instanceof CallTypeStaticMethod or
+ type instanceof CallTypeClassMethod
+ ) and
+ normalCallArg(call, arg, apos)
+ or
+ // method as plain function call.
+ //
+ // argument index 0 of call has position self (and MUST be given as positional
+ // argument in call). This also means that call-arguments are shifted by 1, such
+ // that argument index 1 of call has argument position 0
+ type instanceof CallTypeMethodAsPlainFunction and
+ (
+ apos.isSelf() and arg.asCfgNode() = call.(CallNode).getArg(0)
or
- // self argument for normal method calls
- type instanceof CallTypeNormalMethod and
+ not apos.isPositional(_) and normalCallArg(call, arg, apos)
+ or
+ exists(ArgumentPosition normalPos, int index |
+ apos.isPositional(index - 1) and
+ normalPos.isPositional(index) and
+ normalCallArg(call, arg, normalPos)
+ )
+ )
+ or
+ // class call
+ type instanceof CallTypeClass and
+ (
+ // only pass synthetic node for created object to __init__, and not __new__ since
+ // __new__ is a classmethod.
+ target = invokedFunctionFromClassConstruction(_, "__init__") and
apos.isSelf() and
- resolveMethodCall(call, target, type, arg) and
- // dataflow lib has requirement that arguments and calls are in same enclosing callable.
- exists(CfgNode cfgNode | cfgNode.getNode() = call |
- cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
- )
+ arg = TSyntheticPreUpdateNode(call)
or
- // cls argument for classmethod calls
- type instanceof CallTypeClassMethod and
- apos.isSelf() and
- resolveMethodCall(call, target, type, arg) and
- (arg = classTracker(_) or arg = clsArgumentTracker(_)) and
- // dataflow lib has requirement that arguments and calls are in same enclosing callable.
- exists(CfgNode cfgNode | cfgNode.getNode() = call |
- cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
- )
- or
- // normal arguments for method calls
- (
- type instanceof CallTypeNormalMethod or
- type instanceof CallTypeStaticMethod or
- type instanceof CallTypeClassMethod
- ) and
normalCallArg(call, arg, apos)
+ )
+ or
+ // call on class instance, which goes to `__call__` method
+ type instanceof CallTypeClassInstanceCall and
+ (
+ apos.isSelf() and
+ resolveClassInstanceCall(call, target, arg)
or
- // method as plain function call.
- //
- // argument index 0 of call has position self (and MUST be given as positional
- // argument in call). This also means that call-arguments are shifted by 1, such
- // that argument index 1 of call has argument position 0
- type instanceof CallTypeMethodAsPlainFunction and
- (
- apos.isSelf() and arg.asCfgNode() = call.(CallNode).getArg(0)
- or
- not apos.isPositional(_) and normalCallArg(call, arg, apos)
- or
- exists(ArgumentPosition normalPos, int index |
- apos.isPositional(index - 1) and
- normalPos.isPositional(index) and
- normalCallArg(call, arg, normalPos)
- )
- )
- or
- // class call
- type instanceof CallTypeClass and
- (
- // only pass synthetic node for created object to __init__, and not __new__ since
- // __new__ is a classmethod.
- target = invokedFunctionFromClassConstruction(_, "__init__") and
- apos.isSelf() and
- arg = TSyntheticPreUpdateNode(call)
- or
- normalCallArg(call, arg, apos)
- )
- or
- // call on class instance, which goes to `__call__` method
- type instanceof CallTypeClassInstanceCall and
- (
- apos.isSelf() and
- resolveClassInstanceCall(call, target, arg)
- or
- normalCallArg(call, arg, apos)
- )
+ normalCallArg(call, arg, apos)
)
)
}
From 0df3dd68d6eef76e97625883a4ce08b3aaf87f20 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 20 Jan 2023 15:13:02 +0100
Subject: [PATCH 115/415] Python: Remove (now) redundant cast
---
.../semmle/python/dataflow/new/internal/DataFlowDispatch.qll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index a25c5994ada..fd0ca01c00b 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -1169,7 +1169,7 @@ predicate getCallArg(CallNode call, Function target, CallType type, Node arg, Ar
// that argument index 1 of call has argument position 0
type instanceof CallTypeMethodAsPlainFunction and
(
- apos.isSelf() and arg.asCfgNode() = call.(CallNode).getArg(0)
+ apos.isSelf() and arg.asCfgNode() = call.getArg(0)
or
not apos.isPositional(_) and normalCallArg(call, arg, apos)
or
From d9fbe58ad5a4f515806de4c4baeeb43cde336bc5 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 20 Jan 2023 16:34:59 +0100
Subject: [PATCH 116/415] Python: Expand `starargs_only` test
---
.../semmle/python/dataflow/new/internal/DataFlowDispatch.qll | 1 +
.../ql/test/experimental/dataflow/coverage/argumentPassing.py | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index fd0ca01c00b..ea7c5aab858 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -1085,6 +1085,7 @@ predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
arg.asCfgNode() = call.getArgByName(name)
)
or
+ // the first `*args`
exists(int index |
apos.isStarArgs(index) and
arg.asCfgNode() = call.getStarArg() and
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index 2a07a99801f..81c51d3de1e 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -217,6 +217,10 @@ def test_only_starargs():
args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
starargs_only(*args)
+ args = (arg1, arg2) # $ arg1 arg2 func=starargs_only
+ more_args = (arg3, arg4)
+ starargs_only(*args, *more_args)
+
def starargs_mixed(a, *args):
SINK1(a)
From 41ebb4fb555c100993aa1234358b57926c8cfff6 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 20 Jan 2023 16:40:39 +0100
Subject: [PATCH 117/415] Python: Add `p2` in QLDoc example code for synthetic
**kwargs
---
.../semmle/python/dataflow/new/internal/DataFlowPrivate.qll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index 2de56d73e8d..3daf64294de 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -233,9 +233,9 @@ private predicate dictSplatParameterNodeClearStep(ParameterNode n, DictionaryEle
* and a read step to the `p1` parameter.
*
* ```py
- * def foo(p1): ...
+ * def foo(p1, p2): ...
*
- * kwargs = {"p1": 42}
+ * kwargs = {"p1": 42, "p2": 43}
* foo(**kwargs)
* ```
*
From 25a68c4d71fd55182beb8e95963168dcfae98841 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 20 Jan 2023 16:49:33 +0100
Subject: [PATCH 118/415] Python: Include @yoff's suggestion on synthetic *args
handling
---
.../semmle/python/dataflow/new/internal/DataFlowPrivate.qll | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index 3daf64294de..ddccd0cccfc 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -96,6 +96,12 @@ class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
* nodes. My gut feeling at that this simple approach will be good enough, but if we need to get it more
* precise, it should be possible to do it like this.
*
+ * In PR review, @yoff suggested an alternative approach for more precise handling:
+ *
+ * - At the call site, all positional arguments are stored into a synthetic starArgs argument, always starting at index 0
+ * - This is sent to a synthetic star parameter
+ * - At the receiving end, we know the offset of a potential real star parameter, so we can define read steps accordingly: In foo, we read from the synthetic star parameter at index 1 and store to the real star parameter at index 0.
+ *
* ```py
* def foo(one, *args): ...
* def bar(*args): ...
From 80324735bb6e9366a6b0dfcf7ee3373dcfc89a3f Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 23 Jan 2023 11:31:23 +0100
Subject: [PATCH 119/415] Python: Fixup annotation for
`CWE-022-PathInjection/pathlib_use.py`
---
.../query-tests/Security/CWE-022-PathInjection/pathlib_use.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/pathlib_use.py b/python/ql/test/query-tests/Security/CWE-022-PathInjection/pathlib_use.py
index 4eb5909a61d..6f703f903dc 100644
--- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/pathlib_use.py
+++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/pathlib_use.py
@@ -11,7 +11,7 @@ STATIC_DIR = pathlib.Path("/server/static/")
def path_injection():
filename = request.args.get('filename', '')
p = STATIC_DIR / filename
- p.open() # NOT OK
+ p.open() # $ result=BAD
p2 = pathlib.Path(STATIC_DIR, filename)
- p2.open() # NOT OK
+ p2.open() # $ result=BAD
From 0879c8f8e1e59371e1a18f7fa33c12024dff8991 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 23 Jan 2023 14:24:57 +0100
Subject: [PATCH 120/415] Python: Expand comments on C3 MRO
---
.../python/dataflow/new/internal/DataFlowDispatch.qll | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index ea7c5aab858..87601631557 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -679,6 +679,7 @@ Class getADirectSubclass(Class cls) { cls = getADirectSuperclass(result) }
* For more info on the C3 MRO used in Python see:
* - https://docs.python.org/3/glossary.html#term-method-resolution-order
* - https://www.python.org/download/releases/2.3/mro/
+ * - https://opendylan.org/_static/c3-linearization.pdf
*/
private Class getNextClassInMro(Class cls) {
// class A(B, ...):
@@ -693,13 +694,21 @@ private Class getNextClassInMro(Class cls) {
sub.getBase(i + 1) = classTracker(result).asExpr() and
not result = cls
)
- // There are two important properties for MRO computed with C3 in Python:
+ // There are three important properties for MRO computed with C3 in Python:
//
// 1) monotonicity: if C1 precedes C2 in the MRO of C, then C1 precedes C2 in the MRO
// of any subclass of C.
// 2) local precedence ordering: if C1 precedes C2 in the list of superclasses for C,
// they will keep the same order in the MRO for C (and due to monotonicity, any
// subclass).
+ // 3) consistency with the extended precedence graph: if A and B (that are part of the
+ // class hierarchy of C) do not have a subclass/superclass relationship on their
+ // own, the ordering of A and B in the MRO of C will be determined by the local
+ // precedence ordering in the classes that use both A and B, either directly or
+ // through a subclass. (see paper for more details)
+ //
+ // Note that not all class hierarchies are allowed with C3, see the Python 2.3 article
+ // for examples.
}
/**
From 63b2bd08715d4a0c9daeec7bd5b4bdb57fb37317 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 25 Jan 2023 09:32:37 +0100
Subject: [PATCH 121/415] Python: Fixup `test_only_starargs` addition
validTest.py did not pass, since we use `SINK3_F`.
I initially tried swapping the order
```
args = (arg1, arg2) # $ arg1 arg2 func=starargs_only
more_args = (arg4, arg3)
starargs_only(*args, *more_args)
```
But then asked myself, what is it _actually_ we're testing here? and it
seems to be the way we handle multiple *args arguments in the same call,
so I converted the test to be that instead! (and it matches what we do
in test_stararg_mixed)
---
.../experimental/dataflow/coverage/argumentPassing.py | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index 81c51d3de1e..2f80571de7c 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -207,7 +207,7 @@ def starargs_only(*args):
SINK2(args[1])
SINK3_F(args[2])
-@expects(3*3)
+@expects(5*3)
def test_only_starargs():
starargs_only(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad2,bad3="arg1" bad1,bad3="arg2"
@@ -217,9 +217,12 @@ def test_only_starargs():
args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
starargs_only(*args)
- args = (arg1, arg2) # $ arg1 arg2 func=starargs_only
- more_args = (arg3, arg4)
- starargs_only(*args, *more_args)
+ empty_args = ()
+
+ args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
+ starargs_only(*args, *empty_args)
+ args = (arg1, arg2, "safe") # $ MISSING: arg1 arg2 func=starargs_only
+ starargs_only(*empty_args, *args)
def starargs_mixed(a, *args):
From f262dc68f86cb984140bdba2665b25c9e1847e4d Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 25 Jan 2023 10:07:41 +0100
Subject: [PATCH 122/415] Python: Reword note about debugging
`getNextClassInMro`
---
.../semmle/python/dataflow/new/internal/DataFlowDispatch.qll | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 87601631557..7bc4aa9f215 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -673,8 +673,9 @@ Class getADirectSubclass(Class cls) { cls = getADirectSuperclass(result) }
*
* NOTE for debugging the results of this predicate: Since a class can be part of
* multiple MROs, results from this predicate might only be valid in some, but not all,
- * inheritance chains (such as the result `C` for `cls=B` in the first example -- this
- * might make it difficult to see if the definition of `D` is located in an other file)
+ * inheritance chains: This is the case with the result `C` for `cls=B` in the first
+ * example -- if `B` and `C` are defined in the same file, but `D` in a different file,
+ * this might make the results from this predicate difficult to comprehend at first.
*
* For more info on the C3 MRO used in Python see:
* - https://docs.python.org/3/glossary.html#term-method-resolution-order
From 10d6ebf95b5231c09440f401521376049c2325a6 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Wed, 25 Jan 2023 19:28:05 +0100
Subject: [PATCH 123/415] Use of inline tests for dataflow queries
---
.../query-tests/Security/CWE-022/DataflowQueryTest.expected | 4 ++++
.../query-tests/Security/CWE-022/DataflowQueryTest.ql | 2 ++
.../experimental/query-tests/Security/CWE-022/UnsafeUnpack.py | 4 ++--
3 files changed, 8 insertions(+), 2 deletions(-)
create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected
create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.ql
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected
new file mode 100644
index 00000000000..34c94194924
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected
@@ -0,0 +1,4 @@
+missingAnnotationOnSink
+failures
+| UnsafeUnpack.py:12:46:12:58 | Comment # $result=BAD | Missing result:result=BAD |
+| UnsafeUnpack.py:55:53:55:65 | Comment # $result=BAD | Missing result:result=BAD |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.ql b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.ql
new file mode 100644
index 00000000000..29ccba982aa
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.ql
@@ -0,0 +1,2 @@
+import python
+import experimental.dataflow.TestUtil.DataflowQueryTest
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
index c7820e52b04..b6015bed908 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
@@ -9,7 +9,7 @@ with open(tarpath, "wb") as f:
f.write(response.raw.read())
untarredpath = "/tmp/tmp123"
-shutil.unpack_archive(tarpath, untarredpath)
+shutil.unpack_archive(tarpath, untarredpath) # $result=BAD
import tempfile
@@ -52,5 +52,5 @@ with tempfile.TemporaryDirectory() as temp_dir:
)
fs.get(uri, to_path, recursive=True)
if unpack_path:
- shutil.unpack_archive(to_path, unpack_path)
+ shutil.unpack_archive(to_path, unpack_path) # $result=BAD
to_path = unpack_path
From 0ed480855a8b38dd573e29f6b8112fa4890fd4c9 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Wed, 25 Jan 2023 19:44:28 +0100
Subject: [PATCH 124/415] Update
python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
Yes, definitely
Co-authored-by: yoff
---
python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index 1ebdf48397c..2fdbc29dc43 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -69,8 +69,7 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
// Writing the response data to the archive
exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
is.flowsTo(f) and
- mc.getMethodName() = "write" and
- f = mc.getObject() and
+ mc.calls(f, "write")
nodeFrom = mc.getArg(0) and
nodeTo = is.(CallCfgNode).getArg(0)
)
From 2d38993075a6a5a319b6e493b536f0647e15bf9b Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Wed, 25 Jan 2023 19:46:13 +0100
Subject: [PATCH 125/415] Add a missing "and"
---
python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index 2fdbc29dc43..6cb52a48994 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -69,7 +69,7 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
// Writing the response data to the archive
exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
is.flowsTo(f) and
- mc.calls(f, "write")
+ mc.calls(f, "write") and
nodeFrom = mc.getArg(0) and
nodeTo = is.(CallCfgNode).getArg(0)
)
From 22af6f518272f6e9883bbbc5af3cc1b7dd25c3d1 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Wed, 25 Jan 2023 23:00:00 +0100
Subject: [PATCH 126/415] Restrict download_file() to boto3 lib
---
.../Security/CWE-022bis/UnsafeUnpack.ql | 10 ++++++++--
.../query-tests/Security/CWE-022/UnsafeUnpack.py | 14 ++++++++++++++
2 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index 6cb52a48994..965392856c5 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -36,8 +36,14 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
source.(AttrRead).accesses(o, any(string s))
)
or
- // A source catching a S3 filename download
- exists(API::Node s3 | source = s3.getMember("download_file").getACall().getArg(2))
+ // A source catching an S3 filename download
+ // see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file
+ exists(MethodCallNode mcn, Node s3, Node bc |
+ bc = API::moduleImport("boto3").getMember("client").getACall() and
+ bc = s3.getALocalSource() and
+ mcn.calls(s3, "download_file") and
+ source = mcn.getArg(2)
+ )
or
// A source download a file using wget
exists(MethodCallNode mcn |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
index b6015bed908..13c3a32cd5a 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
@@ -54,3 +54,17 @@ with tempfile.TemporaryDirectory() as temp_dir:
if unpack_path:
shutil.unpack_archive(to_path, unpack_path) # $result=BAD
to_path = unpack_path
+
+
+# A source catching an S3 filename download
+# see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file
+import boto3
+
+remote_ziped_name = "remote_name.tar.gz"
+base_dir = "/tmp/basedir"
+local_ziped_path = os.path.join(base_dir, remote_ziped_name)
+bucket_name = "mybucket"
+
+s3 = boto3.client('s3')
+s3.download_file(bucket_name, remote_ziped_name, local_ziped_path)
+shutil.unpack_archive(local_ziped_path, base_dir) # $result=BAD
\ No newline at end of file
From 9b5b0c60b83fcf112dc0e5778d2ead8c77c86bbc Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Thu, 26 Jan 2023 00:02:20 +0100
Subject: [PATCH 127/415] Handle the download of a tarball using wget pkg.
---
.../Security/CWE-022bis/UnsafeUnpack.ql | 10 +++++++--
.../Security/CWE-022/UnsafeUnpack.py | 21 ++++++++++++++++++-
2 files changed, 28 insertions(+), 3 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index 965392856c5..d9c5ffd6f76 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -46,8 +46,14 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
)
or
// A source download a file using wget
- exists(MethodCallNode mcn |
- mcn = API::moduleImport("wget").getMember("download").getACall() and source = mcn.getArg(1)
+ // see wget: https://pypi.org/project/wget/
+ exists(API::CallNode mcn |
+ mcn = API::moduleImport("wget").getMember("download").getACall() and
+ (
+ source = mcn.getArg(1)
+ or
+ source = mcn.getReturn().asSource() and not exists(Node arg | arg = mcn.getArg(1))
+ )
)
or
// catch the uploaded files as a source
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
index 13c3a32cd5a..8386f2770b5 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
@@ -59,6 +59,7 @@ with tempfile.TemporaryDirectory() as temp_dir:
# A source catching an S3 filename download
# see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file
import boto3
+import os
remote_ziped_name = "remote_name.tar.gz"
base_dir = "/tmp/basedir"
@@ -67,4 +68,22 @@ bucket_name = "mybucket"
s3 = boto3.client('s3')
s3.download_file(bucket_name, remote_ziped_name, local_ziped_path)
-shutil.unpack_archive(local_ziped_path, base_dir) # $result=BAD
\ No newline at end of file
+shutil.unpack_archive(local_ziped_path, base_dir) # $result=BAD
+
+
+# wget
+# see wget: https://pypi.org/project/wget/
+import wget
+import os
+
+url = "https://some.remote/location/remote_name.tar.xz"
+compressed_file = "/tmp/basedir/local_name.tar.xz"
+base_dir = "/tmp/basedir"
+
+# download(url, out, bar) contains out parameter
+wget.download(url, compressed_file)
+shutil.unpack_archive(compressed_file, base_dir) # $result=BAD
+
+# download(url) returns filename
+compressed_file = wget.download(url)
+shutil.unpack_archive(compressed_file, base_dir) # $result=BAD
From f867c9008fdd5c4d03331b581bf8594149291d30 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Thu, 26 Jan 2023 00:08:54 +0100
Subject: [PATCH 128/415] Commit the expected results
---
.../Security/CWE-022/DataflowQueryTest.expected | 3 +++
.../Security/CWE-022/UnsafeUnpack.expected | 12 ++++++++++++
2 files changed, 15 insertions(+)
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected
index 34c94194924..99e42e4cccb 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected
@@ -2,3 +2,6 @@ missingAnnotationOnSink
failures
| UnsafeUnpack.py:12:46:12:58 | Comment # $result=BAD | Missing result:result=BAD |
| UnsafeUnpack.py:55:53:55:65 | Comment # $result=BAD | Missing result:result=BAD |
+| UnsafeUnpack.py:71:51:71:63 | Comment # $result=BAD | Missing result:result=BAD |
+| UnsafeUnpack.py:85:50:85:62 | Comment # $result=BAD | Missing result:result=BAD |
+| UnsafeUnpack.py:89:50:89:62 | Comment # $result=BAD | Missing result:result=BAD |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
index b1e93bf3ab2..78214a634b9 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
@@ -2,13 +2,25 @@ edges
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute |
| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath |
| UnsafeUnpack.py:36:24:36:43 | ControlFlowNode for Attribute() | UnsafeUnpack.py:55:31:55:37 | ControlFlowNode for to_path |
+| UnsafeUnpack.py:70:50:70:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:71:23:71:38 | ControlFlowNode for local_ziped_path |
+| UnsafeUnpack.py:84:20:84:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:85:23:85:37 | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:88:19:88:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:89:23:89:37 | ControlFlowNode for compressed_file |
nodes
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
| UnsafeUnpack.py:36:24:36:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| UnsafeUnpack.py:55:31:55:37 | ControlFlowNode for to_path | semmle.label | ControlFlowNode for to_path |
+| UnsafeUnpack.py:70:50:70:65 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |
+| UnsafeUnpack.py:71:23:71:38 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |
+| UnsafeUnpack.py:84:20:84:34 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:85:23:85:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:88:19:88:36 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| UnsafeUnpack.py:89:23:89:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
subpaths
#select
| UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:55:31:55:37 | ControlFlowNode for to_path | UnsafeUnpack.py:36:24:36:43 | ControlFlowNode for Attribute() | UnsafeUnpack.py:55:31:55:37 | ControlFlowNode for to_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:71:23:71:38 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:70:50:70:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:71:23:71:38 | ControlFlowNode for local_ziped_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:85:23:85:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:84:20:84:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:85:23:85:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:89:23:89:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:88:19:88:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:89:23:89:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
From 2e4cb63049307e68d6228969c7fefe682effedaa Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Thu, 26 Jan 2023 01:00:01 +0100
Subject: [PATCH 129/415] Optimize the Argparse filename as a source.
---
.../Security/CWE-022bis/UnsafeUnpack.ql | 17 +++++++++++------
.../Security/CWE-022/UnsafeUnpack.py | 14 ++++++++++++++
2 files changed, 25 insertions(+), 6 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index d9c5ffd6f76..052ca3e5d98 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -28,12 +28,17 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
// A source coming from a remote location
exists(Http::Client::Request request | source = request)
or
- //A source coming from a CLI argparse module
- exists(Node o, API::Node ap, MethodCallNode args |
- ap = API::moduleImport("argparse").getMember("ArgumentParser").getACall().getReturn() and
- args = ap.getMember("parse_args").getACall() and
- args.flowsTo(o) and
- source.(AttrRead).accesses(o, any(string s))
+ // A source coming from a CLI argparse module
+ // see argparse: https://docs.python.org/3/library/argparse.html
+ exists(MethodCallNode args |
+ args = source.(AttrRead).getObject().getALocalSource() and
+ args =
+ API::moduleImport("argparse")
+ .getMember("ArgumentParser")
+ .getACall()
+ .getReturn()
+ .getMember("parse_args")
+ .getACall()
)
or
// A source catching an S3 filename download
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
index 8386f2770b5..eb0ac597a1a 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
@@ -87,3 +87,17 @@ shutil.unpack_archive(compressed_file, base_dir) # $result=BAD
# download(url) returns filename
compressed_file = wget.download(url)
shutil.unpack_archive(compressed_file, base_dir) # $result=BAD
+
+
+# A source coming from a CLI argparse module
+# see argparse: https://docs.python.org/3/library/argparse.html
+import argparse
+
+parser = argparse.ArgumentParser(description='Process some integers.')
+parser.add_argument('integers', metavar='N', type=int, nargs='+',
+ help='an integer for the accumulator')
+parser.add_argument('filename', help='filename to be provided')
+
+args = parser.parse_args()
+compressed_file = args.filename
+shutil.unpack_archive(compressed_file, base_dir) # $result=BAD
\ No newline at end of file
From 9464940214d59899cc16b61cdb4eaed629010fbf Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Thu, 26 Jan 2023 01:00:19 +0100
Subject: [PATCH 130/415] Add expected results for argparse source
---
.../query-tests/Security/CWE-022/DataflowQueryTest.expected | 1 +
.../query-tests/Security/CWE-022/UnsafeUnpack.expected | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected
index 99e42e4cccb..ae1beb23a86 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected
@@ -5,3 +5,4 @@ failures
| UnsafeUnpack.py:71:51:71:63 | Comment # $result=BAD | Missing result:result=BAD |
| UnsafeUnpack.py:85:50:85:62 | Comment # $result=BAD | Missing result:result=BAD |
| UnsafeUnpack.py:89:50:89:62 | Comment # $result=BAD | Missing result:result=BAD |
+| UnsafeUnpack.py:103:50:103:62 | Comment # $result=BAD | Missing result:result=BAD |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
index 78214a634b9..a2eff1ddd1d 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
@@ -5,6 +5,7 @@ edges
| UnsafeUnpack.py:70:50:70:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:71:23:71:38 | ControlFlowNode for local_ziped_path |
| UnsafeUnpack.py:84:20:84:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:85:23:85:37 | ControlFlowNode for compressed_file |
| UnsafeUnpack.py:88:19:88:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:89:23:89:37 | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:102:19:102:31 | ControlFlowNode for Attribute | UnsafeUnpack.py:103:23:103:37 | ControlFlowNode for compressed_file |
nodes
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
@@ -17,6 +18,8 @@ nodes
| UnsafeUnpack.py:85:23:85:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
| UnsafeUnpack.py:88:19:88:36 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| UnsafeUnpack.py:89:23:89:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:102:19:102:31 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:103:23:103:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
subpaths
#select
| UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
@@ -24,3 +27,4 @@ subpaths
| UnsafeUnpack.py:71:23:71:38 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:70:50:70:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:71:23:71:38 | ControlFlowNode for local_ziped_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:85:23:85:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:84:20:84:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:85:23:85:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:89:23:89:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:88:19:88:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:89:23:89:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:103:23:103:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:102:19:102:31 | ControlFlowNode for Attribute | UnsafeUnpack.py:103:23:103:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
From aaa004061252b19ef77c6084652dbc2bd4ca487c Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Thu, 26 Jan 2023 08:53:47 +0100
Subject: [PATCH 131/415] Separate the dataflow config from the query
---
.../Security/CWE-022bis/UnsafeUnpack.ql | 132 +----------------
.../Security/CWE-022bis/UnsafeUnpackQuery.qll | 137 ++++++++++++++++++
.../CWE-022/DataflowQueryTest.expected | 6 -
.../Security/CWE-022/DataflowQueryTest.ql | 1 +
4 files changed, 140 insertions(+), 136 deletions(-)
create mode 100644 python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index 052ca3e5d98..1e17cb728ca 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -10,141 +10,13 @@
* @security-severity 7.5
* @precision high
* @tags security
+ * experimental
* external/cwe/cwe-022
*/
import python
-import semmle.python.Concepts
-import semmle.python.dataflow.new.internal.DataFlowPublic
-import semmle.python.ApiGraphs
+import UnsafeUnpackQuery
import DataFlow::PathGraph
-import semmle.python.dataflow.new.TaintTracking
-import semmle.python.frameworks.Stdlib
-
-class UnsafeUnpackingConfig extends TaintTracking::Configuration {
- UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" }
-
- override predicate isSource(DataFlow::Node source) {
- // A source coming from a remote location
- exists(Http::Client::Request request | source = request)
- or
- // A source coming from a CLI argparse module
- // see argparse: https://docs.python.org/3/library/argparse.html
- exists(MethodCallNode args |
- args = source.(AttrRead).getObject().getALocalSource() and
- args =
- API::moduleImport("argparse")
- .getMember("ArgumentParser")
- .getACall()
- .getReturn()
- .getMember("parse_args")
- .getACall()
- )
- or
- // A source catching an S3 filename download
- // see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file
- exists(MethodCallNode mcn, Node s3, Node bc |
- bc = API::moduleImport("boto3").getMember("client").getACall() and
- bc = s3.getALocalSource() and
- mcn.calls(s3, "download_file") and
- source = mcn.getArg(2)
- )
- or
- // A source download a file using wget
- // see wget: https://pypi.org/project/wget/
- exists(API::CallNode mcn |
- mcn = API::moduleImport("wget").getMember("download").getACall() and
- (
- source = mcn.getArg(1)
- or
- source = mcn.getReturn().asSource() and not exists(Node arg | arg = mcn.getArg(1))
- )
- )
- or
- // catch the uploaded files as a source
- exists(Subscript s, Attribute at |
- at = s.getObject() and at.getAttr() = "FILES" and source.asExpr() = s
- )
- or
- exists(Node obj, AttrRead ar |
- ar.getAMethodCall("get").flowsTo(source) and
- ar.accesses(obj, "FILES")
- )
- or
- exists(Node obj, AttrRead ar |
- ar.getAMethodCall("getlist").flowsTo(source) and
- ar.accesses(obj, "FILES")
- )
- }
-
- override predicate isSink(DataFlow::Node sink) {
- // A sink capturing method calls to `unpack_archive`.
- sink = API::moduleImport("shutil").getMember("unpack_archive").getACall().getArg(0)
- }
-
- override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
- // Writing the response data to the archive
- exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
- is.flowsTo(f) and
- mc.calls(f, "write") and
- nodeFrom = mc.getArg(0) and
- nodeTo = is.(CallCfgNode).getArg(0)
- )
- or
- // Copying the response data to the archive
- exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
- is.flowsTo(f) and
- mc = API::moduleImport("shutil").getMember("copyfileobj").getACall() and
- f = mc.getArg(1) and
- nodeFrom = mc.getArg(0) and
- nodeTo = is.(CallCfgNode).getArg(0)
- )
- or
- // Reading the response
- exists(MethodCallNode mc |
- nodeFrom = mc.getObject() and
- mc.getMethodName() = "read" and
- mc.flowsTo(nodeTo)
- )
- or
- // Accessing the name or raw content
- exists(AttrRead ar | ar.accesses(nodeFrom, ["name", "raw"]) and ar.flowsTo(nodeTo))
- or
- //Use of join of filename
- exists(API::CallNode mcn |
- mcn = API::moduleImport("os").getMember("path").getMember("join").getACall() and
- nodeFrom = mcn.getArg(1) and
- mcn.flowsTo(nodeTo)
- )
- or
- // Read by chunks
- exists(MethodCallNode mc |
- nodeFrom = mc.getObject() and mc.getMethodName() = "chunks" and mc.flowsTo(nodeTo)
- )
- or
- // Considering the use of closing()
- exists(API::CallNode closing |
- closing = API::moduleImport("contextlib").getMember("closing").getACall() and
- closing.flowsTo(nodeTo) and
- nodeFrom = closing.getArg(0)
- )
- or
- // Considering the use of "fs"
- exists(API::CallNode fs, MethodCallNode mcn |
- fs =
- API::moduleImport("django")
- .getMember("core")
- .getMember("files")
- .getMember("storage")
- .getMember("FileSystemStorage")
- .getACall() and
- fs.flowsTo(mcn.getObject()) and
- mcn.getMethodName() = ["save", "path"] and
- nodeFrom = mcn.getArg(0) and
- nodeTo = mcn
- )
- }
-}
from UnsafeUnpackingConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
new file mode 100644
index 00000000000..5794d9e32f2
--- /dev/null
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
@@ -0,0 +1,137 @@
+/**
+ *
+ * Provides a taint-tracking configuration for detecting "UnsafeUnpacking" vulnerabilities.
+ *
+ */
+
+import python
+import semmle.python.Concepts
+import semmle.python.dataflow.new.internal.DataFlowPublic
+import semmle.python.ApiGraphs
+import semmle.python.dataflow.new.TaintTracking
+import semmle.python.frameworks.Stdlib
+
+class UnsafeUnpackingConfig extends TaintTracking::Configuration {
+ UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" }
+
+ override predicate isSource(DataFlow::Node source) {
+ // A source coming from a remote location
+ exists(Http::Client::Request request | source = request)
+ or
+ // A source coming from a CLI argparse module
+ // see argparse: https://docs.python.org/3/library/argparse.html
+ exists(MethodCallNode args |
+ args = source.(AttrRead).getObject().getALocalSource() and
+ args =
+ API::moduleImport("argparse")
+ .getMember("ArgumentParser")
+ .getACall()
+ .getReturn()
+ .getMember("parse_args")
+ .getACall()
+ )
+ or
+ // A source catching an S3 filename download
+ // see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file
+ exists(MethodCallNode mcn, Node s3, Node bc |
+ bc = API::moduleImport("boto3").getMember("client").getACall() and
+ bc = s3.getALocalSource() and
+ mcn.calls(s3, "download_file") and
+ source = mcn.getArg(2)
+ )
+ or
+ // A source download a file using wget
+ // see wget: https://pypi.org/project/wget/
+ exists(API::CallNode mcn |
+ mcn = API::moduleImport("wget").getMember("download").getACall() and
+ (
+ source = mcn.getArg(1)
+ or
+ source = mcn.getReturn().asSource() and not exists(Node arg | arg = mcn.getArg(1))
+ )
+ )
+ or
+ // catch the uploaded files as a source
+ exists(Subscript s, Attribute at |
+ at = s.getObject() and at.getAttr() = "FILES" and source.asExpr() = s
+ )
+ or
+ exists(Node obj, AttrRead ar |
+ ar.getAMethodCall("get").flowsTo(source) and
+ ar.accesses(obj, "FILES")
+ )
+ or
+ exists(Node obj, AttrRead ar |
+ ar.getAMethodCall("getlist").flowsTo(source) and
+ ar.accesses(obj, "FILES")
+ )
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ // A sink capturing method calls to `unpack_archive`.
+ sink = API::moduleImport("shutil").getMember("unpack_archive").getACall().getArg(0)
+ }
+
+ override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+ // Writing the response data to the archive
+ exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
+ is.flowsTo(f) and
+ mc.calls(f, "write") and
+ nodeFrom = mc.getArg(0) and
+ nodeTo = is.(CallCfgNode).getArg(0)
+ )
+ or
+ // Copying the response data to the archive
+ exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
+ is.flowsTo(f) and
+ mc = API::moduleImport("shutil").getMember("copyfileobj").getACall() and
+ f = mc.getArg(1) and
+ nodeFrom = mc.getArg(0) and
+ nodeTo = is.(CallCfgNode).getArg(0)
+ )
+ or
+ // Reading the response
+ exists(MethodCallNode mc |
+ nodeFrom = mc.getObject() and
+ mc.getMethodName() = "read" and
+ mc.flowsTo(nodeTo)
+ )
+ or
+ // Accessing the name or raw content
+ exists(AttrRead ar | ar.accesses(nodeFrom, ["name", "raw"]) and ar.flowsTo(nodeTo))
+ or
+ //Use of join of filename
+ exists(API::CallNode mcn |
+ mcn = API::moduleImport("os").getMember("path").getMember("join").getACall() and
+ nodeFrom = mcn.getArg(1) and
+ mcn.flowsTo(nodeTo)
+ )
+ or
+ // Read by chunks
+ exists(MethodCallNode mc |
+ nodeFrom = mc.getObject() and mc.getMethodName() = "chunks" and mc.flowsTo(nodeTo)
+ )
+ or
+ // Considering the use of closing()
+ exists(API::CallNode closing |
+ closing = API::moduleImport("contextlib").getMember("closing").getACall() and
+ closing.flowsTo(nodeTo) and
+ nodeFrom = closing.getArg(0)
+ )
+ or
+ // Considering the use of "fs"
+ exists(API::CallNode fs, MethodCallNode mcn |
+ fs =
+ API::moduleImport("django")
+ .getMember("core")
+ .getMember("files")
+ .getMember("storage")
+ .getMember("FileSystemStorage")
+ .getACall() and
+ fs.flowsTo(mcn.getObject()) and
+ mcn.getMethodName() = ["save", "path"] and
+ nodeFrom = mcn.getArg(0) and
+ nodeTo = mcn
+ )
+ }
+}
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected
index ae1beb23a86..3875da4e143 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.expected
@@ -1,8 +1,2 @@
missingAnnotationOnSink
failures
-| UnsafeUnpack.py:12:46:12:58 | Comment # $result=BAD | Missing result:result=BAD |
-| UnsafeUnpack.py:55:53:55:65 | Comment # $result=BAD | Missing result:result=BAD |
-| UnsafeUnpack.py:71:51:71:63 | Comment # $result=BAD | Missing result:result=BAD |
-| UnsafeUnpack.py:85:50:85:62 | Comment # $result=BAD | Missing result:result=BAD |
-| UnsafeUnpack.py:89:50:89:62 | Comment # $result=BAD | Missing result:result=BAD |
-| UnsafeUnpack.py:103:50:103:62 | Comment # $result=BAD | Missing result:result=BAD |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.ql b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.ql
index 29ccba982aa..e3cd5f784ff 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.ql
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.ql
@@ -1,2 +1,3 @@
import python
import experimental.dataflow.TestUtil.DataflowQueryTest
+import UnsafeUnpackQuery
\ No newline at end of file
From 54cc4d6498d90a5b711a9eead2dca298ae860b64 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Thu, 26 Jan 2023 12:51:55 +0100
Subject: [PATCH 132/415] Opt for any source from RemoteFlowSource.
---
.../Security/CWE-022bis/UnsafeUnpackQuery.qll | 5 +-
.../Security/CWE-022/UnsafeUnpack.expected | 62 +++++++------
.../Security/CWE-022/UnsafeUnpack.py | 91 ++++++++-----------
3 files changed, 76 insertions(+), 82 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
index 5794d9e32f2..c041b710ae0 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
@@ -1,7 +1,5 @@
/**
- *
* Provides a taint-tracking configuration for detecting "UnsafeUnpacking" vulnerabilities.
- *
*/
import python
@@ -10,13 +8,14 @@ import semmle.python.dataflow.new.internal.DataFlowPublic
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.TaintTracking
import semmle.python.frameworks.Stdlib
+import semmle.python.dataflow.new.RemoteFlowSources
class UnsafeUnpackingConfig extends TaintTracking::Configuration {
UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" }
override predicate isSource(DataFlow::Node source) {
// A source coming from a remote location
- exists(Http::Client::Request request | source = request)
+ source instanceof RemoteFlowSource
or
// A source coming from a CLI argparse module
// see argparse: https://docs.python.org/3/library/argparse.html
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
index a2eff1ddd1d..cda42c13267 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
@@ -1,30 +1,40 @@
edges
-| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute |
-| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath |
-| UnsafeUnpack.py:36:24:36:43 | ControlFlowNode for Attribute() | UnsafeUnpack.py:55:31:55:37 | ControlFlowNode for to_path |
-| UnsafeUnpack.py:70:50:70:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:71:23:71:38 | ControlFlowNode for local_ziped_path |
-| UnsafeUnpack.py:84:20:84:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:85:23:85:37 | ControlFlowNode for compressed_file |
-| UnsafeUnpack.py:88:19:88:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:89:23:89:37 | ControlFlowNode for compressed_file |
-| UnsafeUnpack.py:102:19:102:31 | ControlFlowNode for Attribute | UnsafeUnpack.py:103:23:103:37 | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request | UnsafeUnpack.py:11:16:11:22 | ControlFlowNode for request |
+| UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | UnsafeUnpack.py:5:26:5:32 | GSSA Variable request |
+| UnsafeUnpack.py:5:26:5:32 | GSSA Variable request | UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request |
+| UnsafeUnpack.py:11:16:11:22 | ControlFlowNode for request | UnsafeUnpack.py:11:16:11:27 | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:11:16:11:27 | ControlFlowNode for Attribute | UnsafeUnpack.py:17:23:17:34 | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:17:23:17:34 | ControlFlowNode for Attribute | UnsafeUnpack.py:20:31:20:37 | ControlFlowNode for tarpath |
+| UnsafeUnpack.py:34:50:34:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:35:23:35:38 | ControlFlowNode for local_ziped_path |
+| UnsafeUnpack.py:48:20:48:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:49:23:49:37 | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:52:19:52:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:53:23:53:37 | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:66:19:66:31 | ControlFlowNode for Attribute | UnsafeUnpack.py:67:23:67:37 | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:80:16:80:28 | ControlFlowNode for Attribute | UnsafeUnpack.py:86:15:86:26 | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:86:15:86:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:88:23:88:29 | ControlFlowNode for tarpath |
nodes
-| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
-| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
-| UnsafeUnpack.py:36:24:36:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
-| UnsafeUnpack.py:55:31:55:37 | ControlFlowNode for to_path | semmle.label | ControlFlowNode for to_path |
-| UnsafeUnpack.py:70:50:70:65 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |
-| UnsafeUnpack.py:71:23:71:38 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |
-| UnsafeUnpack.py:84:20:84:34 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
-| UnsafeUnpack.py:85:23:85:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
-| UnsafeUnpack.py:88:19:88:36 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
-| UnsafeUnpack.py:89:23:89:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
-| UnsafeUnpack.py:102:19:102:31 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| UnsafeUnpack.py:103:23:103:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request | semmle.label | ModuleVariableNode for UnsafeUnpack.request |
+| UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
+| UnsafeUnpack.py:5:26:5:32 | GSSA Variable request | semmle.label | GSSA Variable request |
+| UnsafeUnpack.py:11:16:11:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| UnsafeUnpack.py:11:16:11:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:17:23:17:34 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:20:31:20:37 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
+| UnsafeUnpack.py:34:50:34:65 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |
+| UnsafeUnpack.py:35:23:35:38 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |
+| UnsafeUnpack.py:48:20:48:34 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:49:23:49:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:52:19:52:36 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| UnsafeUnpack.py:53:23:53:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:66:19:66:31 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:67:23:67:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:80:16:80:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:86:15:86:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:88:23:88:29 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
subpaths
#select
-| UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
-| UnsafeUnpack.py:55:31:55:37 | ControlFlowNode for to_path | UnsafeUnpack.py:36:24:36:43 | ControlFlowNode for Attribute() | UnsafeUnpack.py:55:31:55:37 | ControlFlowNode for to_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
-| UnsafeUnpack.py:71:23:71:38 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:70:50:70:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:71:23:71:38 | ControlFlowNode for local_ziped_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
-| UnsafeUnpack.py:85:23:85:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:84:20:84:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:85:23:85:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
-| UnsafeUnpack.py:89:23:89:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:88:19:88:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:89:23:89:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
-| UnsafeUnpack.py:103:23:103:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:102:19:102:31 | ControlFlowNode for Attribute | UnsafeUnpack.py:103:23:103:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:20:31:20:37 | ControlFlowNode for tarpath | UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | UnsafeUnpack.py:20:31:20:37 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:35:23:35:38 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:34:50:34:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:35:23:35:38 | ControlFlowNode for local_ziped_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:49:23:49:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:48:20:48:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:49:23:49:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:53:23:53:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:52:19:52:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:53:23:53:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:67:23:67:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:66:19:66:31 | ControlFlowNode for Attribute | UnsafeUnpack.py:67:23:67:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:88:23:88:29 | ControlFlowNode for tarpath | UnsafeUnpack.py:80:16:80:28 | ControlFlowNode for Attribute | UnsafeUnpack.py:88:23:88:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
index eb0ac597a1a..b2d23f2dbe0 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
@@ -1,60 +1,24 @@
import requests
import shutil
+import os
-url = "https://www.someremote.location/tarball.tar.gz"
-response = requests.get(url, stream=True)
+from flask import Flask, request
+app = Flask(__name__)
-tarpath = "/tmp/tmp456/tarball.tar.gz"
-with open(tarpath, "wb") as f:
- f.write(response.raw.read())
-
-untarredpath = "/tmp/tmp123"
-shutil.unpack_archive(tarpath, untarredpath) # $result=BAD
-
-
-import tempfile
-import os
-from urllib import request
-import contextlib
-import shutil
-
-unpack = True
-to_path = "/tmp/tmp123"
-uri = "https://www.goog.com/zzz.tar.gz"
-scheme = "https"
-
-with tempfile.TemporaryDirectory() as temp_dir:
- if unpack and (str(uri).endswith("zip") or str(uri).endswith("tar.gz")):
- unpack_path = to_path
- to_path = temp_dir
- else:
- unpack_path = None
- if scheme in ["http", "https", "ftp"]:
- if os.path.isdir(to_path):
- to_path = os.path.join(to_path, os.path.basename(uri))
- url = uri
- url_response = request.urlopen(url)
- with contextlib.closing(url_response) as fp:
- with open(to_path, "wb") as out_file:
- block_size = DEFAULT_BUFFER_SIZE * 8
- while True:
- block = fp.read(block_size)
- if not block:
- break
- out_file.write(block)
- else:
- if scheme == "oci" and not storage_options:
- storage_options = default_signer()
- fs = fsspec.filesystem(scheme, **storage_options)
- if os.path.isdir(to_path):
- to_path = os.path.join(
- to_path, os.path.basename(str(uri).rstrip("/"))
- )
- fs.get(uri, to_path, recursive=True)
- if unpack_path:
- shutil.unpack_archive(to_path, unpack_path) # $result=BAD
- to_path = unpack_path
+# Consider any RemoteFlowSource as a source
+@app.route("/download_from_url")
+def download_from_url():
+ filename = request.args.get('filename', '')
+ if not filename:
+ response = requests.get(filename, stream=True)
+
+ tarpath = "/tmp/tmp456/tarball.tar.gz"
+ with open(tarpath, "wb") as f:
+ f.write(response.raw.read())
+ untarredpath = "/tmp/tmp123"
+ shutil.unpack_archive(tarpath, untarredpath) # $result=BAD
+
# A source catching an S3 filename download
# see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file
@@ -100,4 +64,25 @@ parser.add_argument('filename', help='filename to be provided')
args = parser.parse_args()
compressed_file = args.filename
-shutil.unpack_archive(compressed_file, base_dir) # $result=BAD
\ No newline at end of file
+shutil.unpack_archive(compressed_file, base_dir) # $result=BAD
+
+
+# A source coming from a CLI and downloaded
+import argparse
+import requests
+
+parser = argparse.ArgumentParser(description='Process some integers.')
+parser.add_argument('integers', metavar='N', type=int, nargs='+',
+ help='an integer for the accumulator')
+parser.add_argument('filename', help='url to filename to be provided')
+
+args = parser.parse_args()
+url_filename = args.filename
+
+response = requests.get(url_filename, stream=True)
+
+tarpath = "/tmp/tmp456/tarball.tar.gz"
+with open(tarpath, "wb") as f:
+ f.write(response.raw.read())
+
+shutil.unpack_archive(tarpath, base_dir) # $result=BAD
\ No newline at end of file
From 51b11de44a24926449543f837d9d1ef8c91408b9 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Thu, 26 Jan 2023 15:16:24 +0100
Subject: [PATCH 133/415] Add Django upload examples
---
.../Security/CWE-022bis/UnsafeUnpackQuery.qll | 25 ++++++++----
.../Security/CWE-022/UnsafeUnpack.py | 40 ++++++++++++++++++-
2 files changed, 57 insertions(+), 8 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
index c041b710ae0..29c41b74dab 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
@@ -55,13 +55,10 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
at = s.getObject() and at.getAttr() = "FILES" and source.asExpr() = s
)
or
+ // Retrieve Django uploaded files
+ // see HttpRequest.FILES: https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.HttpRequest.FILES
exists(Node obj, AttrRead ar |
- ar.getAMethodCall("get").flowsTo(source) and
- ar.accesses(obj, "FILES")
- )
- or
- exists(Node obj, AttrRead ar |
- ar.getAMethodCall("getlist").flowsTo(source) and
+ ar.getAMethodCall(["getlist", "get"]).flowsTo(source) and
ar.accesses(obj, "FILES")
)
}
@@ -93,7 +90,21 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
exists(MethodCallNode mc |
nodeFrom = mc.getObject() and
mc.getMethodName() = "read" and
- mc.flowsTo(nodeTo)
+ nodeTo = mc
+ )
+ or
+ // Open for access
+ exists(MethodCallNode cn |
+ nodeTo = cn.getObject() and
+ cn.getMethodName() = "open" and
+ cn.flowsTo(nodeFrom)
+ )
+ or
+ // Write for access
+ exists(MethodCallNode cn |
+ nodeTo = cn.getObject() and
+ cn.getMethodName() = "write" and
+ nodeFrom = cn.getArg(0)
)
or
// Accessing the name or raw content
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
index b2d23f2dbe0..fe1b6fa6d45 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
@@ -85,4 +85,42 @@ tarpath = "/tmp/tmp456/tarball.tar.gz"
with open(tarpath, "wb") as f:
f.write(response.raw.read())
-shutil.unpack_archive(tarpath, base_dir) # $result=BAD
\ No newline at end of file
+shutil.unpack_archive(tarpath, base_dir) # $result=BAD
+
+# the django upload functionality
+# see HttpRequest.FILES: https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.HttpRequest.FILES
+from django.shortcuts import render
+from django.core.files.storage import FileSystemStorage
+import shutil
+
+def simple_upload(request):
+
+ base_dir = "/tmp/baase_dir"
+ if request.method == 'POST':
+ # Read uploaded files by chunks of data
+ # see chunks(): https://docs.djangoproject.com/en/4.1/ref/files/uploads/#django.core.files.uploadedfile.UploadedFile.chunks
+ savepath = os.path.join(base_dir, "tarball_compressed.tar.gz")
+ with open(savepath, 'wb+') as wfile:
+ for chunk in request.FILES["ufile1"].chunks():
+ wfile.write(chunk)
+ shutil.unpack_archive(savepath, base_dir) # $result=BAD
+
+ # Write in binary the uploaded tarball
+ myfile = request.FILES.get("ufile1")
+ file_path = os.path.join(base_dir, "tarball.tar")
+ with file_path.open('wb') as f:
+ f.write(myfile.read())
+ shutil.unpack_archive(file_path, base_dir) # $result=BAD
+
+ # Save uploaded files using FileSystemStorage Django API
+ # see FileSystemStorage: https://docs.djangoproject.com/en/4.1/ref/files/storage/#django.core.files.storage.FileSystemStorage
+ for ufile in request.FILES.getlist():
+ fs = FileSystemStorage()
+ filename = fs.save(ufile.name, ufile)
+ uploaded_file_path = fs.path(filename)
+ shutil.unpack_archive(uploaded_file_path, base_dir) # $result=BAD
+
+ return render(request, 'simple_upload.html')
+
+ elif request.method == 'GET':
+ return render(request, 'simple_upload.html')
\ No newline at end of file
From 1a211485a47c814f461ccfb8488b3a1626e2a845 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Thu, 26 Jan 2023 17:07:59 +0100
Subject: [PATCH 134/415] Restrict the source and add two steps.
---
.../Security/CWE-022bis/UnsafeUnpackQuery.qll | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
index 29c41b74dab..7409d30fdfa 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
@@ -50,17 +50,9 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
)
)
or
- // catch the uploaded files as a source
- exists(Subscript s, Attribute at |
- at = s.getObject() and at.getAttr() = "FILES" and source.asExpr() = s
- )
- or
- // Retrieve Django uploaded files
+ // catch the Django uploaded files as a source
// see HttpRequest.FILES: https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.HttpRequest.FILES
- exists(Node obj, AttrRead ar |
- ar.getAMethodCall(["getlist", "get"]).flowsTo(source) and
- ar.accesses(obj, "FILES")
- )
+ source.(AttrRead).getAttributeName() = "FILES"
}
override predicate isSink(DataFlow::Node sink) {
@@ -107,6 +99,12 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
nodeFrom = cn.getArg(0)
)
or
+ // Retrieve Django uploaded files
+ // see HttpRequest.FILES.getlist(): https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.QueryDict.getlist
+ exists(MethodCallNode mc |
+ nodeFrom = mc.getObject() and mc.getMethodName() = ["getlist", "get"] and nodeTo = mc
+ )
+ or
// Accessing the name or raw content
exists(AttrRead ar | ar.accesses(nodeFrom, ["name", "raw"]) and ar.flowsTo(nodeTo))
or
From 998f1bf215ea357943baf702bc98d5efce68ddb8 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Thu, 26 Jan 2023 18:54:36 +0100
Subject: [PATCH 135/415] Some reformatting
---
.../Security/CWE-022bis/UnsafeUnpackQuery.qll | 71 +++++++++----------
.../Security/CWE-022/UnsafeUnpack.py | 17 +++--
2 files changed, 40 insertions(+), 48 deletions(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
index 7409d30fdfa..4e7e97188c0 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
@@ -61,23 +61,6 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
- // Writing the response data to the archive
- exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
- is.flowsTo(f) and
- mc.calls(f, "write") and
- nodeFrom = mc.getArg(0) and
- nodeTo = is.(CallCfgNode).getArg(0)
- )
- or
- // Copying the response data to the archive
- exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
- is.flowsTo(f) and
- mc = API::moduleImport("shutil").getMember("copyfileobj").getACall() and
- f = mc.getArg(1) and
- nodeFrom = mc.getArg(0) and
- nodeTo = is.(CallCfgNode).getArg(0)
- )
- or
// Reading the response
exists(MethodCallNode mc |
nodeFrom = mc.getObject() and
@@ -94,39 +77,22 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
or
// Write for access
exists(MethodCallNode cn |
- nodeTo = cn.getObject() and
+ nodeFrom = cn.getObject() and
cn.getMethodName() = "write" and
- nodeFrom = cn.getArg(0)
+ nodeTo = cn.getArg(0)
)
or
// Retrieve Django uploaded files
// see HttpRequest.FILES.getlist(): https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.QueryDict.getlist
exists(MethodCallNode mc |
- nodeFrom = mc.getObject() and mc.getMethodName() = ["getlist", "get"] and nodeTo = mc
+ nodeFrom = mc.getObject() and
+ mc.getMethodName() = ["getlist", "get"] and
+ nodeTo = mc
)
or
// Accessing the name or raw content
exists(AttrRead ar | ar.accesses(nodeFrom, ["name", "raw"]) and ar.flowsTo(nodeTo))
or
- //Use of join of filename
- exists(API::CallNode mcn |
- mcn = API::moduleImport("os").getMember("path").getMember("join").getACall() and
- nodeFrom = mcn.getArg(1) and
- mcn.flowsTo(nodeTo)
- )
- or
- // Read by chunks
- exists(MethodCallNode mc |
- nodeFrom = mc.getObject() and mc.getMethodName() = "chunks" and mc.flowsTo(nodeTo)
- )
- or
- // Considering the use of closing()
- exists(API::CallNode closing |
- closing = API::moduleImport("contextlib").getMember("closing").getACall() and
- closing.flowsTo(nodeTo) and
- nodeFrom = closing.getArg(0)
- )
- or
// Considering the use of "fs"
exists(API::CallNode fs, MethodCallNode mcn |
fs =
@@ -141,5 +107,32 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
nodeFrom = mcn.getArg(0) and
nodeTo = mcn
)
+ or
+ //Use of join of filename
+ exists(API::CallNode mcn |
+ mcn = API::moduleImport("os").getMember("path").getMember("join").getACall() and
+ nodeFrom = mcn.getArg(1) and
+ mcn.flowsTo(nodeTo)
+ )
+ or
+ // Read by chunks
+ exists(MethodCallNode mc |
+ nodeFrom = mc.getObject() and mc.getMethodName() = "chunks" and mc.flowsTo(nodeTo)
+ )
+ or
+ // Write access
+ exists(MethodCallNode cn |
+ nodeTo = cn.getObject() and
+ cn.getMethodName() = "write" and
+ nodeFrom = cn.getArg(0)
+ )
+ or
+ // Writing the response data to the archive
+ exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
+ is.flowsTo(f) and
+ mc.calls(f, "write") and
+ nodeFrom = mc.getArg(0) and
+ nodeTo = is.(CallCfgNode).getArg(0)
+ )
}
}
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
index fe1b6fa6d45..ae9ee055999 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
@@ -8,16 +8,15 @@ app = Flask(__name__)
# Consider any RemoteFlowSource as a source
@app.route("/download_from_url")
def download_from_url():
- filename = request.args.get('filename', '')
- if not filename:
- response = requests.get(filename, stream=True)
-
- tarpath = "/tmp/tmp456/tarball.tar.gz"
- with open(tarpath, "wb") as f:
- f.write(response.raw.read())
+ filename = request.args.get('filename', '')
+ if not filename:
+ response = requests.get(filename, stream=True)
- untarredpath = "/tmp/tmp123"
- shutil.unpack_archive(tarpath, untarredpath) # $result=BAD
+ tarpath = "/tmp/tmp456/tarball.tar.gz"
+ with open(tarpath, "wb") as f:
+ f.write(response.raw.read())
+ untarredpath = "/tmp/tmp123"
+ shutil.unpack_archive(tarpath, untarredpath) # $result=BAD
# A source catching an S3 filename download
From 02b3a1b51597222b4e08ec0befd5ecfce36fc397 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 27 Jan 2023 11:10:02 +0100
Subject: [PATCH 136/415] Python: At most one `**kwargs` `ParameterNode` per
callable
Similar to the Ruby changes from
https://github.com/github/codeql/pull/11461
I feel the change to `DataFlowFunction.getParameter` where we use
`not exists(func.getArgByName(_))` is not great, but I was not allowed
to use `not exists(this.getParameter(any(ParameterPosition _).isKeyword(_)))`
because of negative recursion.
---
.../new/internal/DataFlowDispatch.qll | 31 ++++++++++++++---
.../dataflow/new/internal/DataFlowPrivate.qll | 34 +++++++++++++++++--
.../consistency/dataflow-consistency.expected | 2 --
.../coverage/dataflow-consistency.expected | 10 ------
4 files changed, 59 insertions(+), 18 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 7bc4aa9f215..a73be967534 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -321,6 +321,7 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
or
exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
or
+ // `*args`
exists(int index |
(
ppos.isStarArgs(index) and
@@ -343,9 +344,22 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
not exists(func.getArg(_)) and index = 0
)
or
- ppos.isDictSplat() and result.getParameter() = func.getKwarg()
- or
- ppos.isDictSplat() and result = TSynthDictSplatParameterNode(this)
+ // `**kwargs`
+ // since dataflow library has restriction that we can only have ONE result per
+ // parameter position, if there is both a synthetic **kwargs and a real **kwargs
+ // parameter, we only give the result for the synthetic, and add local flow from the
+ // synthetic to the real. It might seem more natural to do it in the other
+ // direction, but since we have a clearStep on the real **kwargs parameter, that
+ // content-clearing would also affect the synthetic parameter, which we don't want.
+ (
+ not exists(func.getArgByName(_)) and
+ ppos.isDictSplat() and
+ result.getParameter() = func.getKwarg()
+ or
+ exists(func.getArgByName(_)) and
+ ppos.isDictSplat() and
+ result = TSynthDictSplatParameterNode(this)
+ )
}
}
@@ -1400,7 +1414,16 @@ class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
override Parameter getParameter() { none() }
override predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
- sc = c.asLibraryCallable() and ppos = pos
+ sc = c.asLibraryCallable() and
+ ppos = pos and
+ // avoid overlap with `SynthDictSplatParameterNode`
+ not (
+ pos.isDictSplat() and
+ exists(ParameterPosition keywordPos |
+ FlowSummaryImpl::Private::summaryParameterNodeRange(sc, keywordPos) and
+ keywordPos.isKeyword(_)
+ )
+ )
}
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = sc }
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index ddccd0cccfc..e21594c8385 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -224,8 +224,11 @@ private predicate synthDictSplatArgumentNodeStoreStep(
private predicate dictSplatParameterNodeClearStep(ParameterNode n, DictionaryElementContent c) {
exists(DataFlowCallable callable, ParameterPosition dictSplatPos, ParameterPosition keywordPos |
dictSplatPos.isDictSplat() and
- n = callable.getParameter(dictSplatPos) and
- not n instanceof SynthDictSplatParameterNode and
+ (
+ n.getParameter() = callable.(DataFlowFunction).getScope().getKwarg()
+ or
+ n = TSummaryParameterNode(callable.asLibraryCallable(), dictSplatPos)
+ ) and
exists(callable.getParameter(keywordPos)) and
keywordPos.isKeyword(c.getKey())
)
@@ -276,6 +279,31 @@ class SynthDictSplatParameterNode extends ParameterNodeImpl, TSynthDictSplatPara
override Parameter getParameter() { none() }
}
+/**
+ * Flow step from the synthetic `**kwargs` parameter to the real `**kwargs` parameter.
+ * Due to a restriction in the dataflow library, we can only give one of them as the result for
+ * `DataFlowCallable.getParameter`, so this is a workaround to ensure there is flow to
+ * _both_ of them.
+ */
+private predicate dictSplatParameterNodeFlowStep(
+ ParameterNodeImpl nodeFrom, ParameterNodeImpl nodeTo
+) {
+ exists(DataFlowCallable callable |
+ nodeFrom = TSynthDictSplatParameterNode(callable) and
+ (
+ nodeTo.getParameter() = callable.(DataFlowFunction).getScope().getKwarg()
+ or
+ exists(ParameterPosition pos |
+ nodeTo = TSummaryParameterNode(callable.asLibraryCallable(), pos) and
+ pos.isDictSplat()
+ )
+ )
+ )
+}
+
+/**
+ * Reads from the synthetic **kwargs parameter to each keyword parameter.
+ */
predicate synthDictSplatParameterNodeReadStep(
SynthDictSplatParameterNode nodeFrom, DictionaryElementContent c, ParameterNode nodeTo
) {
@@ -418,6 +446,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo)
or
summaryFlowSteps(nodeFrom, nodeTo)
+ or
+ dictSplatParameterNodeFlowStep(nodeFrom, nodeTo)
}
/**
diff --git a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected
index ab832392cf5..410b626ffff 100644
--- a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected
+++ b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected
@@ -20,7 +20,5 @@ argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
-| test.py:239:1:239:42 | Function overflowCallee | ** | test.py:239:1:239:42 | SynthDictSplatParameterNode | Parameters with overlapping positions. |
-| test.py:239:1:239:42 | Function overflowCallee | ** | test.py:239:35:239:40 | ControlFlowNode for kwargs | Parameters with overlapping positions. |
uniqueParameterNodePosition
uniqueContentApprox
diff --git a/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected
index 4ee8d7f0fcc..410b626ffff 100644
--- a/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected
+++ b/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected
@@ -20,15 +20,5 @@ argHasPostUpdate
postWithInFlow
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
-| argumentPassing.py:50:1:60:2 | Function argument_passing | ** | argumentPassing.py:50:1:60:2 | SynthDictSplatParameterNode | Parameters with overlapping positions. |
-| argumentPassing.py:50:1:60:2 | Function argument_passing | ** | argumentPassing.py:59:7:59:7 | ControlFlowNode for g | Parameters with overlapping positions. |
-| argumentPassing.py:185:1:185:23 | Function mixed | ** | argumentPassing.py:185:1:185:23 | SynthDictSplatParameterNode | Parameters with overlapping positions. |
-| argumentPassing.py:185:1:185:23 | Function mixed | ** | argumentPassing.py:185:16:185:21 | ControlFlowNode for kwargs | Parameters with overlapping positions. |
-| classes.py:441:5:441:41 | Function __prepare__ | ** | classes.py:441:5:441:41 | SynthDictSplatParameterNode | Parameters with overlapping positions. |
-| classes.py:441:5:441:41 | Function __prepare__ | ** | classes.py:441:36:441:39 | ControlFlowNode for kwds | Parameters with overlapping positions. |
-| test.py:407:1:407:28 | Function f_extra_keyword | ** | test.py:407:1:407:28 | SynthDictSplatParameterNode | Parameters with overlapping positions. |
-| test.py:407:1:407:28 | Function f_extra_keyword | ** | test.py:407:26:407:26 | ControlFlowNode for b | Parameters with overlapping positions. |
-| test.py:521:23:521:43 | Function lambda | ** | test.py:521:23:521:43 | SynthDictSplatParameterNode | Parameters with overlapping positions. |
-| test.py:521:23:521:43 | Function lambda | ** | test.py:521:35:521:35 | ControlFlowNode for b | Parameters with overlapping positions. |
uniqueParameterNodePosition
uniqueContentApprox
From bca053f85554cd0d21a55219740d275035c70f16 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Fri, 27 Jan 2023 13:42:14 +0100
Subject: [PATCH 137/415] Move the config query to the parent directory
---
.../experimental/Security/{CWE-022bis => }/UnsafeUnpackQuery.qll | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename python/ql/src/experimental/Security/{CWE-022bis => }/UnsafeUnpackQuery.qll (100%)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
similarity index 100%
rename from python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpackQuery.qll
rename to python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
From 5f0bf1053a3515faf16ec0ef033ab6b2511c97e6 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Fri, 27 Jan 2023 13:42:57 +0100
Subject: [PATCH 138/415] Update the dataflow test query and the expected
results
---
.../Security/CWE-022/DataflowQueryTest.ql | 2 +-
.../Security/CWE-022/UnsafeUnpack.expected | 45 +++----------------
2 files changed, 6 insertions(+), 41 deletions(-)
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.ql b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.ql
index e3cd5f784ff..df70ff9fe51 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.ql
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/DataflowQueryTest.ql
@@ -1,3 +1,3 @@
import python
import experimental.dataflow.TestUtil.DataflowQueryTest
-import UnsafeUnpackQuery
\ No newline at end of file
+import experimental.Security.UnsafeUnpackQuery
\ No newline at end of file
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
index cda42c13267..c07d7f1e245 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
@@ -1,40 +1,5 @@
-edges
-| UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request | UnsafeUnpack.py:11:16:11:22 | ControlFlowNode for request |
-| UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | UnsafeUnpack.py:5:26:5:32 | GSSA Variable request |
-| UnsafeUnpack.py:5:26:5:32 | GSSA Variable request | UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request |
-| UnsafeUnpack.py:11:16:11:22 | ControlFlowNode for request | UnsafeUnpack.py:11:16:11:27 | ControlFlowNode for Attribute |
-| UnsafeUnpack.py:11:16:11:27 | ControlFlowNode for Attribute | UnsafeUnpack.py:17:23:17:34 | ControlFlowNode for Attribute |
-| UnsafeUnpack.py:17:23:17:34 | ControlFlowNode for Attribute | UnsafeUnpack.py:20:31:20:37 | ControlFlowNode for tarpath |
-| UnsafeUnpack.py:34:50:34:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:35:23:35:38 | ControlFlowNode for local_ziped_path |
-| UnsafeUnpack.py:48:20:48:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:49:23:49:37 | ControlFlowNode for compressed_file |
-| UnsafeUnpack.py:52:19:52:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:53:23:53:37 | ControlFlowNode for compressed_file |
-| UnsafeUnpack.py:66:19:66:31 | ControlFlowNode for Attribute | UnsafeUnpack.py:67:23:67:37 | ControlFlowNode for compressed_file |
-| UnsafeUnpack.py:80:16:80:28 | ControlFlowNode for Attribute | UnsafeUnpack.py:86:15:86:26 | ControlFlowNode for Attribute |
-| UnsafeUnpack.py:86:15:86:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:88:23:88:29 | ControlFlowNode for tarpath |
-nodes
-| UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request | semmle.label | ModuleVariableNode for UnsafeUnpack.request |
-| UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
-| UnsafeUnpack.py:5:26:5:32 | GSSA Variable request | semmle.label | GSSA Variable request |
-| UnsafeUnpack.py:11:16:11:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
-| UnsafeUnpack.py:11:16:11:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| UnsafeUnpack.py:17:23:17:34 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| UnsafeUnpack.py:20:31:20:37 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
-| UnsafeUnpack.py:34:50:34:65 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |
-| UnsafeUnpack.py:35:23:35:38 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |
-| UnsafeUnpack.py:48:20:48:34 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
-| UnsafeUnpack.py:49:23:49:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
-| UnsafeUnpack.py:52:19:52:36 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
-| UnsafeUnpack.py:53:23:53:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
-| UnsafeUnpack.py:66:19:66:31 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| UnsafeUnpack.py:67:23:67:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
-| UnsafeUnpack.py:80:16:80:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| UnsafeUnpack.py:86:15:86:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
-| UnsafeUnpack.py:88:23:88:29 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
-subpaths
-#select
-| UnsafeUnpack.py:20:31:20:37 | ControlFlowNode for tarpath | UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | UnsafeUnpack.py:20:31:20:37 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
-| UnsafeUnpack.py:35:23:35:38 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:34:50:34:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:35:23:35:38 | ControlFlowNode for local_ziped_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
-| UnsafeUnpack.py:49:23:49:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:48:20:48:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:49:23:49:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
-| UnsafeUnpack.py:53:23:53:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:52:19:52:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:53:23:53:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
-| UnsafeUnpack.py:67:23:67:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:66:19:66:31 | ControlFlowNode for Attribute | UnsafeUnpack.py:67:23:67:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
-| UnsafeUnpack.py:88:23:88:29 | ControlFlowNode for tarpath | UnsafeUnpack.py:80:16:80:28 | ControlFlowNode for Attribute | UnsafeUnpack.py:88:23:88:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+ERROR: Could not resolve module DataFlow (/home/sim4n6/Desktop/Ghsecuritylab/my-codeql-PR-3/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql:19,8-16)
+ERROR: Could not resolve module DataFlow (/home/sim4n6/Desktop/Ghsecuritylab/my-codeql-PR-3/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql:21,36-44)
+ERROR: Could not resolve module DataFlow (/home/sim4n6/Desktop/Ghsecuritylab/my-codeql-PR-3/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql:21,63-71)
+ERROR: Could not resolve module UnsafeUnpackQuery (/home/sim4n6/Desktop/Ghsecuritylab/my-codeql-PR-3/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql:18,8-25)
+ERROR: Could not resolve type UnsafeUnpackingConfig (/home/sim4n6/Desktop/Ghsecuritylab/my-codeql-PR-3/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql:21,6-27)
From e41042418aad3922897caaaac0ecdb58da102e97 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Fri, 27 Jan 2023 13:46:57 +0100
Subject: [PATCH 139/415] Update the import relative to the dataflow config
---
python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
index 1e17cb728ca..41d46218ec6 100644
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -15,7 +15,7 @@
*/
import python
-import UnsafeUnpackQuery
+import experimental.Security.UnsafeUnpackQuery
import DataFlow::PathGraph
from UnsafeUnpackingConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
From 18d8bbc9a4f09be753ad93c8085f767d6f07755d Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Fri, 27 Jan 2023 14:05:25 +0100
Subject: [PATCH 140/415] Updated the expected results accordingly
---
.../Security/CWE-022/UnsafeUnpack.expected | 65 +++++++++++++++++--
1 file changed, 60 insertions(+), 5 deletions(-)
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
index c07d7f1e245..eed5e0e45d7 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected
@@ -1,5 +1,60 @@
-ERROR: Could not resolve module DataFlow (/home/sim4n6/Desktop/Ghsecuritylab/my-codeql-PR-3/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql:19,8-16)
-ERROR: Could not resolve module DataFlow (/home/sim4n6/Desktop/Ghsecuritylab/my-codeql-PR-3/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql:21,36-44)
-ERROR: Could not resolve module DataFlow (/home/sim4n6/Desktop/Ghsecuritylab/my-codeql-PR-3/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql:21,63-71)
-ERROR: Could not resolve module UnsafeUnpackQuery (/home/sim4n6/Desktop/Ghsecuritylab/my-codeql-PR-3/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql:18,8-25)
-ERROR: Could not resolve type UnsafeUnpackingConfig (/home/sim4n6/Desktop/Ghsecuritylab/my-codeql-PR-3/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql:21,6-27)
+edges
+| UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request | UnsafeUnpack.py:11:18:11:24 | ControlFlowNode for request |
+| UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | UnsafeUnpack.py:5:26:5:32 | GSSA Variable request |
+| UnsafeUnpack.py:5:26:5:32 | GSSA Variable request | UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request |
+| UnsafeUnpack.py:11:18:11:24 | ControlFlowNode for request | UnsafeUnpack.py:11:18:11:29 | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:11:18:11:29 | ControlFlowNode for Attribute | UnsafeUnpack.py:17:27:17:38 | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:17:27:17:38 | ControlFlowNode for Attribute | UnsafeUnpack.py:19:35:19:41 | ControlFlowNode for tarpath |
+| UnsafeUnpack.py:33:50:33:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:34:23:34:38 | ControlFlowNode for local_ziped_path |
+| UnsafeUnpack.py:47:20:47:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:48:23:48:37 | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:51:19:51:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:52:23:52:37 | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:65:19:65:31 | ControlFlowNode for Attribute | UnsafeUnpack.py:66:23:66:37 | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:79:16:79:28 | ControlFlowNode for Attribute | UnsafeUnpack.py:85:15:85:26 | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:85:15:85:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:87:23:87:29 | ControlFlowNode for tarpath |
+| UnsafeUnpack.py:103:23:103:27 | SSA variable chunk | UnsafeUnpack.py:105:35:105:42 | ControlFlowNode for savepath |
+| UnsafeUnpack.py:103:32:103:44 | ControlFlowNode for Attribute | UnsafeUnpack.py:103:32:103:54 | ControlFlowNode for Subscript |
+| UnsafeUnpack.py:103:32:103:54 | ControlFlowNode for Subscript | UnsafeUnpack.py:103:23:103:27 | SSA variable chunk |
+| UnsafeUnpack.py:108:22:108:34 | ControlFlowNode for Attribute | UnsafeUnpack.py:112:35:112:43 | ControlFlowNode for file_path |
+| UnsafeUnpack.py:116:17:116:21 | SSA variable ufile | UnsafeUnpack.py:118:38:118:47 | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:116:27:116:39 | ControlFlowNode for Attribute | UnsafeUnpack.py:116:17:116:21 | SSA variable ufile |
+| UnsafeUnpack.py:118:38:118:47 | ControlFlowNode for Attribute | UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path |
+nodes
+| UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request | semmle.label | ModuleVariableNode for UnsafeUnpack.request |
+| UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
+| UnsafeUnpack.py:5:26:5:32 | GSSA Variable request | semmle.label | GSSA Variable request |
+| UnsafeUnpack.py:11:18:11:24 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| UnsafeUnpack.py:11:18:11:29 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:17:27:17:38 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:19:35:19:41 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
+| UnsafeUnpack.py:33:50:33:65 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |
+| UnsafeUnpack.py:34:23:34:38 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |
+| UnsafeUnpack.py:47:20:47:34 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:48:23:48:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:51:19:51:36 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| UnsafeUnpack.py:52:23:52:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:65:19:65:31 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:66:23:66:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
+| UnsafeUnpack.py:79:16:79:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:85:15:85:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:87:23:87:29 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
+| UnsafeUnpack.py:103:23:103:27 | SSA variable chunk | semmle.label | SSA variable chunk |
+| UnsafeUnpack.py:103:32:103:44 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:103:32:103:54 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+| UnsafeUnpack.py:105:35:105:42 | ControlFlowNode for savepath | semmle.label | ControlFlowNode for savepath |
+| UnsafeUnpack.py:108:22:108:34 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:112:35:112:43 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path |
+| UnsafeUnpack.py:116:17:116:21 | SSA variable ufile | semmle.label | SSA variable ufile |
+| UnsafeUnpack.py:116:27:116:39 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:118:38:118:47 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | semmle.label | ControlFlowNode for uploaded_file_path |
+subpaths
+#select
+| UnsafeUnpack.py:19:35:19:41 | ControlFlowNode for tarpath | UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | UnsafeUnpack.py:19:35:19:41 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:34:23:34:38 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:33:50:33:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:34:23:34:38 | ControlFlowNode for local_ziped_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:48:23:48:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:47:20:47:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:48:23:48:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:52:23:52:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:51:19:51:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:52:23:52:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:66:23:66:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:65:19:65:31 | ControlFlowNode for Attribute | UnsafeUnpack.py:66:23:66:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:87:23:87:29 | ControlFlowNode for tarpath | UnsafeUnpack.py:79:16:79:28 | ControlFlowNode for Attribute | UnsafeUnpack.py:87:23:87:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:105:35:105:42 | ControlFlowNode for savepath | UnsafeUnpack.py:103:32:103:44 | ControlFlowNode for Attribute | UnsafeUnpack.py:105:35:105:42 | ControlFlowNode for savepath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:112:35:112:43 | ControlFlowNode for file_path | UnsafeUnpack.py:108:22:108:34 | ControlFlowNode for Attribute | UnsafeUnpack.py:112:35:112:43 | ControlFlowNode for file_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
+| UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | UnsafeUnpack.py:116:27:116:39 | ControlFlowNode for Attribute | UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
From 207ed3da9c3c4aa52b03c34fcfc20eaaeda44f05 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Fri, 27 Jan 2023 15:07:20 +0100
Subject: [PATCH 141/415] Constrain the object & the call
---
.../Security/UnsafeUnpackQuery.qll | 39 ++++++-------------
1 file changed, 11 insertions(+), 28 deletions(-)
diff --git a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
index 4e7e97188c0..182ef30f84b 100644
--- a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
@@ -61,13 +61,6 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
- // Reading the response
- exists(MethodCallNode mc |
- nodeFrom = mc.getObject() and
- mc.getMethodName() = "read" and
- nodeTo = mc
- )
- or
// Open for access
exists(MethodCallNode cn |
nodeTo = cn.getObject() and
@@ -77,21 +70,20 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
or
// Write for access
exists(MethodCallNode cn |
- nodeFrom = cn.getObject() and
- cn.getMethodName() = "write" and
+ cn.calls(nodeFrom, "write") and
nodeTo = cn.getArg(0)
)
or
// Retrieve Django uploaded files
- // see HttpRequest.FILES.getlist(): https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.QueryDict.getlist
- exists(MethodCallNode mc |
- nodeFrom = mc.getObject() and
- mc.getMethodName() = ["getlist", "get"] and
- nodeTo = mc
- )
+ // see getlist(): https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.QueryDict.getlist
+ // see chunks(): https://docs.djangoproject.com/en/4.1/ref/files/uploads/#django.core.files.uploadedfile.UploadedFile.chunks
+ nodeTo.(MethodCallNode).calls(nodeFrom, ["getlist", "get", "chunks"])
+ or
+ // Reading the response
+ nodeTo.(MethodCallNode).calls(nodeFrom, "read")
or
// Accessing the name or raw content
- exists(AttrRead ar | ar.accesses(nodeFrom, ["name", "raw"]) and ar.flowsTo(nodeTo))
+ nodeTo.(AttrRead).accesses(nodeFrom, ["name", "raw"])
or
// Considering the use of "fs"
exists(API::CallNode fs, MethodCallNode mcn |
@@ -109,21 +101,12 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
)
or
//Use of join of filename
- exists(API::CallNode mcn |
- mcn = API::moduleImport("os").getMember("path").getMember("join").getACall() and
- nodeFrom = mcn.getArg(1) and
- mcn.flowsTo(nodeTo)
- )
- or
- // Read by chunks
- exists(MethodCallNode mc |
- nodeFrom = mc.getObject() and mc.getMethodName() = "chunks" and mc.flowsTo(nodeTo)
- )
+ nodeTo = API::moduleImport("os").getMember("path").getMember("join").getACall() and
+ nodeFrom = nodeTo.(API::CallNode).getArg(1)
or
// Write access
exists(MethodCallNode cn |
- nodeTo = cn.getObject() and
- cn.getMethodName() = "write" and
+ cn.calls(nodeTo, "write") and
nodeFrom = cn.getArg(0)
)
or
From c099dbd04ce85cd9e4f1f3b1f5395d81448aa651 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 27 Jan 2023 15:27:45 +0100
Subject: [PATCH 142/415] Python: Expand notes around bound methods `self`
argument passing
---
.../new/internal/DataFlowDispatch.qll | 48 +++++++++++++++++--
1 file changed, 45 insertions(+), 3 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index a73be967534..9073686d4be 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -1151,6 +1151,40 @@ predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
*
* Note: If `Bar.meth` and `Foo.meth` resolves to the same function, we will end up
* sending both `self` arguments to that function, which is by definition the right thing to do.
+ *
+ * ### Bound methods
+ *
+ * For bound methods, such as `bm = x.m; bm()`, it's a little unclear whether we should
+ * still use the object in the attribute lookup (`x.m`) as the self argument in the
+ * call (`bm()`). We currently do this, but there might also be cases where we don't
+ * want to do this.
+ *
+ * In the example below, we want to clear taint from the list before it reaches the
+ * sink, but because we don't have a use of `l` in the `clear()` call, we currently
+ * don't have any way to achieve our goal. (Note that this is a contrived example)
+ *
+ * ```py
+ * l = list()
+ * clear = l.clear
+ * l.append(tainted)
+ * clear()
+ * sink(l)
+ * ```
+ *
+ * To make the above even worse, bound-methods have a `__self__` property that refers to
+ * the object of the bound-method, so we can re-write the code as:
+ *
+ * ```py
+ * l = list()
+ * clear = l.clear
+ * clear.__self__.append(tainted)
+ * clear()
+ * sink(l)
+ * ```
+ *
+ * One idea to solve this is to track the object in a synthetic data-flow node every
+ * time the bound method is used, such that the `clear()` call would essentially be
+ * translated into `l.clear()`, and we can still have use-use flow.
*/
cached
predicate getCallArg(CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) {
@@ -1160,16 +1194,24 @@ predicate getCallArg(CallNode call, Function target, CallType type, Node arg, Ar
type instanceof CallTypePlainFunction and
normalCallArg(call, arg, apos)
or
- // self argument for normal method calls
+ // self argument for normal method calls -- see note above about bound methods
type instanceof CallTypeNormalMethod and
apos.isSelf() and
resolveMethodCall(call, target, type, arg) and
- // dataflow lib has requirement that arguments and calls are in same enclosing callable.
+ // dataflow lib has requirement that arguments and calls are in same enclosing
+ // callable. This requirement would be broken if we used `my_obj` as the self
+ // argument in the `f()` call in the example below:
+ // ```py
+ // def call_func(f):
+ // f()
+ //
+ // call_func(my_obj.some_method)
+ // ```
exists(CfgNode cfgNode | cfgNode.getNode() = call |
cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
)
or
- // cls argument for classmethod calls
+ // cls argument for classmethod calls -- see ntoe above about bound methods
type instanceof CallTypeClassMethod and
apos.isSelf() and
resolveMethodCall(call, target, type, arg) and
From cef933f8139d847f63a7b12c476a3cad372ba8b4 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Fri, 27 Jan 2023 15:48:59 +0100
Subject: [PATCH 143/415] Python: Add comment explaining `SINK3_F(kwargs["c"])`
test
Co-authored-by: yoff
---
.../ql/test/experimental/dataflow/coverage/argumentPassing.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
index 2f80571de7c..ecf4a0d201d 100644
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -174,6 +174,7 @@ def test_kw_doublestar():
def only_kwargs(**kwargs):
SINK1(kwargs["a"])
SINK2(kwargs["b"])
+ # testing precise content tracking, that content from `a` or `b` does not end up here.
SINK3_F(kwargs["c"])
@expects(3)
@@ -189,6 +190,7 @@ def mixed(a, **kwargs):
except KeyError:
print("OK")
SINK2(kwargs["b"])
+ # testing precise content tracking, that content from `a` or `b` does not end up here.
SINK3_F(kwargs["c"])
@expects(4*3)
From 8ef2aa00e73814b6c84d40cee53b0a3afdb650c5 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Fri, 27 Jan 2023 16:07:39 +0100
Subject: [PATCH 144/415] Update
python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
Co-authored-by: yoff
---
.../src/experimental/Security/UnsafeUnpackQuery.qll | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
index 182ef30f84b..c0a370fa9f4 100644
--- a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
@@ -32,12 +32,13 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
or
// A source catching an S3 filename download
// see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file
- exists(MethodCallNode mcn, Node s3, Node bc |
- bc = API::moduleImport("boto3").getMember("client").getACall() and
- bc = s3.getALocalSource() and
- mcn.calls(s3, "download_file") and
- source = mcn.getArg(2)
- )
+ source =
+ API::moduleImport("boto3")
+ .getMember("client")
+ .getReturn()
+ .getMember("download_file")
+ .getACall()
+ .getArg(2)
or
// A source download a file using wget
// see wget: https://pypi.org/project/wget/
From 0b27b1314af813607a9a4327eced1aeb46b78a04 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Fri, 27 Jan 2023 16:12:08 +0100
Subject: [PATCH 145/415] Update
python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
Co-authored-by: yoff
---
python/ql/src/experimental/Security/UnsafeUnpackQuery.qll | 1 -
1 file changed, 1 deletion(-)
diff --git a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
index c0a370fa9f4..f660294cb59 100644
--- a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
@@ -24,7 +24,6 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
args =
API::moduleImport("argparse")
.getMember("ArgumentParser")
- .getACall()
.getReturn()
.getMember("parse_args")
.getACall()
From ee213123ac76101840c0762b74973fe03c063e1a Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Fri, 27 Jan 2023 18:16:11 +0100
Subject: [PATCH 146/415] Add builtin open as an additional step
---
.../src/experimental/Security/UnsafeUnpackQuery.qll | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
index f660294cb59..837f84ee21f 100644
--- a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
@@ -61,18 +61,15 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
- // Open for access
+ // Open a file for access
exists(MethodCallNode cn |
nodeTo = cn.getObject() and
cn.getMethodName() = "open" and
cn.flowsTo(nodeFrom)
)
or
- // Write for access
- exists(MethodCallNode cn |
- cn.calls(nodeFrom, "write") and
- nodeTo = cn.getArg(0)
- )
+ // Open a file for access using builtin
+ nodeFrom = API::builtin("open").getACall() and nodeTo = nodeFrom.(API::CallNode).getArg(0)
or
// Retrieve Django uploaded files
// see getlist(): https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.QueryDict.getlist
@@ -100,7 +97,7 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
nodeTo = mcn
)
or
- //Use of join of filename
+ // Join the base_dir to the filename
nodeTo = API::moduleImport("os").getMember("path").getMember("join").getACall() and
nodeFrom = nodeTo.(API::CallNode).getArg(1)
or
From 0e2f37825ddd56e1dbdd3b131bc4a6e97821cd9c Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Fri, 27 Jan 2023 23:58:03 +0100
Subject: [PATCH 147/415] Organize steps to correspond to the sample code
---
.../Security/UnsafeUnpackQuery.qll | 38 +++++++++----------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
index 837f84ee21f..bdac41e8812 100644
--- a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
@@ -61,6 +61,9 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+ // Reading the response
+ nodeTo.(MethodCallNode).calls(nodeFrom, "read")
+ or
// Open a file for access
exists(MethodCallNode cn |
nodeTo = cn.getObject() and
@@ -68,6 +71,12 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
cn.flowsTo(nodeFrom)
)
or
+ // Write access
+ exists(MethodCallNode cn |
+ cn.calls(nodeTo, "write") and
+ nodeFrom = cn.getArg(0)
+ )
+ or
// Open a file for access using builtin
nodeFrom = API::builtin("open").getACall() and nodeTo = nodeFrom.(API::CallNode).getArg(0)
or
@@ -76,11 +85,13 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
// see chunks(): https://docs.djangoproject.com/en/4.1/ref/files/uploads/#django.core.files.uploadedfile.UploadedFile.chunks
nodeTo.(MethodCallNode).calls(nodeFrom, ["getlist", "get", "chunks"])
or
- // Reading the response
- nodeTo.(MethodCallNode).calls(nodeFrom, "read")
- or
- // Accessing the name or raw content
- nodeTo.(AttrRead).accesses(nodeFrom, ["name", "raw"])
+ // Writing the response data to the archive
+ exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
+ is.flowsTo(f) and
+ mc.calls(f, "write") and
+ nodeFrom = mc.getArg(0) and
+ nodeTo = is.(CallCfgNode).getArg(0)
+ )
or
// Considering the use of "fs"
exists(API::CallNode fs, MethodCallNode mcn |
@@ -97,22 +108,11 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
nodeTo = mcn
)
or
+ // Accessing the name or raw content
+ nodeTo.(AttrRead).accesses(nodeFrom, ["name", "raw"])
+ or
// Join the base_dir to the filename
nodeTo = API::moduleImport("os").getMember("path").getMember("join").getACall() and
nodeFrom = nodeTo.(API::CallNode).getArg(1)
- or
- // Write access
- exists(MethodCallNode cn |
- cn.calls(nodeTo, "write") and
- nodeFrom = cn.getArg(0)
- )
- or
- // Writing the response data to the archive
- exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
- is.flowsTo(f) and
- mc.calls(f, "write") and
- nodeFrom = mc.getArg(0) and
- nodeTo = is.(CallCfgNode).getArg(0)
- )
}
}
From a4aaf0ec6f9ed100ef1966a37cc65c5fc62553a9 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Sat, 28 Jan 2023 09:53:54 +0100
Subject: [PATCH 148/415] Remove a write step & update the builtin open step
---
.../Security/UnsafeUnpackQuery.qll | 21 +++++++------------
1 file changed, 8 insertions(+), 13 deletions(-)
diff --git a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
index bdac41e8812..d449f5f5433 100644
--- a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
@@ -66,8 +66,14 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
or
// Open a file for access
exists(MethodCallNode cn |
- nodeTo = cn.getObject() and
- cn.getMethodName() = "open" and
+ cn.calls(nodeTo, "open") and
+ cn.flowsTo(nodeFrom)
+ )
+ or
+ // Open a file for access using builtin
+ exists(API::CallNode cn |
+ cn = API::builtin("open").getACall() and
+ nodeTo = cn.(API::CallNode).getArg(0) and
cn.flowsTo(nodeFrom)
)
or
@@ -77,22 +83,11 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
nodeFrom = cn.getArg(0)
)
or
- // Open a file for access using builtin
- nodeFrom = API::builtin("open").getACall() and nodeTo = nodeFrom.(API::CallNode).getArg(0)
- or
// Retrieve Django uploaded files
// see getlist(): https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.QueryDict.getlist
// see chunks(): https://docs.djangoproject.com/en/4.1/ref/files/uploads/#django.core.files.uploadedfile.UploadedFile.chunks
nodeTo.(MethodCallNode).calls(nodeFrom, ["getlist", "get", "chunks"])
or
- // Writing the response data to the archive
- exists(Stdlib::FileLikeObject::InstanceSource is, Node f, MethodCallNode mc |
- is.flowsTo(f) and
- mc.calls(f, "write") and
- nodeFrom = mc.getArg(0) and
- nodeTo = is.(CallCfgNode).getArg(0)
- )
- or
// Considering the use of "fs"
exists(API::CallNode fs, MethodCallNode mcn |
fs =
From 0707064ab511390a79b359880c97f6034a95a871 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Sat, 28 Jan 2023 10:14:24 +0100
Subject: [PATCH 149/415] Constrain the save/path step
---
.../Security/UnsafeUnpackQuery.qll | 24 +++++++++----------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
index d449f5f5433..99ffb443dca 100644
--- a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
@@ -89,19 +89,17 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
nodeTo.(MethodCallNode).calls(nodeFrom, ["getlist", "get", "chunks"])
or
// Considering the use of "fs"
- exists(API::CallNode fs, MethodCallNode mcn |
- fs =
- API::moduleImport("django")
- .getMember("core")
- .getMember("files")
- .getMember("storage")
- .getMember("FileSystemStorage")
- .getACall() and
- fs.flowsTo(mcn.getObject()) and
- mcn.getMethodName() = ["save", "path"] and
- nodeFrom = mcn.getArg(0) and
- nodeTo = mcn
- )
+ // see fs: https://docs.djangoproject.com/en/4.1/ref/files/storage/#the-filesystemstorage-class
+ nodeTo =
+ API::moduleImport("django")
+ .getMember("core")
+ .getMember("files")
+ .getMember("storage")
+ .getMember("FileSystemStorage")
+ .getReturn()
+ .getMember(["save", "path"])
+ .getACall() and
+ nodeFrom = nodeTo.(MethodCallNode).getArg(0)
or
// Accessing the name or raw content
nodeTo.(AttrRead).accesses(nodeFrom, ["name", "raw"])
From 7079def7ce31920affd1e585a33116c1ba5a5036 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Mon, 30 Jan 2023 00:49:23 +0100
Subject: [PATCH 150/415] Add an S3 source with Session or download_fileobj
---
.../experimental/Security/UnsafeUnpackQuery.qll | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
index 99ffb443dca..7f305a1d734 100644
--- a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
@@ -29,13 +29,25 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
.getACall()
)
or
- // A source catching an S3 filename download
+ // A source catching an S3 file download
// see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file
source =
API::moduleImport("boto3")
.getMember("client")
.getReturn()
- .getMember("download_file")
+ .getMember(["download_file", "download_fileobj"])
+ .getACall()
+ .getArg(2)
+ or
+ // A source catching an S3 file download
+ // see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+ source =
+ API::moduleImport("boto3")
+ .getMember("Session")
+ .getReturn()
+ .getMember("client")
+ .getReturn()
+ .getMember(["download_file", "download_fileobj"])
.getACall()
.getArg(2)
or
From a1c2f4c13883d3da8fe842c8c6741bf481d38ab7 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 30 Jan 2023 09:42:43 +0100
Subject: [PATCH 151/415] Python: Small rewrite of `**kwargs` getParameter
logic
---
.../dataflow/new/internal/DataFlowDispatch.qll | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 9073686d4be..c05617df47f 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -351,15 +351,10 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
// synthetic to the real. It might seem more natural to do it in the other
// direction, but since we have a clearStep on the real **kwargs parameter, we that
// content-clearing would also affect the synthetic parameter, which we don't want.
- (
- not exists(func.getArgByName(_)) and
- ppos.isDictSplat() and
- result.getParameter() = func.getKwarg()
- or
- exists(func.getArgByName(_)) and
- ppos.isDictSplat() and
- result = TSynthDictSplatParameterNode(this)
- )
+ ppos.isDictSplat() and
+ if exists(func.getArgByName(_))
+ then result = TSynthDictSplatParameterNode(this)
+ else result.getParameter() = func.getKwarg()
}
}
From c7e552b343ea0cf8ea595c6742fb5b7f11eef7b8 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Mon, 30 Jan 2023 09:45:45 +0100
Subject: [PATCH 152/415] Python: Fix grammar in qldoc
Co-authored-by: yoff
---
.../python/dataflow/new/internal/DataFlowDispatch.qll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index c05617df47f..4641ae4a912 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -345,11 +345,11 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
)
or
// `**kwargs`
- // since dataflow library has restriction that we can only have ONE result per
+ // since the dataflow library has the restriction that we can only have ONE result per
// parameter position, if there is both a synthetic **kwargs and a real **kwargs
// parameter, we only give the result for the synthetic, and add local flow from the
// synthetic to the real. It might seem more natural to do it in the other
- // direction, but since we have a clearStep on the real **kwargs parameter, we that
+ // direction, but since we have a clearStep on the real **kwargs parameter, we would have that
// content-clearing would also affect the synthetic parameter, which we don't want.
ppos.isDictSplat() and
if exists(func.getArgByName(_))
@@ -1193,7 +1193,7 @@ predicate getCallArg(CallNode call, Function target, CallType type, Node arg, Ar
type instanceof CallTypeNormalMethod and
apos.isSelf() and
resolveMethodCall(call, target, type, arg) and
- // dataflow lib has requirement that arguments and calls are in same enclosing
+ // the dataflow library has a requirement that arguments and calls are in the same enclosing
// callable. This requirement would be broken if we used `my_obj` as the self
// argument in the `f()` call in the example below:
// ```py
@@ -1206,7 +1206,7 @@ predicate getCallArg(CallNode call, Function target, CallType type, Node arg, Ar
cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
)
or
- // cls argument for classmethod calls -- see ntoe above about bound methods
+ // cls argument for classmethod calls -- see note above about bound methods
type instanceof CallTypeClassMethod and
apos.isSelf() and
resolveMethodCall(call, target, type, arg) and
From fdb33ff48efadc5aa07d6b9d8ee912137e70b987 Mon Sep 17 00:00:00 2001
From: Rasmus Wriedt Larsen
Date: Wed, 1 Feb 2023 14:01:20 +0100
Subject: [PATCH 153/415] Python: Fix grammar in change-note
Co-authored-by: Taus
---
python/ql/lib/change-notes/2023-01-16-new-call-graph.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/ql/lib/change-notes/2023-01-16-new-call-graph.md b/python/ql/lib/change-notes/2023-01-16-new-call-graph.md
index 1dbfd05a80f..3a9e6c3abc0 100644
--- a/python/ql/lib/change-notes/2023-01-16-new-call-graph.md
+++ b/python/ql/lib/change-notes/2023-01-16-new-call-graph.md
@@ -1,4 +1,4 @@
---
category: majorAnalysis
---
-* We use a new analysis for the call-graph (determining which function is called). This can lead to changed results. In most cases this is much more accurate than the old call-graph that was based on points-to, but we do loose a few valid edges in the call-graph, especially around methods that are not defined inside its' class.
+* We use a new analysis for the call-graph (determining which function is called). This can lead to changed results. In most cases this is much more accurate than the old call-graph that was based on points-to, but we do lose a few valid edges in the call-graph, especially around methods that are not defined inside their class.
From 1a8c9abee232997cd7d90cb10d65b1bc7c905a60 Mon Sep 17 00:00:00 2001
From: Sim4n6
Date: Thu, 2 Feb 2023 21:09:40 +0100
Subject: [PATCH 154/415] Incorporate Sink & Source as steps from TarSlipQry
---
.../Security/UnsafeUnpackQuery.qll | 86 ++++++++++++++++++-
.../Security/CWE-022/UnsafeUnpack.py | 78 ++++++++++++++++-
2 files changed, 161 insertions(+), 3 deletions(-)
diff --git a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
index 7f305a1d734..b43410b14ad 100644
--- a/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
+++ b/python/ql/src/experimental/Security/UnsafeUnpackQuery.qll
@@ -10,6 +10,35 @@ import semmle.python.dataflow.new.TaintTracking
import semmle.python.frameworks.Stdlib
import semmle.python.dataflow.new.RemoteFlowSources
+/**
+ * Handle those three cases of Tarfile opens:
+ * - `tarfile.open()`
+ * - `tarfile.TarFile()`
+ * - `MKtarfile.Tarfile.open()`
+ */
+API::Node tarfileOpen() {
+ result in [
+ API::moduleImport("tarfile").getMember(["open", "TarFile"]),
+ API::moduleImport("tarfile").getMember("TarFile").getASubclass().getMember("open")
+ ]
+}
+
+/**
+ * Handle the previous three cases, plus the use of `closing` in the previous cases
+ */
+class AllTarfileOpens extends API::CallNode {
+ AllTarfileOpens() {
+ this = tarfileOpen().getACall()
+ or
+ exists(API::Node closing, Node arg |
+ closing = API::moduleImport("contextlib").getMember("closing") and
+ this = closing.getACall() and
+ arg = this.getArg(0) and
+ arg = tarfileOpen().getACall()
+ )
+ }
+}
+
class UnsafeUnpackingConfig extends TaintTracking::Configuration {
UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" }
@@ -68,8 +97,47 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
}
override predicate isSink(DataFlow::Node sink) {
- // A sink capturing method calls to `unpack_archive`.
- sink = API::moduleImport("shutil").getMember("unpack_archive").getACall().getArg(0)
+ (
+ // A sink capturing method calls to `unpack_archive`.
+ sink = API::moduleImport("shutil").getMember("unpack_archive").getACall().getArg(0)
+ or
+ // A sink capturing method calls to `extractall` without `members` argument.
+ // For a call to `file.extractall` without `members` argument, `file` is considered a sink.
+ exists(MethodCallNode call, AllTarfileOpens atfo |
+ call = atfo.getReturn().getMember("extractall").getACall() and
+ not exists(Node arg | arg = call.getArgByName("members")) and
+ sink = call.getObject()
+ )
+ or
+ // A sink capturing method calls to `extractall` with `members` argument.
+ // For a call to `file.extractall` with `members` argument, `file` is considered a sink if
+ // the `members` argument is a NameConstant (e.g. `None`) or a List; if it is a call to `getmembers`, the object of that call is the sink.
+ // Otherwise, the argument of `members` is considered a sink.
+ exists(MethodCallNode call, Node arg, AllTarfileOpens atfo |
+ call = atfo.getReturn().getMember("extractall").getACall() and
+ arg = call.getArgByName("members") and
+ if
+ arg.asCfgNode() instanceof NameConstantNode or
+ arg.asCfgNode() instanceof ListNode
+ then sink = call.getObject()
+ else
+ if arg.(MethodCallNode).getMethodName() = "getmembers"
+ then sink = arg.(MethodCallNode).getObject()
+ else sink = call.getArgByName("members")
+ )
+ or
+ // An argument to `extract` is considered a sink.
+ exists(AllTarfileOpens atfo |
+ sink = atfo.getReturn().getMember("extract").getACall().getArg(0)
+ )
+ or
+ //An argument to `_extract_member` is considered a sink.
+ exists(MethodCallNode call, AllTarfileOpens atfo |
+ call = atfo.getReturn().getMember("_extract_member").getACall() and
+ call.getArg(1).(AttrRead).accesses(sink, "name")
+ )
+ ) and
+ not sink.getScope().getLocation().getFile().inStdlib()
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
@@ -119,5 +187,19 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
// Join the base_dir to the filename
nodeTo = API::moduleImport("os").getMember("path").getMember("join").getACall() and
nodeFrom = nodeTo.(API::CallNode).getArg(1)
+ or
+ // Go through an Open for a Tarfile
+ nodeTo = tarfileOpen().getACall() and nodeFrom = nodeTo.(MethodCallNode).getArg(0)
+ or
+ // Handle the case where the getmembers is used.
+ nodeTo.(MethodCallNode).calls(nodeFrom, "getmembers") and
+ nodeFrom instanceof AllTarfileOpens
+ or
+ // To handle the case of `with closing(tarfile.open()) as file:`
+ // we add a step from the first argument of `closing` to the call to `closing`,
+ // whenever that first argument is a return of `tarfile.open()`.
+ nodeTo = API::moduleImport("contextlib").getMember("closing").getACall() and
+ nodeFrom = nodeTo.(API::CallNode).getArg(0) and
+ nodeFrom = tarfileOpen().getReturn().getAValueReachableFromSource()
}
}
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
index ae9ee055999..50574281cbd 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py
@@ -122,4 +122,80 @@ def simple_upload(request):
return render(request, 'simple_upload.html')
elif request.method == 'GET':
- return render(request, 'simple_upload.html')
\ No newline at end of file
+ return render(request, 'simple_upload.html')
+
+
+import shutil
+import os
+import tarfile
+import tempfile
+import argparse
+
+parser = argparse.ArgumentParser(description='Process some integers.')
+parser.add_argument('integers', metavar='N', type=int, nargs='+',
+ help='an integer for the accumulator')
+parser.add_argument('filename', help='filename to be provided')
+
+args = parser.parse_args()
+unsafe_filename_tar = args.filename
+with tarfile.TarFile(unsafe_filename_tar, mode="r") as tar:
+ tar.extractall(path="/tmp/unpack/", members=tar) # $result=BAD
+tar = tarfile.open(unsafe_filename_tar)
+
+
+from django.shortcuts import render
+from django.core.files.storage import FileSystemStorage
+import shutil
+
+def simple_upload(request):
+
+ base_dir = "/tmp/baase_dir"
+ if request.method == 'POST':
+ # Read uploaded files by chunks of data
+ # see chunks(): https://docs.djangoproject.com/en/4.1/ref/files/uploads/#django.core.files.uploadedfile.UploadedFile.chunks
+ savepath = os.path.join(base_dir, "tarball_compressed.tar.gz")
+ with open(savepath, 'wb+') as wfile:
+ for chunk in request.FILES["ufile1"].chunks():
+ wfile.write(chunk)
+
+ tar = tarfile.open(savepath)
+ result = []
+ for member in tar:
+ if member.issym():
+ raise ValueError("But it is a symlink")
+ result.append(member)
+ tar.extractall(path=tempfile.mkdtemp(), members=result) # $result=BAD
+ tar.close()
+
+
+response = requests.get(url_filename, stream=True)
+tarpath = "/tmp/tmp456/tarball.tar.gz"
+with open(tarpath, "wb") as f:
+ f.write(response.raw.read())
+target_dir = "/tmp/unpack"
+tarfile.TarFile(tarpath, mode="r").extractall(path=target_dir) # $result=BAD
+
+
+from pathlib import Path
+import tempfile
+import boto3
+
+def default_session() -> boto3.Session:
+ _SESSION = None
+ if _SESSION is None:
+ _SESSION = boto3.Session()
+ return _SESSION
+
+cache = False
+cache_dir = "/tmp/artifacts"
+object_path = "/objects/obj1"
+s3 = default_session().client("s3")
+with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tmp:
+ s3.download_fileobj(bucket_name, object_path, tmp)
+ tmp.seek(0)
+ if cache:
+ cache_dir.mkdir(exist_ok=True, parents=True)
+ target = cache_dir
+ else:
+ target = Path(tempfile.mkdtemp())
+ shutil.unpack_archive(tmp.name, target) # $result=BAD
\ No newline at end of file
From 61095b3c5895700bca5223413a50076dc6ec1300 Mon Sep 17 00:00:00 2001
From: Alex Ford
Date: Thu, 2 Feb 2023 20:27:05 +0000
Subject: [PATCH 155/415] ConceptsShared: Add deprecated DataFlow::Node
CryptographicOperation#getInput() predicate
---
.../ql/lib/semmle/javascript/internal/ConceptsShared.qll | 6 ++++++
python/ql/lib/semmle/python/internal/ConceptsShared.qll | 6 ++++++
ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll | 6 ++++++
3 files changed, 18 insertions(+)
diff --git a/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll b/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll
index 2f6c8bb8b29..5be626877cc 100644
--- a/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll
+++ b/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll
@@ -43,6 +43,9 @@ module Cryptography {
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
DataFlow::Node getAnInput() { result = super.getAnInput() }
+ /** DEPRECATED. This predicate has been renamed to `getAnInput`. */
+ deprecated final DataFlow::Node getInput() { result = super.getInput() }
+
/**
* Gets the block mode used to perform this cryptographic operation.
* This may have no result - for example if the `CryptographicAlgorithm` used
@@ -67,6 +70,9 @@ module Cryptography {
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
abstract DataFlow::Node getAnInput();
+ /** DEPRECATED. This predicate has been renamed to `getAnInput`. */
+ deprecated final DataFlow::Node getInput() { result = this.getAnInput() }
+
/**
* Gets the block mode used to perform this cryptographic operation.
* This may have no result - for example if the `CryptographicAlgorithm` used
diff --git a/python/ql/lib/semmle/python/internal/ConceptsShared.qll b/python/ql/lib/semmle/python/internal/ConceptsShared.qll
index 2f6c8bb8b29..5be626877cc 100644
--- a/python/ql/lib/semmle/python/internal/ConceptsShared.qll
+++ b/python/ql/lib/semmle/python/internal/ConceptsShared.qll
@@ -43,6 +43,9 @@ module Cryptography {
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
DataFlow::Node getAnInput() { result = super.getAnInput() }
+ /** DEPRECATED. This predicate has been renamed to `getAnInput`. */
+ deprecated final DataFlow::Node getInput() { result = super.getInput() }
+
/**
* Gets the block mode used to perform this cryptographic operation.
* This may have no result - for example if the `CryptographicAlgorithm` used
@@ -67,6 +70,9 @@ module Cryptography {
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
abstract DataFlow::Node getAnInput();
+ /** DEPRECATED. This predicate has been renamed to `getAnInput`. */
+ deprecated final DataFlow::Node getInput() { result = this.getAnInput() }
+
/**
* Gets the block mode used to perform this cryptographic operation.
* This may have no result - for example if the `CryptographicAlgorithm` used
diff --git a/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll b/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll
index 2f6c8bb8b29..5be626877cc 100644
--- a/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll
+++ b/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll
@@ -43,6 +43,9 @@ module Cryptography {
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
DataFlow::Node getAnInput() { result = super.getAnInput() }
+ /** DEPRECATED. This predicate has been renamed to `getAnInput`. */
+ deprecated final DataFlow::Node getInput() { result = super.getInput() }
+
/**
* Gets the block mode used to perform this cryptographic operation.
* This may have no result - for example if the `CryptographicAlgorithm` used
@@ -67,6 +70,9 @@ module Cryptography {
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
abstract DataFlow::Node getAnInput();
+ /** DEPRECATED. This predicate has been renamed to `getAnInput`. */
+ deprecated final DataFlow::Node getInput() { result = this.getAnInput() }
+
/**
* Gets the block mode used to perform this cryptographic operation.
* This may have no result - for example if the `CryptographicAlgorithm` used
From e5dfbe2c8ddd8620e0c70c51715e15e8e1d13f6c Mon Sep 17 00:00:00 2001
From: Alex Ford
Date: Thu, 2 Feb 2023 20:27:52 +0000
Subject: [PATCH 156/415] ConceptsShared: Add BlockMode#matchesString(string)
predicate
---
.../ql/lib/semmle/javascript/internal/ConceptsShared.qll | 4 ++++
python/ql/lib/semmle/python/internal/ConceptsShared.qll | 4 ++++
ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll | 4 ++++
3 files changed, 12 insertions(+)
diff --git a/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll b/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll
index 5be626877cc..23b34592852 100644
--- a/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll
+++ b/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll
@@ -91,6 +91,10 @@ module Cryptography {
/** Holds if this block mode is considered to be insecure. */
predicate isWeak() { this = "ECB" }
+
+ /** Holds if the given string appears to match this block mode. */
+ bindingset[s]
+ predicate matchesString(string s) { s.toUpperCase().matches("%" + this + "%") }
}
}
diff --git a/python/ql/lib/semmle/python/internal/ConceptsShared.qll b/python/ql/lib/semmle/python/internal/ConceptsShared.qll
index 5be626877cc..23b34592852 100644
--- a/python/ql/lib/semmle/python/internal/ConceptsShared.qll
+++ b/python/ql/lib/semmle/python/internal/ConceptsShared.qll
@@ -91,6 +91,10 @@ module Cryptography {
/** Holds if this block mode is considered to be insecure. */
predicate isWeak() { this = "ECB" }
+
+ /** Holds if the given string appears to match this block mode. */
+ bindingset[s]
+ predicate matchesString(string s) { s.toUpperCase().matches("%" + this + "%") }
}
}
diff --git a/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll b/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll
index 5be626877cc..23b34592852 100644
--- a/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll
+++ b/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll
@@ -91,6 +91,10 @@ module Cryptography {
/** Holds if this block mode is considered to be insecure. */
predicate isWeak() { this = "ECB" }
+
+ /** Holds if the given string appears to match this block mode. */
+ bindingset[s]
+ predicate matchesString(string s) { s.toUpperCase().matches("%" + this + "%") }
}
}
From 983055b8f9628181ddb19882df6c60ea5f3544bb Mon Sep 17 00:00:00 2001
From: Alex Ford
Date: Thu, 2 Feb 2023 20:28:37 +0000
Subject: [PATCH 157/415] JS: Use shared CryptographicOperation concept and
implement BlockMode getBlockMode()
---
.../EndpointCharacteristics.qll | 2 +-
.../ql/lib/semmle/javascript/Concepts.qll | 7 +
.../javascript/frameworks/CryptoLibraries.qll | 158 ++++++++++++------
.../BrokenCryptoAlgorithmCustomizations.qll | 2 +-
...InsufficientPasswordHashCustomizations.qll | 2 +-
.../CryptoLibraries/CryptographicOperation.ql | 2 +-
6 files changed, 121 insertions(+), 52 deletions(-)
diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll
index e95b2785ceb..fe3286032ad 100644
--- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll
@@ -387,7 +387,7 @@ private class CryptographicOperationFlowCharacteristic extends NotASinkCharacter
CryptographicOperationFlowCharacteristic() { this = "CryptographicOperationFlow" }
override predicate appliesToEndpoint(DataFlow::Node n) {
- any(CryptographicOperation op).getInput() = n
+ any(CryptographicOperation op).getAnInput() = n
}
}
diff --git a/javascript/ql/lib/semmle/javascript/Concepts.qll b/javascript/ql/lib/semmle/javascript/Concepts.qll
index e3c3f0d2357..a760e746030 100644
--- a/javascript/ql/lib/semmle/javascript/Concepts.qll
+++ b/javascript/ql/lib/semmle/javascript/Concepts.qll
@@ -110,3 +110,10 @@ abstract class PersistentWriteAccess extends DataFlow::Node {
*/
abstract DataFlow::Node getValue();
}
+
+/**
+ * Provides models for cryptographic things.
+ */
+module Cryptography {
+ import semmle.javascript.internal.ConceptsShared::Cryptography
+}
diff --git a/javascript/ql/lib/semmle/javascript/frameworks/CryptoLibraries.qll b/javascript/ql/lib/semmle/javascript/frameworks/CryptoLibraries.qll
index 9cf4dcfaace..8d2921e48cb 100644
--- a/javascript/ql/lib/semmle/javascript/frameworks/CryptoLibraries.qll
+++ b/javascript/ql/lib/semmle/javascript/frameworks/CryptoLibraries.qll
@@ -3,22 +3,7 @@
*/
import javascript
-import semmle.javascript.security.CryptoAlgorithms
-
-/**
- * An application of a cryptographic algorithm.
- */
-abstract class CryptographicOperation extends DataFlow::Node {
- /**
- * Gets the input the algorithm is used on, e.g. the plain text input to be encrypted.
- */
- abstract DataFlow::Node getInput();
-
- /**
- * Gets the applied algorithm.
- */
- abstract CryptographicAlgorithm getAlgorithm();
-}
+import semmle.javascript.Concepts::Cryptography
/**
* A key used in a cryptographic algorithm.
@@ -52,13 +37,20 @@ class CryptographicKeyCredentialsExpr extends CredentialsNode instanceof Cryptog
override string getCredentialsKind() { result = "key" }
}
+// Holds if `algorithm` is an `EncryptionAlgorithm` that uses a block cipher
+private predicate isBlockEncryptionAlgorithm(CryptographicAlgorithm algorithm) {
+ algorithm instanceof EncryptionAlgorithm and
+ not algorithm.(EncryptionAlgorithm).isStreamCipher()
+}
+
/**
* A model of the asmCrypto library.
*/
private module AsmCrypto {
- private class Apply extends CryptographicOperation instanceof DataFlow::CallNode {
+ private class Apply extends CryptographicOperation::Range instanceof DataFlow::CallNode {
DataFlow::Node input;
CryptographicAlgorithm algorithm; // non-functional
+ private string algorithmName;
Apply() {
/*
@@ -71,17 +63,22 @@ private module AsmCrypto {
* ```
*/
- exists(DataFlow::SourceNode asmCrypto, string algorithmName |
+ exists(DataFlow::SourceNode asmCrypto |
asmCrypto = DataFlow::globalVarRef("asmCrypto") and
algorithm.matchesName(algorithmName) and
this = asmCrypto.getAPropertyRead(algorithmName).getAMemberCall(_) and
- input = this.getAnArgument()
+ input = this.getArgument(0)
)
}
- override DataFlow::Node getInput() { result = input }
+ override DataFlow::Node getAnInput() { result = input }
override CryptographicAlgorithm getAlgorithm() { result = algorithm }
+
+ override BlockMode getBlockMode() {
+ isBlockEncryptionAlgorithm(this.getAlgorithm()) and
+ result.matchesString(algorithmName)
+ }
}
}
@@ -93,7 +90,7 @@ private module BrowserIdCrypto {
Key() { this = any(Apply apply).getKey() }
}
- private class Apply extends CryptographicOperation instanceof DataFlow::MethodCallNode {
+ private class Apply extends CryptographicOperation::Range instanceof DataFlow::MethodCallNode {
CryptographicAlgorithm algorithm; // non-functional
Apply() {
@@ -126,10 +123,13 @@ private module BrowserIdCrypto {
)
}
- override DataFlow::Node getInput() { result = super.getArgument(0) }
+ override DataFlow::Node getAnInput() { result = super.getArgument(0) }
override CryptographicAlgorithm getAlgorithm() { result = algorithm }
+ // not relevant for browserid-crypto
+ override BlockMode getBlockMode() { none() }
+
DataFlow::Node getKey() { result = super.getArgument(1) }
}
}
@@ -140,6 +140,7 @@ private module BrowserIdCrypto {
private module NodeJSCrypto {
private class InstantiatedAlgorithm extends DataFlow::CallNode {
CryptographicAlgorithm algorithm; // non-functional
+ private string algorithmName;
InstantiatedAlgorithm() {
/*
@@ -158,11 +159,25 @@ private module NodeJSCrypto {
exists(DataFlow::SourceNode mod |
mod = DataFlow::moduleImport("crypto") and
this = mod.getAMemberCall("create" + ["Hash", "Hmac", "Sign", "Cipher"]) and
- algorithm.matchesName(this.getArgument(0).getStringValue())
+ algorithmName = this.getArgument(0).getStringValue() and
+ algorithm.matchesName(algorithmName)
)
}
CryptographicAlgorithm getAlgorithm() { result = algorithm }
+
+ private BlockMode getExplicitBlockMode() { result.matchesString(algorithmName) }
+
+ BlockMode getBlockMode() {
+ isBlockEncryptionAlgorithm(this.getAlgorithm()) and
+ (
+ if exists(this.getExplicitBlockMode())
+ then result = this.getExplicitBlockMode()
+ else
+ // CBC is the default if not explicitly specified
+ result = "CBC"
+ )
+ }
}
private class CreateKey extends CryptographicKeyCreation, DataFlow::CallNode {
@@ -211,14 +226,16 @@ private module NodeJSCrypto {
override predicate isSymmetricKey() { none() }
}
- private class Apply extends CryptographicOperation instanceof DataFlow::MethodCallNode {
+ private class Apply extends CryptographicOperation::Range instanceof DataFlow::MethodCallNode {
InstantiatedAlgorithm instantiation;
Apply() { this = instantiation.getAMethodCall(any(string m | m = "update" or m = "write")) }
- override DataFlow::Node getInput() { result = super.getArgument(0) }
+ override DataFlow::Node getAnInput() { result = super.getArgument(0) }
override CryptographicAlgorithm getAlgorithm() { result = instantiation.getAlgorithm() }
+
+ override BlockMode getBlockMode() { result = instantiation.getBlockMode() }
}
private class Key extends CryptographicKey {
@@ -307,7 +324,7 @@ private module CryptoJS {
input = result.getArgument(0)
}
- private class Apply extends CryptographicOperation {
+ private class Apply extends CryptographicOperation::Range, DataFlow::CallNode {
DataFlow::Node input;
CryptographicAlgorithm algorithm; // non-functional
@@ -316,9 +333,31 @@ private module CryptoJS {
this = getDirectApplication(input, algorithm)
}
- override DataFlow::Node getInput() { result = input }
+ override DataFlow::Node getAnInput() { result = input }
override CryptographicAlgorithm getAlgorithm() { result = algorithm }
+
+ // e.g. CryptoJS.AES.encrypt("msg", "key", { mode: CryptoJS.mode.<modeString> })
+ private BlockMode getExplicitBlockMode() {
+ exists(DataFlow::ObjectLiteralNode o, DataFlow::SourceNode modeNode, string modeString |
+ modeNode = API::moduleImport("crypto-js").getMember("mode").getMember(modeString).asSource() and
+ o.flowsTo(this.getArgument(2)) and
+ modeNode = o.getAPropertySource("mode")
+ |
+ result.matchesString(modeString)
+ )
+ }
+
+ override BlockMode getBlockMode() {
+ isBlockEncryptionAlgorithm(this.getAlgorithm()) and
+ (
+ if exists(this.getExplicitBlockMode())
+ then result = this.getExplicitBlockMode()
+ else
+ // CBC is the default if not explicitly specified
+ result = "CBC"
+ )
+ }
}
private class Key extends CryptographicKey {
@@ -374,7 +413,7 @@ private module CryptoJS {
* A model of the TweetNaCl library.
*/
private module TweetNaCl {
- private class Apply extends CryptographicOperation instanceof DataFlow::CallNode {
+ private class Apply extends CryptographicOperation::Range instanceof DataFlow::CallNode {
DataFlow::Node input;
CryptographicAlgorithm algorithm;
@@ -401,9 +440,12 @@ private module TweetNaCl {
)
}
- override DataFlow::Node getInput() { result = input }
+ override DataFlow::Node getAnInput() { result = input }
override CryptographicAlgorithm getAlgorithm() { result = algorithm }
+
+ // No block ciphers implemented
+ override BlockMode getBlockMode() { none() }
}
}
@@ -434,7 +476,7 @@ private module HashJs {
)
}
- private class Apply extends CryptographicOperation instanceof DataFlow::CallNode {
+ private class Apply extends CryptographicOperation::Range instanceof DataFlow::CallNode {
DataFlow::Node input;
CryptographicAlgorithm algorithm; // non-functional
@@ -456,9 +498,12 @@ private module HashJs {
input = super.getArgument(0)
}
- override DataFlow::Node getInput() { result = input }
+ override DataFlow::Node getAnInput() { result = input }
override CryptographicAlgorithm getAlgorithm() { result = algorithm }
+
+ // not relevant for hash.js
+ override BlockMode getBlockMode() { none() }
}
}
@@ -478,19 +523,20 @@ private module Forge {
private class KeyCipher extends Cipher {
DataFlow::Node key;
CryptographicAlgorithm algorithm; // non-functional
+ private string blockModeString;
KeyCipher() {
exists(DataFlow::SourceNode mod, string algorithmName |
mod = getAnImportNode() and
algorithm.matchesName(algorithmName)
|
- exists(string createName, string cipherName, string cipherPrefix, string cipherSuffix |
+ exists(string createName, string cipherName, string cipherPrefix |
// `require('forge').cipher.createCipher("3DES-CBC").update("secret", "key");`
(createName = "createCipher" or createName = "createDecipher") and
this = mod.getAPropertyRead("cipher").getAMemberCall(createName) and
this.getArgument(0).mayHaveStringValue(cipherName) and
- cipherName = cipherPrefix + "-" + cipherSuffix and
- cipherSuffix = ["CBC", "CFB", "CTR", "ECB", "GCM", "OFB"] and
+ cipherName = cipherPrefix + "-" + blockModeString and
+ blockModeString = ["CBC", "CFB", "CTR", "ECB", "GCM", "OFB"] and
algorithmName = cipherPrefix and
key = this.getArgument(1)
)
@@ -500,7 +546,8 @@ private module Forge {
createName = "createEncryptionCipher" or createName = "createDecryptionCipher"
|
this = mod.getAPropertyRead(algorithmName).getAMemberCall(createName) and
- key = this.getArgument(0)
+ key = this.getArgument(0) and
+ blockModeString = algorithmName
)
)
}
@@ -508,6 +555,11 @@ private module Forge {
override CryptographicAlgorithm getAlgorithm() { result = algorithm }
DataFlow::Node getKey() { result = key }
+
+ BlockMode getBlockMode() {
+ isBlockEncryptionAlgorithm(this.getAlgorithm()) and
+ result.matchesString(blockModeString)
+ }
}
private class NonKeyCipher extends Cipher {
@@ -527,21 +579,22 @@ private module Forge {
override CryptographicAlgorithm getAlgorithm() { result = algorithm }
}
- private class Apply extends CryptographicOperation instanceof DataFlow::CallNode {
+ private class Apply extends CryptographicOperation::Range instanceof DataFlow::CallNode {
DataFlow::Node input;
CryptographicAlgorithm algorithm; // non-functional
+ private Cipher cipher;
Apply() {
- exists(Cipher cipher |
- this = cipher.getAMemberCall("update") and
- super.getArgument(0) = input and
- algorithm = cipher.getAlgorithm()
- )
+ this = cipher.getAMemberCall("update") and
+ super.getArgument(0) = input and
+ algorithm = cipher.getAlgorithm()
}
- override DataFlow::Node getInput() { result = input }
+ override DataFlow::Node getAnInput() { result = input }
override CryptographicAlgorithm getAlgorithm() { result = algorithm }
+
+ override BlockMode getBlockMode() { result = cipher.(KeyCipher).getBlockMode() }
}
private class Key extends CryptographicKey {
@@ -586,7 +639,7 @@ private module Forge {
* A model of the md5 library.
*/
private module Md5 {
- private class Apply extends CryptographicOperation instanceof DataFlow::CallNode {
+ private class Apply extends CryptographicOperation::Range instanceof DataFlow::CallNode {
DataFlow::Node input;
CryptographicAlgorithm algorithm;
@@ -600,9 +653,12 @@ private module Md5 {
)
}
- override DataFlow::Node getInput() { result = input }
+ override DataFlow::Node getAnInput() { result = input }
override CryptographicAlgorithm getAlgorithm() { result = algorithm }
+
+ // not relevant for md5
+ override BlockMode getBlockMode() { none() }
}
}
@@ -610,7 +666,7 @@ private module Md5 {
* A model of the bcrypt, bcryptjs, bcrypt-nodejs libraries.
*/
private module Bcrypt {
- private class Apply extends CryptographicOperation instanceof DataFlow::CallNode {
+ private class Apply extends CryptographicOperation::Range instanceof DataFlow::CallNode {
DataFlow::Node input;
CryptographicAlgorithm algorithm;
@@ -633,9 +689,12 @@ private module Bcrypt {
)
}
- override DataFlow::Node getInput() { result = input }
+ override DataFlow::Node getAnInput() { result = input }
override CryptographicAlgorithm getAlgorithm() { result = algorithm }
+
+ // not relevant for bcrypt
+ override BlockMode getBlockMode() { none() }
}
}
@@ -643,7 +702,7 @@ private module Bcrypt {
* A model of the hasha library.
*/
private module Hasha {
- private class Apply extends CryptographicOperation instanceof DataFlow::CallNode {
+ private class Apply extends CryptographicOperation::Range instanceof DataFlow::CallNode {
DataFlow::Node input;
CryptographicAlgorithm algorithm;
@@ -659,9 +718,12 @@ private module Hasha {
)
}
- override DataFlow::Node getInput() { result = input }
+ override DataFlow::Node getAnInput() { result = input }
override CryptographicAlgorithm getAlgorithm() { result = algorithm }
+
+ // not relevant for hasha
+ override BlockMode getBlockMode() { none() }
}
}
diff --git a/javascript/ql/lib/semmle/javascript/security/dataflow/BrokenCryptoAlgorithmCustomizations.qll b/javascript/ql/lib/semmle/javascript/security/dataflow/BrokenCryptoAlgorithmCustomizations.qll
index 832f811f67b..01a5b1b260b 100644
--- a/javascript/ql/lib/semmle/javascript/security/dataflow/BrokenCryptoAlgorithmCustomizations.qll
+++ b/javascript/ql/lib/semmle/javascript/security/dataflow/BrokenCryptoAlgorithmCustomizations.qll
@@ -41,7 +41,7 @@ module BrokenCryptoAlgorithm {
WeakCryptographicOperationSink() {
exists(CryptographicOperation application |
application.getAlgorithm().isWeak() and
- this = application.getInput()
+ this = application.getAnInput()
)
}
}
diff --git a/javascript/ql/lib/semmle/javascript/security/dataflow/InsufficientPasswordHashCustomizations.qll b/javascript/ql/lib/semmle/javascript/security/dataflow/InsufficientPasswordHashCustomizations.qll
index 1697d55fe0b..8901be9962f 100644
--- a/javascript/ql/lib/semmle/javascript/security/dataflow/InsufficientPasswordHashCustomizations.qll
+++ b/javascript/ql/lib/semmle/javascript/security/dataflow/InsufficientPasswordHashCustomizations.qll
@@ -47,7 +47,7 @@ module InsufficientPasswordHash {
application.getAlgorithm().isWeak() or
not application.getAlgorithm() instanceof PasswordHashingAlgorithm
|
- this = application.getInput()
+ this = application.getAnInput()
)
}
}
diff --git a/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.ql b/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.ql
index d085113b9c7..10e5ae851f4 100644
--- a/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.ql
+++ b/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.ql
@@ -1,4 +1,4 @@
import javascript
from CryptographicOperation operation
-select operation, operation.getAlgorithm().getName(), operation.getInput()
+select operation, operation.getAlgorithm().getName(), operation.getAnInput()
From 1435ef186293ff4296931405cc9e50cc53aab905 Mon Sep 17 00:00:00 2001
From: Alex Ford
Date: Thu, 2 Feb 2023 20:20:03 +0000
Subject: [PATCH 158/415] CryptoAlgorithms: make
CryptographicAlgorithm#matchesName split on underscores
---
.../ql/lib/semmle/javascript/security/CryptoAlgorithms.qll | 4 ++--
python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll | 4 ++--
ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll | 4 ++--
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll b/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll
index 22a2d1c1eb2..766f99c61da 100644
--- a/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll
+++ b/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll
@@ -40,12 +40,12 @@ abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
/**
* Holds if the name of this algorithm matches `name` modulo case,
- * white space, dashes, underscores, and anything after a dash in the name
+ * white space, dashes, underscores, and anything after a dash or underscore in the name
* (to ignore modes of operation, such as CBC or ECB).
*/
bindingset[name]
predicate matchesName(string name) {
- [name.toUpperCase(), name.toUpperCase().regexpCapture("^(\\w+)(?:-.*)?$", 1)]
+ [name.toUpperCase(), name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1)]
.regexpReplaceAll("[-_ ]", "") = getName()
}
diff --git a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll
index 22a2d1c1eb2..766f99c61da 100644
--- a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll
+++ b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll
@@ -40,12 +40,12 @@ abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
/**
* Holds if the name of this algorithm matches `name` modulo case,
- * white space, dashes, underscores, and anything after a dash in the name
+ * white space, dashes, underscores, and anything after a dash or underscore in the name
* (to ignore modes of operation, such as CBC or ECB).
*/
bindingset[name]
predicate matchesName(string name) {
- [name.toUpperCase(), name.toUpperCase().regexpCapture("^(\\w+)(?:-.*)?$", 1)]
+ [name.toUpperCase(), name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1)]
.regexpReplaceAll("[-_ ]", "") = getName()
}
diff --git a/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll b/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll
index 22a2d1c1eb2..766f99c61da 100644
--- a/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll
+++ b/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll
@@ -40,12 +40,12 @@ abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
/**
* Holds if the name of this algorithm matches `name` modulo case,
- * white space, dashes, underscores, and anything after a dash in the name
+ * white space, dashes, underscores, and anything after a dash or underscore in the name
* (to ignore modes of operation, such as CBC or ECB).
*/
bindingset[name]
predicate matchesName(string name) {
- [name.toUpperCase(), name.toUpperCase().regexpCapture("^(\\w+)(?:-.*)?$", 1)]
+ [name.toUpperCase(), name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1)]
.regexpReplaceAll("[-_ ]", "") = getName()
}
From c25dc978df7cb6a74ac4175940ea853803fd9ef3 Mon Sep 17 00:00:00 2001
From: Alex Ford
Date: Thu, 2 Feb 2023 16:16:44 +0000
Subject: [PATCH 159/415] JS: add blockMode to CryptographicOperation tests
---
.../CryptographicOperation.expected | 62 +++++++++----------
.../CryptoLibraries/CryptographicOperation.ql | 14 ++++-
2 files changed, 44 insertions(+), 32 deletions(-)
diff --git a/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.expected b/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.expected
index 09134c235bc..6408535c0f4 100644
--- a/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.expected
+++ b/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.expected
@@ -1,31 +1,31 @@
-| tst.js:1:1:1:27 | asmCryp ... (input) | SHA256 | tst.js:1:22:1:26 | input |
-| tst.js:5:5:5:43 | jwcrypt ... retKey) | DSA | tst.js:5:19:5:23 | input |
-| tst.js:10:18:10:55 | cipher. ... 'hex') | AES192 | tst.js:10:32:10:39 | 'input1' |
-| tst.js:11:18:11:54 | cipher. ... 'hex') | AES192 | tst.js:11:31:11:38 | 'input2' |
-| tst.js:15:1:15:21 | hash.up ... nput1') | SHA256 | tst.js:15:13:15:20 | 'input1' |
-| tst.js:16:1:16:20 | hash.write('input2') | SHA256 | tst.js:16:12:16:19 | 'input2' |
-| tst.js:20:1:20:21 | hmac.up ... nput1') | SHA256 | tst.js:20:13:20:20 | 'input1' |
-| tst.js:21:1:21:20 | hmac.write('input2') | SHA256 | tst.js:21:12:21:19 | 'input2' |
-| tst.js:25:1:25:21 | sign.up ... nput1') | SHA256 | tst.js:25:13:25:20 | 'input1' |
-| tst.js:26:1:26:20 | sign.write('input2') | SHA256 | tst.js:26:12:26:19 | 'input2' |
-| tst.js:29:1:29:52 | CryptoJ ... y 123') | AES | tst.js:29:22:29:33 | 'my message' |
-| tst.js:32:1:32:31 | CryptoJ ... "Key") | SHA1 | tst.js:32:15:32:23 | "Message" |
-| tst.js:35:1:35:35 | CryptoJ ... "Key") | SHA1 | tst.js:35:19:35:27 | "Message" |
-| tst.js:37:1:37:64 | require ... y 123') | AES | tst.js:37:34:37:45 | 'my message' |
-| tst.js:39:1:39:43 | require ... "Key") | SHA1 | tst.js:39:27:39:35 | "Message" |
-| tst.js:41:1:41:34 | require ... ssage') | ED25519 | tst.js:41:22:41:33 | 'my message' |
-| tst.js:43:1:43:34 | require ... ssage') | SHA512 | tst.js:43:22:43:33 | 'my message' |
-| tst.js:45:1:45:39 | require ... ssage') | ED25519 | tst.js:45:27:45:38 | 'my message' |
-| tst.js:47:1:47:39 | require ... ssage') | SHA512 | tst.js:47:27:47:38 | 'my message' |
-| tst.js:49:1:49:41 | require ... ('abc') | SHA256 | tst.js:49:36:49:40 | 'abc' |
-| tst.js:51:1:51:51 | require ... ('abc') | SHA512 | tst.js:51:46:51:50 | 'abc' |
-| tst.js:53:1:53:86 | require ... y dog') | MD5 | tst.js:53:41:53:85 | 'The qu ... zy dog' |
-| tst.js:55:1:55:91 | require ... y dog') | MD5 | tst.js:55:46:55:90 | 'The qu ... zy dog' |
-| tst.js:57:1:57:65 | require ... ecret") | RC2 | tst.js:57:57:57:64 | "secret" |
-| tst.js:59:1:59:70 | require ... ecret") | 3DES | tst.js:59:62:59:69 | "secret" |
-| tst.js:61:1:61:25 | require ... ssage") | MD5 | tst.js:61:16:61:24 | "message" |
-| tst.js:63:1:63:32 | require ... ssword) | BCRYPT | tst.js:63:24:63:31 | password |
-| tst.js:65:1:65:36 | require ... ssword) | BCRYPT | tst.js:65:28:65:35 | password |
-| tst.js:67:1:67:34 | require ... ssword) | BCRYPT | tst.js:67:26:67:33 | password |
-| tst.js:69:1:69:39 | require ... ssword) | BCRYPT | tst.js:69:31:69:38 | password |
-| tst.js:71:1:71:49 | require ... md5" }) | MD5 | tst.js:71:18:71:26 | 'unicorn' |
+| tst.js:1:1:1:27 | asmCryp ... (input) | SHA256 | tst.js:1:22:1:26 | input | |
+| tst.js:5:5:5:43 | jwcrypt ... retKey) | DSA | tst.js:5:19:5:23 | input | |
+| tst.js:10:18:10:55 | cipher. ... 'hex') | AES192 | tst.js:10:32:10:39 | 'input1' | CBC |
+| tst.js:11:18:11:54 | cipher. ... 'hex') | AES192 | tst.js:11:31:11:38 | 'input2' | CBC |
+| tst.js:15:1:15:21 | hash.up ... nput1') | SHA256 | tst.js:15:13:15:20 | 'input1' | |
+| tst.js:16:1:16:20 | hash.write('input2') | SHA256 | tst.js:16:12:16:19 | 'input2' | |
+| tst.js:20:1:20:21 | hmac.up ... nput1') | SHA256 | tst.js:20:13:20:20 | 'input1' | |
+| tst.js:21:1:21:20 | hmac.write('input2') | SHA256 | tst.js:21:12:21:19 | 'input2' | |
+| tst.js:25:1:25:21 | sign.up ... nput1') | SHA256 | tst.js:25:13:25:20 | 'input1' | |
+| tst.js:26:1:26:20 | sign.write('input2') | SHA256 | tst.js:26:12:26:19 | 'input2' | |
+| tst.js:29:1:29:52 | CryptoJ ... y 123') | AES | tst.js:29:22:29:33 | 'my message' | CBC |
+| tst.js:32:1:32:31 | CryptoJ ... "Key") | SHA1 | tst.js:32:15:32:23 | "Message" | |
+| tst.js:35:1:35:35 | CryptoJ ... "Key") | SHA1 | tst.js:35:19:35:27 | "Message" | |
+| tst.js:37:1:37:64 | require ... y 123') | AES | tst.js:37:34:37:45 | 'my message' | CBC |
+| tst.js:39:1:39:43 | require ... "Key") | SHA1 | tst.js:39:27:39:35 | "Message" | |
+| tst.js:41:1:41:34 | require ... ssage') | ED25519 | tst.js:41:22:41:33 | 'my message' | |
+| tst.js:43:1:43:34 | require ... ssage') | SHA512 | tst.js:43:22:43:33 | 'my message' | |
+| tst.js:45:1:45:39 | require ... ssage') | ED25519 | tst.js:45:27:45:38 | 'my message' | |
+| tst.js:47:1:47:39 | require ... ssage') | SHA512 | tst.js:47:27:47:38 | 'my message' | |
+| tst.js:49:1:49:41 | require ... ('abc') | SHA256 | tst.js:49:36:49:40 | 'abc' | |
+| tst.js:51:1:51:51 | require ... ('abc') | SHA512 | tst.js:51:46:51:50 | 'abc' | |
+| tst.js:53:1:53:86 | require ... y dog') | MD5 | tst.js:53:41:53:85 | 'The qu ... zy dog' | |
+| tst.js:55:1:55:91 | require ... y dog') | MD5 | tst.js:55:46:55:90 | 'The qu ... zy dog' | |
+| tst.js:57:1:57:65 | require ... ecret") | RC2 | tst.js:57:57:57:64 | "secret" | |
+| tst.js:59:1:59:70 | require ... ecret") | 3DES | tst.js:59:62:59:69 | "secret" | CBC |
+| tst.js:61:1:61:25 | require ... ssage") | MD5 | tst.js:61:16:61:24 | "message" | |
+| tst.js:63:1:63:32 | require ... ssword) | BCRYPT | tst.js:63:24:63:31 | password | |
+| tst.js:65:1:65:36 | require ... ssword) | BCRYPT | tst.js:65:28:65:35 | password | |
+| tst.js:67:1:67:34 | require ... ssword) | BCRYPT | tst.js:67:26:67:33 | password | |
+| tst.js:69:1:69:39 | require ... ssword) | BCRYPT | tst.js:69:31:69:38 | password | |
+| tst.js:71:1:71:49 | require ... md5" }) | MD5 | tst.js:71:18:71:26 | 'unicorn' | |
diff --git a/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.ql b/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.ql
index 10e5ae851f4..33b56f278a0 100644
--- a/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.ql
+++ b/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.ql
@@ -1,4 +1,16 @@
import javascript
+string getBlockMode(CryptographicOperation operation) { // block mode to report for `operation`, or "" when there is none to report
+ if // a block mode is only looked up for encryption algorithms that are not stream ciphers
+ operation.getAlgorithm() instanceof EncryptionAlgorithm and
+ not operation.getAlgorithm().(EncryptionAlgorithm).isStreamCipher()
+ then
+ if exists(operation.getBlockMode())
+ then result = operation.getBlockMode()
+ else result = "" // non-stream encryption algorithm for which no block mode is available
+ else result = "" // not an encryption algorithm, or a stream cipher; NOTE(review): the "" fallbacks presumably keep such operations in the select output (no result would drop the row) - confirm
+}
+
from CryptographicOperation operation
-select operation, operation.getAlgorithm().getName(), operation.getAnInput()
+select operation, operation.getAlgorithm().getName(), operation.getAnInput(),
+ getBlockMode(operation)
From aa2c532a7892cfedfc9f3d5d91470ce384efbf3c Mon Sep 17 00:00:00 2001
From: Alex Ford
Date: Thu, 2 Feb 2023 20:22:02 +0000
Subject: [PATCH 160/415] JS: adjust test whitespace
---
.../CryptographicOperation.expected | 60 +++++++++----------
.../test/library-tests/CryptoLibraries/tst.js | 11 ++++
2 files changed, 41 insertions(+), 30 deletions(-)
diff --git a/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.expected b/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.expected
index 6408535c0f4..cd01b1d1b42 100644
--- a/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.expected
+++ b/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.expected
@@ -1,31 +1,31 @@
| tst.js:1:1:1:27 | asmCryp ... (input) | SHA256 | tst.js:1:22:1:26 | input | |
-| tst.js:5:5:5:43 | jwcrypt ... retKey) | DSA | tst.js:5:19:5:23 | input | |
-| tst.js:10:18:10:55 | cipher. ... 'hex') | AES192 | tst.js:10:32:10:39 | 'input1' | CBC |
-| tst.js:11:18:11:54 | cipher. ... 'hex') | AES192 | tst.js:11:31:11:38 | 'input2' | CBC |
-| tst.js:15:1:15:21 | hash.up ... nput1') | SHA256 | tst.js:15:13:15:20 | 'input1' | |
-| tst.js:16:1:16:20 | hash.write('input2') | SHA256 | tst.js:16:12:16:19 | 'input2' | |
-| tst.js:20:1:20:21 | hmac.up ... nput1') | SHA256 | tst.js:20:13:20:20 | 'input1' | |
-| tst.js:21:1:21:20 | hmac.write('input2') | SHA256 | tst.js:21:12:21:19 | 'input2' | |
-| tst.js:25:1:25:21 | sign.up ... nput1') | SHA256 | tst.js:25:13:25:20 | 'input1' | |
-| tst.js:26:1:26:20 | sign.write('input2') | SHA256 | tst.js:26:12:26:19 | 'input2' | |
-| tst.js:29:1:29:52 | CryptoJ ... y 123') | AES | tst.js:29:22:29:33 | 'my message' | CBC |
-| tst.js:32:1:32:31 | CryptoJ ... "Key") | SHA1 | tst.js:32:15:32:23 | "Message" | |
-| tst.js:35:1:35:35 | CryptoJ ... "Key") | SHA1 | tst.js:35:19:35:27 | "Message" | |
-| tst.js:37:1:37:64 | require ... y 123') | AES | tst.js:37:34:37:45 | 'my message' | CBC |
-| tst.js:39:1:39:43 | require ... "Key") | SHA1 | tst.js:39:27:39:35 | "Message" | |
-| tst.js:41:1:41:34 | require ... ssage') | ED25519 | tst.js:41:22:41:33 | 'my message' | |
-| tst.js:43:1:43:34 | require ... ssage') | SHA512 | tst.js:43:22:43:33 | 'my message' | |
-| tst.js:45:1:45:39 | require ... ssage') | ED25519 | tst.js:45:27:45:38 | 'my message' | |
-| tst.js:47:1:47:39 | require ... ssage') | SHA512 | tst.js:47:27:47:38 | 'my message' | |
-| tst.js:49:1:49:41 | require ... ('abc') | SHA256 | tst.js:49:36:49:40 | 'abc' | |
-| tst.js:51:1:51:51 | require ... ('abc') | SHA512 | tst.js:51:46:51:50 | 'abc' | |
-| tst.js:53:1:53:86 | require ... y dog') | MD5 | tst.js:53:41:53:85 | 'The qu ... zy dog' | |
-| tst.js:55:1:55:91 | require ... y dog') | MD5 | tst.js:55:46:55:90 | 'The qu ... zy dog' | |
-| tst.js:57:1:57:65 | require ... ecret") | RC2 | tst.js:57:57:57:64 | "secret" | |
-| tst.js:59:1:59:70 | require ... ecret") | 3DES | tst.js:59:62:59:69 | "secret" | CBC |
-| tst.js:61:1:61:25 | require ... ssage") | MD5 | tst.js:61:16:61:24 | "message" | |
-| tst.js:63:1:63:32 | require ... ssword) | BCRYPT | tst.js:63:24:63:31 | password | |
-| tst.js:65:1:65:36 | require ... ssword) | BCRYPT | tst.js:65:28:65:35 | password | |
-| tst.js:67:1:67:34 | require ... ssword) | BCRYPT | tst.js:67:26:67:33 | password | |
-| tst.js:69:1:69:39 | require ... ssword) | BCRYPT | tst.js:69:31:69:38 | password | |
-| tst.js:71:1:71:49 | require ... md5" }) | MD5 | tst.js:71:18:71:26 | 'unicorn' | |
+| tst.js:7:5:7:43 | jwcrypt ... retKey) | DSA | tst.js:7:19:7:23 | input | |
+| tst.js:12:18:12:55 | cipher. ... 'hex') | AES192 | tst.js:12:32:12:39 | 'input1' | CBC |
+| tst.js:13:18:13:54 | cipher. ... 'hex') | AES192 | tst.js:13:31:13:38 | 'input2' | CBC |
+| tst.js:17:1:17:21 | hash.up ... nput1') | SHA256 | tst.js:17:13:17:20 | 'input1' | |
+| tst.js:18:1:18:20 | hash.write('input2') | SHA256 | tst.js:18:12:18:19 | 'input2' | |
+| tst.js:22:1:22:21 | hmac.up ... nput1') | SHA256 | tst.js:22:13:22:20 | 'input1' | |
+| tst.js:23:1:23:20 | hmac.write('input2') | SHA256 | tst.js:23:12:23:19 | 'input2' | |
+| tst.js:27:1:27:21 | sign.up ... nput1') | SHA256 | tst.js:27:13:27:20 | 'input1' | |
+| tst.js:28:1:28:20 | sign.write('input2') | SHA256 | tst.js:28:12:28:19 | 'input2' | |
+| tst.js:36:1:36:52 | CryptoJ ... y 123') | AES | tst.js:36:22:36:33 | 'my message' | CBC |
+| tst.js:39:1:39:31 | CryptoJ ... "Key") | SHA1 | tst.js:39:15:39:23 | "Message" | |
+| tst.js:42:1:42:35 | CryptoJ ... "Key") | SHA1 | tst.js:42:19:42:27 | "Message" | |
+| tst.js:44:1:44:64 | require ... y 123') | AES | tst.js:44:34:44:45 | 'my message' | CBC |
+| tst.js:46:1:46:43 | require ... "Key") | SHA1 | tst.js:46:27:46:35 | "Message" | |
+| tst.js:52:1:52:34 | require ... ssage') | ED25519 | tst.js:52:22:52:33 | 'my message' | |
+| tst.js:54:1:54:34 | require ... ssage') | SHA512 | tst.js:54:22:54:33 | 'my message' | |
+| tst.js:56:1:56:39 | require ... ssage') | ED25519 | tst.js:56:27:56:38 | 'my message' | |
+| tst.js:58:1:58:39 | require ... ssage') | SHA512 | tst.js:58:27:58:38 | 'my message' | |
+| tst.js:60:1:60:41 | require ... ('abc') | SHA256 | tst.js:60:36:60:40 | 'abc' | |
+| tst.js:62:1:62:51 | require ... ('abc') | SHA512 | tst.js:62:46:62:50 | 'abc' | |
+| tst.js:64:1:64:86 | require ... y dog') | MD5 | tst.js:64:41:64:85 | 'The qu ... zy dog' | |
+| tst.js:66:1:66:91 | require ... y dog') | MD5 | tst.js:66:46:66:90 | 'The qu ... zy dog' | |
+| tst.js:68:1:68:65 | require ... ecret") | RC2 | tst.js:68:57:68:64 | "secret" | |
+| tst.js:70:1:70:70 | require ... ecret") | 3DES | tst.js:70:62:70:69 | "secret" | CBC |
+| tst.js:72:1:72:25 | require ... ssage") | MD5 | tst.js:72:16:72:24 | "message" | |
+| tst.js:74:1:74:32 | require ... ssword) | BCRYPT | tst.js:74:24:74:31 | password | |
+| tst.js:76:1:76:36 | require ... ssword) | BCRYPT | tst.js:76:28:76:35 | password | |
+| tst.js:78:1:78:34 | require ... ssword) | BCRYPT | tst.js:78:26:78:33 | password | |
+| tst.js:80:1:80:39 | require ... ssword) | BCRYPT | tst.js:80:31:80:38 | password | |
+| tst.js:82:1:82:49 | require ... md5" }) | MD5 | tst.js:82:18:82:26 | 'unicorn' | |
diff --git a/javascript/ql/test/library-tests/CryptoLibraries/tst.js b/javascript/ql/test/library-tests/CryptoLibraries/tst.js
index 5efd150bd16..a7aa47359d5 100644
--- a/javascript/ql/test/library-tests/CryptoLibraries/tst.js
+++ b/javascript/ql/test/library-tests/CryptoLibraries/tst.js
@@ -1,5 +1,7 @@
asmCrypto.SHA256.hex(input);
+
+
var jwcrypto = require("browserid-crypto");
jwcrypto.generateKeypair({algorithm: 'DSA'}, function(err, keypair) {
jwcrypto.sign(input, keypair.secretKey);
@@ -25,6 +27,11 @@ const sign = crypto.createSign('SHA256');
sign.update('input1');
sign.write('input2');
+
+
+
+
+
var CryptoJS = require("crypto-js");
CryptoJS.AES.encrypt('my message', 'secret key 123');
@@ -38,6 +45,10 @@ require("crypto-js/aes").encrypt('my message', 'secret key 123');
require("crypto-js/sha1")("Message", "Key");
+
+
+
+
require("nacl").sign('my message');
require("nacl").hash('my message');
From b0b8f8725ea6376cc1c796eca3077110bf17ebad Mon Sep 17 00:00:00 2001
From: Alex Ford
Date: Thu, 2 Feb 2023 20:22:45 +0000
Subject: [PATCH 161/415] JS: add some CryptographicOperation#getBlockMode()
tests
---
.../CryptographicOperation.expected | 4 ++++
.../ql/test/library-tests/CryptoLibraries/tst.js | 16 ++++++++--------
2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.expected b/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.expected
index cd01b1d1b42..0844cb2c577 100644
--- a/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.expected
+++ b/javascript/ql/test/library-tests/CryptoLibraries/CryptographicOperation.expected
@@ -1,4 +1,5 @@
| tst.js:1:1:1:27 | asmCryp ... (input) | SHA256 | tst.js:1:22:1:26 | input | |
+| tst.js:3:1:3:41 | asmCryp ... ey, iv) | AES | tst.js:3:27:3:31 | input | OFB |
| tst.js:7:5:7:43 | jwcrypt ... retKey) | DSA | tst.js:7:19:7:23 | input | |
| tst.js:12:18:12:55 | cipher. ... 'hex') | AES192 | tst.js:12:32:12:39 | 'input1' | CBC |
| tst.js:13:18:13:54 | cipher. ... 'hex') | AES192 | tst.js:13:31:13:38 | 'input2' | CBC |
@@ -8,11 +9,14 @@
| tst.js:23:1:23:20 | hmac.write('input2') | SHA256 | tst.js:23:12:23:19 | 'input2' | |
| tst.js:27:1:27:21 | sign.up ... nput1') | SHA256 | tst.js:27:13:27:20 | 'input1' | |
| tst.js:28:1:28:20 | sign.write('input2') | SHA256 | tst.js:28:12:28:19 | 'input2' | |
+| tst.js:32:1:32:38 | cipher. ... 'hex') | AES | tst.js:32:15:32:22 | 'input1' | ECB |
+| tst.js:33:1:33:37 | cipher. ... 'hex') | AES | tst.js:33:14:33:21 | 'input2' | ECB |
| tst.js:36:1:36:52 | CryptoJ ... y 123') | AES | tst.js:36:22:36:33 | 'my message' | CBC |
| tst.js:39:1:39:31 | CryptoJ ... "Key") | SHA1 | tst.js:39:15:39:23 | "Message" | |
| tst.js:42:1:42:35 | CryptoJ ... "Key") | SHA1 | tst.js:42:19:42:27 | "Message" |