From cfbc3f73ec47a7d8e69311f280019b926f5ffaab Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 15 Mar 2024 15:13:39 +0100 Subject: [PATCH 1/3] Pyhton: add test for conflicting summaries We noticed that when - a function has more than one summary (with different charpred) - one summary is subsumed by a subpath (or something happens around the function being extracted) - the function is called multiple times(we needed at least three) one of the summaries would no longer lead to flow. --- .../summaries/InlineTaintTest.expected | 4 + .../dataflow/summaries/InlineTaintTest.ql | 4 + .../dataflow/summaries/TestSummaries.qll | 105 ++++++++++++++++++ .../summaries/conflicting_summaries.py | 18 +++ .../summaries/extracted_package/functions.py | 5 + 5 files changed, 136 insertions(+) create mode 100644 python/ql/test/experimental/dataflow/summaries/InlineTaintTest.expected create mode 100644 python/ql/test/experimental/dataflow/summaries/InlineTaintTest.ql create mode 100644 python/ql/test/experimental/dataflow/summaries/conflicting_summaries.py create mode 100644 python/ql/test/experimental/dataflow/summaries/extracted_package/functions.py diff --git a/python/ql/test/experimental/dataflow/summaries/InlineTaintTest.expected b/python/ql/test/experimental/dataflow/summaries/InlineTaintTest.expected new file mode 100644 index 00000000000..366de37b867 --- /dev/null +++ b/python/ql/test/experimental/dataflow/summaries/InlineTaintTest.expected @@ -0,0 +1,4 @@ +argumentToEnsureNotTaintedNotMarkedAsSpurious +untaintedArgumentToEnsureTaintedNotMarkedAsMissing +testFailures +failures diff --git a/python/ql/test/experimental/dataflow/summaries/InlineTaintTest.ql b/python/ql/test/experimental/dataflow/summaries/InlineTaintTest.ql new file mode 100644 index 00000000000..96cc5c3e31f --- /dev/null +++ b/python/ql/test/experimental/dataflow/summaries/InlineTaintTest.ql @@ -0,0 +1,4 @@ +import python +import experimental.meta.InlineTaintTest +import MakeInlineTaintTest +import TestSummaries diff --git a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll index b2e29e9999e..534fac62491 100644 --- a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll +++ b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll @@ -136,3 +136,108 @@ private class SummarizedCallableJsonLoads extends SummarizedCallable { preservesValue = true } } + +// Repeated summaries +private class SummarizedCallableWithSubpath extends SummarizedCallable { + SummarizedCallableWithSubpath() { this = "extracted_package.functions.with_subpath" } + + override DataFlow::CallCfgNode getACall() { + result = + API::moduleImport("extracted_package") + .getMember("functions") + .getMember("with_subpath") + .getACall() + } + + override DataFlow::ArgumentNode getACallback() { + result = + API::moduleImport("extracted_package") + .getMember("functions") + .getMember("with_subpath") + .getAValueReachableFromSource() + } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = false + } +} + +private class SummarizedCallableWithSubpathAgain extends SummarizedCallable { + SummarizedCallableWithSubpathAgain() { this = "extracted_package.functions.with_subpathII" } + + override DataFlow::CallCfgNode getACall() { + result = + API::moduleImport("extracted_package") + .getMember("functions") + .getMember("with_subpath") + .getACall() + } + + override DataFlow::ArgumentNode getACallback() { + result = + API::moduleImport("extracted_package") + .getMember("functions") + .getMember("with_subpath") + .getAValueReachableFromSource() + } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + input = "Argument[0]" and + output = "ReturnValue.Attribute[pattern]" and + preservesValue = true + } +} + +private class SummarizedCallableWithoutSubpath extends SummarizedCallable { + SummarizedCallableWithoutSubpath() { this = "extracted_package.functions.without_subpath" } + + override DataFlow::CallCfgNode getACall() { + result = + API::moduleImport("extracted_package") + .getMember("functions") + .getMember("without_subpath") + .getACall() + } + + override DataFlow::ArgumentNode getACallback() { + result = + API::moduleImport("extracted_package") + .getMember("functions") + .getMember("without_subpath") + .getAValueReachableFromSource() + } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = false + } +} + +private class SummarizedCallableWithoutSubpathAgain extends SummarizedCallable { + SummarizedCallableWithoutSubpathAgain() { this = "extracted_package.functions.without_subpathII" } + + override DataFlow::CallCfgNode getACall() { + result = + API::moduleImport("extracted_package") + .getMember("functions") + .getMember("without_subpath") + .getACall() + } + + override DataFlow::ArgumentNode getACallback() { + result = + API::moduleImport("extracted_package") + .getMember("functions") + .getMember("without_subpath") + .getAValueReachableFromSource() + } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + input = "Argument[0]" and + output = "ReturnValue.Attribute[pattern]" and + preservesValue = true + } +} diff --git a/python/ql/test/experimental/dataflow/summaries/conflicting_summaries.py b/python/ql/test/experimental/dataflow/summaries/conflicting_summaries.py new file mode 100644 index 00000000000..9528e9cdafc --- /dev/null +++ b/python/ql/test/experimental/dataflow/summaries/conflicting_summaries.py @@ -0,0 +1,18 @@ +# Bad interaction of two summaries for the same function +ts = TAINTED_STRING + +from extracted_package.functions import with_subpath, without_subpath + +# For the function `with_subpath`, flow from the first argument to the return value +# can be concluded from its definition. This seems to discard all summaries, including +# the one with flow to `ReturnValue.Attribute[pattern]`. +ensure_tainted( + with_subpath(ts).pattern, # $ MISSING: tainted + with_subpath(ts), # $ tainted + with_subpath(ts), # $ tainted +) +ensure_tainted( + without_subpath(ts).pattern, # $ tainted + without_subpath(ts), # $ tainted + without_subpath(ts), # $ tainted +) \ No newline at end of file diff --git a/python/ql/test/experimental/dataflow/summaries/extracted_package/functions.py b/python/ql/test/experimental/dataflow/summaries/extracted_package/functions.py new file mode 100644 index 00000000000..f4780be20a3 --- /dev/null +++ b/python/ql/test/experimental/dataflow/summaries/extracted_package/functions.py @@ -0,0 +1,5 @@ +def with_subpath(x): + return x + +def without_subpath(x): + pass \ No newline at end of file From 45c65b48aaa747a3198d59618fa64505af90d4a1 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 18 Mar 2024 08:49:31 +0100 Subject: [PATCH 2/3] python: make it a real package so python2 also respects it --- .../experimental/dataflow/summaries/extracted_package/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 python/ql/test/experimental/dataflow/summaries/extracted_package/__init__.py diff --git a/python/ql/test/experimental/dataflow/summaries/extracted_package/__init__.py b/python/ql/test/experimental/dataflow/summaries/extracted_package/__init__.py new file mode 100644 index 00000000000..e69de29bb2d From 2a0c451d2d3ceeeb4a6d10a619cbe084e0f133bd Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 18 Mar 2024 10:29:36 +0100 Subject: [PATCH 3/3] python: No `fieldFlowBranchLimit` for `SummarizedCallable`s Like https://github.com/github/codeql/pull/15689 for Ruby. --- .../python/dataflow/new/internal/DataFlowImplSpecific.qll | 2 ++ .../experimental/dataflow/summaries/conflicting_summaries.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplSpecific.qll index cffdefe41ba..704af5b08e7 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplSpecific.qll @@ -22,4 +22,6 @@ module PythonDataFlow implements InputSig { predicate neverSkipInPathGraph = Private::neverSkipInPathGraph/1; Node exprNode(DataFlowExpr e) { result = Public::exprNode(e) } + + predicate ignoreFieldFlowBranchLimit(DataFlowCallable c) { exists(c.asLibraryCallable()) } } diff --git a/python/ql/test/experimental/dataflow/summaries/conflicting_summaries.py b/python/ql/test/experimental/dataflow/summaries/conflicting_summaries.py index 9528e9cdafc..7b031f37a9f 100644 --- a/python/ql/test/experimental/dataflow/summaries/conflicting_summaries.py +++ b/python/ql/test/experimental/dataflow/summaries/conflicting_summaries.py @@ -7,7 +7,7 @@ from extracted_package.functions import with_subpath, without_subpath # can be concluded from its definition. This seems to discard all summaries, including # the one with flow to `ReturnValue.Attribute[pattern]`. ensure_tainted( - with_subpath(ts).pattern, # $ MISSING: tainted + with_subpath(ts).pattern, # $ tainted with_subpath(ts), # $ tainted with_subpath(ts), # $ tainted )