From 44bb41e84b9393da0fd681697f8fa1ce8c2aa9c3 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 27 Jan 2021 19:06:51 +0100 Subject: [PATCH 1/3] Python: Add extra type-tracking test for "long" import chain While trying to debug an other problem related to full import of django view, I stumbled upon this oddity. (yikes) --- .../dataflow/typetracking/test.py | 19 +++++ .../dataflow/typetracking/tracked.ql | 70 +++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/python/ql/test/experimental/dataflow/typetracking/test.py b/python/ql/test/experimental/dataflow/typetracking/test.py index b03a2e6912d..a797c302023 100644 --- a/python/ql/test/experimental/dataflow/typetracking/test.py +++ b/python/ql/test/experimental/dataflow/typetracking/test.py @@ -101,3 +101,22 @@ class Bar(Foo): self.meth1() # $ tracked_self super().meth2() super(Bar, self).foo3() # $ tracked_self + +# ------------------------------------------------------------------------------ +# Tracking of attribute lookup after "long" import chain +# ------------------------------------------------------------------------------ + +def test_long_import_chain(): + import foo.bar + foo.baz # $ SPURIOUS: tracked_foo_bar_baz + x = foo.bar.baz # $ tracked_foo_bar_baz + do_stuff(x) # $ tracked_foo_bar_baz + + class Example(foo.bar.baz): # $ tracked_foo_bar_baz + pass + + +def test_long_import_chain_full_path(): + from foo.bar import baz # $ tracked_foo_bar_baz + x = baz # $ tracked_foo_bar_baz + do_stuff(x) # $ tracked_foo_bar_baz diff --git a/python/ql/test/experimental/dataflow/typetracking/tracked.ql b/python/ql/test/experimental/dataflow/typetracking/tracked.ql index 4ce35a2d265..0797e484c97 100644 --- a/python/ql/test/experimental/dataflow/typetracking/tracked.ql +++ b/python/ql/test/experimental/dataflow/typetracking/tracked.ql @@ -3,6 +3,9 @@ import semmle.python.dataflow.new.DataFlow import semmle.python.dataflow.new.TypeTracker import TestUtilities.InlineExpectationsTest +// ----------------------------------------------------------------------------- +// tracked +// ----------------------------------------------------------------------------- DataFlow::Node tracked(TypeTracker t) { t.start() and result.asCfgNode() = any(NameNode n | n.getId() = "tracked") @@ -28,6 +31,9 @@ class TrackedTest extends InlineExpectationsTest { } } +// ----------------------------------------------------------------------------- +// int + str +// ----------------------------------------------------------------------------- DataFlow::Node int_type(TypeTracker t) { t.start() and result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "int") @@ -74,6 +80,9 @@ class TrackedStringTest extends InlineExpectationsTest { } } +// ----------------------------------------------------------------------------- +// tracked_self +// ----------------------------------------------------------------------------- DataFlow::Node tracked_self(TypeTracker t) { t.start() and exists(Function f | @@ -102,3 +111,64 @@ class TrackedSelfTest extends InlineExpectationsTest { ) } } + +// ----------------------------------------------------------------------------- +// tracked_foo_bar_baz +// ----------------------------------------------------------------------------- +// This modeling follows the same pattern that we currently use in our real library modeling. +/** Gets a reference to `foo` (fictive module). */ +private DataFlow::Node foo(DataFlow::TypeTracker t) { + t.start() and + result = DataFlow::importNode("foo") + or + exists(DataFlow::TypeTracker t2 | result = foo(t2).track(t2, t)) +} + +/** Gets a reference to `foo` (fictive module). */ +DataFlow::Node foo() { result = foo(DataFlow::TypeTracker::end()) } + +/** Gets a reference to `foo.bar` (fictive module). */ +private DataFlow::Node foo_bar(DataFlow::TypeTracker t) { + t.start() and + result = DataFlow::importNode("foo.bar") + or + t.startInAttr("bar") and + result = foo() + or + exists(DataFlow::TypeTracker t2 | result = foo_bar(t2).track(t2, t)) +} + +/** Gets a reference to `foo.bar` (fictive module). */ +DataFlow::Node foo_bar() { result = foo_bar(DataFlow::TypeTracker::end()) } + +/** Gets a reference to `foo.bar.baz` (fictive attribute on `foo.bar` module). */ +private DataFlow::Node foo_bar_baz(DataFlow::TypeTracker t) { + t.start() and + result = DataFlow::importNode("foo.bar.baz") + or + t.startInAttr("baz") and + result = foo_bar() + or + exists(DataFlow::TypeTracker t2 | result = foo_bar_baz(t2).track(t2, t)) +} + +/** Gets a reference to `foo.bar.baz` (fictive attribute on `foo.bar` module). */ +DataFlow::Node foo_bar_baz() { result = foo_bar_baz(DataFlow::TypeTracker::end()) } + +class TrackedFooBarBaz extends InlineExpectationsTest { + TrackedFooBarBaz() { this = "TrackedFooBarBaz" } + + override string getARelevantTag() { result = "tracked_foo_bar_baz" } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(DataFlow::Node e | + e = foo_bar_baz() and + // Module variables have no sensible location, and hence can't be annotated. + not e instanceof DataFlow::ModuleVariableNode and + tag = "tracked_foo_bar_baz" and + location = e.getLocation() and + value = "" and + element = e.toString() + ) + } +} From 0d42e546a07f180ce5a9e3e4852cd06b10fab107 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 27 Jan 2021 19:08:50 +0100 Subject: [PATCH 2/3] Python: Add deep import chain to import-helper tests --- .../experimental/dataflow/import-helper/ImportHelper.expected | 4 ++++ .../ql/test/experimental/dataflow/import-helper/test_deep.py | 3 +++ 2 files changed, 7 insertions(+) create mode 100644 python/ql/test/experimental/dataflow/import-helper/test_deep.py diff --git a/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected b/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected index 9d263331f6a..a30908932da 100644 --- a/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected +++ b/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected @@ -21,3 +21,7 @@ | test7.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg.foo | | test7.py:9:6:9:10 | ControlFlowNode for ImportExpr | mypkg | | test7.py:9:19:9:21 | ControlFlowNode for ImportMember | mypkg.foo | +| test_deep.py:1:6:1:21 | ControlFlowNode for ImportExpr | start.middle.end | +| test_deep.py:1:6:1:21 | ControlFlowNode for ImportExpr | start.middle.end | +| test_deep.py:1:30:1:32 | ControlFlowNode for ImportMember | start.middle.end.foo | +| test_deep.py:1:35:1:37 | ControlFlowNode for ImportMember | start.middle.end.bar | diff --git a/python/ql/test/experimental/dataflow/import-helper/test_deep.py b/python/ql/test/experimental/dataflow/import-helper/test_deep.py new file mode 100644 index 00000000000..49c4bc6c184 --- /dev/null +++ b/python/ql/test/experimental/dataflow/import-helper/test_deep.py @@ -0,0 +1,3 @@ +from start.middle.end import foo, bar +print(foo) +print(bar) From 5646af56dd5a9bc3c11f02ccd9151c6243b17e04 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 27 Jan 2021 19:11:55 +0100 Subject: [PATCH 3/3] Python: Fix too many results from DataFlow::importNode --- .../semmle/python/dataflow/new/internal/DataFlowUtil.qll | 8 +++++++- .../dataflow/import-helper/ImportHelper.expected | 4 ---- python/ql/test/experimental/dataflow/typetracking/test.py | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowUtil.qll b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowUtil.qll index 8c9daf0c8d8..174564db96b 100644 --- a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowUtil.qll +++ b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowUtil.qll @@ -68,5 +68,11 @@ Node importNode(string name) { // Because named imports are modelled as `AttrRead`s, the statement `from foo import bar as baz` // is interpreted as if it was an assignment `baz = foo.bar`, which means `baz` gets tracked as a // reference to `foo.bar`, as desired. - result.asCfgNode().getNode() = any(ImportExpr i | i.getName() = name) + exists(ImportExpr imp_expr | + imp_expr.getName() = name and + result.asCfgNode().getNode() = imp_expr and + // in `import foo.bar` we DON'T want to give a result for `importNode("foo.bar")`, + // only for `importNode("foo")`. We exclude those cases with the following clause. + not exists(Import imp | imp.getAName().getValue() = imp_expr) + ) } diff --git a/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected b/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected index a30908932da..11608b04ab8 100644 --- a/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected +++ b/python/ql/test/experimental/dataflow/import-helper/ImportHelper.expected @@ -4,9 +4,7 @@ | test2.py:1:19:1:21 | ControlFlowNode for ImportMember | mypkg.foo | | test2.py:1:24:1:26 | ControlFlowNode for ImportMember | mypkg.bar | | test3.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg | -| test3.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg.foo | | test3.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg | -| test3.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg.bar | | test4.py:1:8:1:16 | ControlFlowNode for ImportExpr | mypkg.foo | | test4.py:2:8:2:16 | ControlFlowNode for ImportExpr | mypkg.bar | | test5.py:1:8:1:12 | ControlFlowNode for ImportExpr | mypkg | @@ -14,11 +12,9 @@ | test5.py:9:19:9:29 | ControlFlowNode for ImportMember | mypkg.bar | | test6.py:1:8:1:12 | ControlFlowNode for ImportExpr | mypkg | | test6.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg | -| test6.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg.foo | | test7.py:1:6:1:10 | ControlFlowNode for ImportExpr | mypkg | | test7.py:1:19:1:21 | ControlFlowNode for ImportMember | mypkg.foo | | test7.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg | -| test7.py:5:8:5:16 | ControlFlowNode for ImportExpr | mypkg.foo | | test7.py:9:6:9:10 | ControlFlowNode for ImportExpr | mypkg | | test7.py:9:19:9:21 | ControlFlowNode for ImportMember | mypkg.foo | | test_deep.py:1:6:1:21 | ControlFlowNode for ImportExpr | start.middle.end | diff --git a/python/ql/test/experimental/dataflow/typetracking/test.py b/python/ql/test/experimental/dataflow/typetracking/test.py index a797c302023..a4dd1baeac1 100644 --- a/python/ql/test/experimental/dataflow/typetracking/test.py +++ b/python/ql/test/experimental/dataflow/typetracking/test.py @@ -108,7 +108,7 @@ class Bar(Foo): def test_long_import_chain(): import foo.bar - foo.baz # $ SPURIOUS: tracked_foo_bar_baz + foo.baz x = foo.bar.baz # $ tracked_foo_bar_baz do_stuff(x) # $ tracked_foo_bar_baz