Python: Add support for from foo.bar import baz

This turned out to be fairly simple. Given an import such as
```python
from foo.bar.baz import quux
```
we create an API-graph node for each valid dotted prefix of
`foo.bar.baz`, i.e. `foo`, `foo.bar`, and `foo.bar.baz`. We then
connect these nodes with edges in the API graph, such that `foo` steps
to `foo.bar` along an edge labeled `bar`, etc.

In addition, we only allow undotted names to hang off of the API-graph
root. Thus, `foo` is reached from the root via a `moduleImport` edge,
and has a `getMember` edge for `bar` (which in turn has a `getMember`
edge for `baz`).

Relative imports are explicitly ignored.

Finally, this commit also adds inline tests for a variety of ways of
importing modules, including a copy of the "import-helper" tests (with
a few modifications to allow a single annotation per line, as these
get rather long quickly!).
This commit is contained in:
Taus Brock-Nannestad
2021-02-02 21:59:33 +01:00
parent cd7b013a0c
commit e4c3544a3f
16 changed files with 168 additions and 4 deletions

View File

@@ -230,14 +230,52 @@ module API {
/** The root of the API graph. */
MkRoot() or
/** An abstract representative for imports of the module called `name`. */
MkModuleImport(string name) { imports(_, name) } or
MkModuleImport(string name) {
imports(_, name) or name = any(ImportExpr e | not e.isRelative()).getAnImportedModuleName()
} or
/** A use of an API member at the node `nd`. */
MkUse(DataFlow::Node nd) { use(_, _, nd) }
class TUse = MkModuleImport or MkUse;
/**
* Holds if the dotted module name `sub` refers to the `member` member of `base`.
*
* Both `base` and `sub` are `MkModuleImport` nodes, and `member` is a single
* name component (it does not itself contain a `.`), so each step extends the
* dotted name by exactly one component.
*
* For instance, if `base` is `MkModuleImport("foo.bar")` and `sub` is
* `MkModuleImport("foo.bar.baz")`, then `prefix_member(base, "baz", sub)` holds.
*/
private predicate prefix_member(TApiNode base, string member, TApiNode sub) {
exists(string base_str, string sub_str |
base = MkModuleImport(base_str) and
sub = MkModuleImport(sub_str)
|
// The name of `sub` is the name of `base` followed by `.` and `member`.
base_str + "." + member = sub_str and
// Reject multi-component suffixes such as `bar.baz`.
not member.matches("%.%")
)
}
/** Holds if `imp` is an import of a module named `name` */
private predicate imports(DataFlow::Node imp, string name) { imp = DataFlow::importNode(name) }
private predicate imports(DataFlow::Node import_node, string name) {
// Case 1: an import whose alias is stored into a variable. The node is the
// expression on the value side of the alias.
exists(Variable var, Import imp, Alias alias |
alias = imp.getAName() and
alias.getAsname() = var.getAStore() and
(
// The alias value is an `ImportMember`.
// NOTE(review): unlike the `ImportExpr` case below, this branch has no explicit
// `isRelative()` check - confirm that relative imports are excluded elsewhere.
name = alias.getValue().(ImportMember).getImportedModuleName()
or
// The alias value is a non-relative `ImportExpr`.
name = alias.getValue().(ImportExpr).getImportedModuleName() and
not alias.getValue().(ImportExpr).isRelative()
) and
import_node.asExpr() = alias.getValue()
)
or
// Case 2: a non-relative `ImportExpr` that is not itself the value of an import alias.
exists(ImportExpr imp_expr |
not imp_expr.isRelative() and
imp_expr.getName() = name and
import_node.asCfgNode().getNode() = imp_expr and
// in `import foo.bar` we DON'T want to give a result for `importNode("foo.bar")`,
// only for `importNode("foo")`. We exclude those cases with the following clause.
not exists(Import imp | imp.getAName().getValue() = imp_expr)
)
}
/**
* Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
@@ -248,9 +286,11 @@ module API {
exists(DataFlow::LocalSourceNode src, DataFlow::LocalSourceNode pred |
use(base, src) and pred = trackUseNode(src)
|
// Reading an attribute on a node that is a use of `base`:
lbl = Label::memberFromRef(ref) and
ref = pred.getAnAttributeRead()
or
// Calling a node that is a use of `base`
lbl = Label::return() and
ref = pred.getAnInvocation()
)
@@ -263,7 +303,7 @@ module API {
predicate use(TApiNode nd, DataFlow::Node ref) {
exists(string name |
nd = MkModuleImport(name) and
ref = DataFlow::importNode(name)
imports(ref, name)
)
or
nd = MkUse(ref)
@@ -310,7 +350,15 @@ module API {
pred = MkRoot() and
lbl = Label::mod(m)
|
succ = MkModuleImport(m)
succ = MkModuleImport(m) and
// Only allow undotted names to count as base modules.
not m.matches("%.%")
)
or
/* Step from the dotted module name `foo.bar` to `foo.bar.baz` along an edge labeled `baz` */
exists(string member |
prefix_member(pred, member, succ) and
lbl = Label::member(member)
)
or
/* Every node that is a use of an API component is itself added to the API graph. */

View File

@@ -0,0 +1 @@
foo = 42

View File

@@ -0,0 +1 @@
pass

View File

@@ -0,0 +1 @@
pass

View File

@@ -0,0 +1 @@
semmle-extractor-options: --lang=3

View File

@@ -0,0 +1,34 @@
# Inline-expectation fixture: each `#$ use=<path>` annotation gives the
# API-graph path expected for a use on that line (one path per line).
import a1 #$ use=moduleImport("a1")
x = a1.blah1 #$ use=moduleImport("a1").getMember("blah1")
import a2 as m2 #$ use=moduleImport("a2")
x2 = m2.blah2 #$ use=moduleImport("a2").getMember("blah2")
import a3.b3 as m3 #$ use=moduleImport("a3").getMember("b3")
x3 = m3.blah3 #$ use=moduleImport("a3").getMember("b3").getMember("blah3")
from a4.b4 import c4 as m4 #$ use=moduleImport("a4").getMember("b4").getMember("c4")
x4 = m4.blah4 #$ use=moduleImport("a4").getMember("b4").getMember("c4").getMember("blah4")
# A plain dotted import binds only the top-level name `a`, so the import line
# itself is expected to be a use of `moduleImport("a")`.
import a.b.c.d #$ use=moduleImport("a")
ab = a.b #$ use=moduleImport("a").getMember("b")
abc = ab.c #$ use=moduleImport("a").getMember("b").getMember("c")
abcd = abc.d #$ use=moduleImport("a").getMember("b").getMember("c").getMember("d")
x5 = abcd() #$ use=moduleImport("a").getMember("b").getMember("c").getMember("d").getReturn()
y5 = x5.method() #$ use=moduleImport("a").getMember("b").getMember("c").getMember("d").getReturn().getMember("method").getReturn()
# Relative imports. These are ignored
from .foo import bar
from ..foobar import baz

View File

@@ -0,0 +1,6 @@
import mypkg #$ use=moduleImport("mypkg")
print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // 42
try:
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar")
except AttributeError as e:
print(e) # module 'mypkg' has no attribute 'bar'

View File

@@ -0,0 +1,4 @@
from mypkg import foo #$ use=moduleImport("mypkg").getMember("foo")
from mypkg import bar #$ use=moduleImport("mypkg").getMember("bar")
print(foo) #$ use=moduleImport("mypkg").getMember("foo")
print(bar) #$ use=moduleImport("mypkg").getMember("bar")

View File

@@ -0,0 +1,4 @@
import mypkg.foo #$ use=moduleImport("mypkg")
import mypkg.bar #$ use=moduleImport("mypkg")
print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // <module 'mypkg.foo' ...
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar") // <module 'mypkg.bar' ...

View File

@@ -0,0 +1,4 @@
import mypkg.foo as _foo #$ use=moduleImport("mypkg").getMember("foo")
import mypkg.bar as _bar #$ use=moduleImport("mypkg").getMember("bar")
print(_foo) #$ use=moduleImport("mypkg").getMember("foo") // <module 'mypkg.foo' ...
print(_bar) #$ use=moduleImport("mypkg").getMember("bar") // <module 'mypkg.bar' ...

View File

@@ -0,0 +1,10 @@
import mypkg #$ use=moduleImport("mypkg")
print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // 42
try:
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar")
except AttributeError as e:
print(e) # module 'mypkg' has no attribute 'bar'
from mypkg import bar as _bar #$ use=moduleImport("mypkg").getMember("bar")
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar") // <module 'mypkg.bar' ...

View File

@@ -0,0 +1,6 @@
import mypkg #$ use=moduleImport("mypkg")
print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // 42
import mypkg.foo #$ use=moduleImport("mypkg")
print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // <module 'mypkg.foo' ...

View File

@@ -0,0 +1,10 @@
from mypkg import foo #$ use=moduleImport("mypkg").getMember("foo")
print(foo) #$ use=moduleImport("mypkg").getMember("foo") // 42
import mypkg.foo #$ use=moduleImport("mypkg")
# The local name `foo` was bound before the submodule import, so it still holds 42.
print(foo) #$ use=moduleImport("mypkg").getMember("foo") // 42
print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // <module 'mypkg.foo' ...
from mypkg import foo #$ use=moduleImport("mypkg").getMember("foo")
print(foo) #$ use=moduleImport("mypkg").getMember("foo") // <module 'mypkg.foo' ...

View File

@@ -0,0 +1,4 @@
from start.middle.end import foo #$ use=moduleImport("start").getMember("middle").getMember("end").getMember("foo")
from start.middle.end import bar #$ use=moduleImport("start").getMember("middle").getMember("end").getMember("bar")
print(foo) #$ use=moduleImport("start").getMember("middle").getMember("end").getMember("foo")
print(bar) #$ use=moduleImport("start").getMember("middle").getMember("end").getMember("bar")

View File

@@ -0,0 +1,30 @@
import python
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
import semmle.python.ApiGraphs
/**
 * An inline-expectations test for API-graph uses.
 *
 * For every data-flow node that is a use of some API-graph node, this reports a
 * result with tag `use` whose value is the longest API-graph path among all uses
 * that start on the same line of the same file.
 */
class ApiUseTest extends InlineExpectationsTest {
  ApiUseTest() { this = "ApiUseTest" }

  override string getARelevantTag() { result = "use" }

  /** Holds if `n` is a use of the API-graph node `a`, and `l` is the location of `n`. */
  private predicate relevant_node(API::Node a, DataFlow::Node n, Location l) {
    n = a.getAUse() and l = n.getLocation()
  }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(DataFlow::Node n | relevant_node(_, n, location) |
      tag = "use" and
      // Only report the longest path on this line. Order by length first, so the
      // choice does not depend on the lexicographic order of the path strings;
      // the path itself is the tie-break, which keeps the result unique.
      value =
        max(API::Node a2, Location l2 |
          relevant_node(a2, _, l2) and
          l2.getFile() = location.getFile() and
          l2.getStartLine() = location.getStartLine()
        |
          a2.getPath() order by a2.getPath().length(), a2.getPath()
        ) and
      element = n.toString()
    )
  }
}