Merge pull request #5388 from tausbn/python-api-graph-builtins

Python: Support built-ins in API graphs
This commit is contained in:
yoff
2021-03-12 17:45:59 +01:00
committed by GitHub
9 changed files with 120 additions and 8 deletions

View File

@@ -216,6 +216,9 @@ module API {
*/
Node moduleImport(string m) { result = Impl::MkModuleImport(m) }
/** Gets a node corresponding to the built-in with the given name, if any. */
Node builtin(string n) { result = moduleImport("builtins").getMember(n) }
/**
* Provides the actual implementation of API graphs, cached for performance.
*
@@ -300,11 +303,18 @@ module API {
MkRoot() or
/** An abstract representative for imports of the module called `name`. */
MkModuleImport(string name) {
imports(_, name)
// Ignore the following module name for Python 2, as we alias `__builtin__` to `builtins` elsewhere
(name != "__builtin__" or major_version() = 3) and
(
imports(_, name)
or
// When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes
// `foo` and `foo.bar`:
name = any(ImportExpr e | not e.isRelative()).getAnImportedModuleName()
)
or
// When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes
// `foo` and `foo.bar`:
name = any(ImportExpr e | not e.isRelative()).getAnImportedModuleName()
// The `builtins` module should always be implicitly available
name = "builtins"
} or
/** A use of an API member at the node `nd`. */
MkUse(DataFlow::Node nd) { use(_, _, nd) }
@@ -339,6 +349,24 @@ module API {
)
}
private import semmle.python.types.Builtins as Builtins
/**
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
*
* Currently this is an over-approximation, and does not account for things like overwriting a
* built-in with a different value.
*/
private DataFlow::Node likely_builtin(string name) {
result.asCfgNode() =
any(NameNode n |
n.isGlobal() and
n.isLoad() and
name = n.getId() and
name = any(Builtins::Builtin b).getName()
)
}
/**
* Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
* `lbl` in the API graph.
@@ -369,6 +397,10 @@ module API {
ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
)
)
or
// Built-ins, treated as members of the module `builtins`
base = MkModuleImport("builtins") and
lbl = Label::member(any(string name | ref = likely_builtin(name)))
}
/**
@@ -381,6 +413,11 @@ module API {
imports(ref, name)
)
or
// Ensure the Python 2 `__builtin__` module gets the name of the Python 3 `builtins` module.
major_version() = 2 and
nd = MkModuleImport("builtins") and
imports(ref, "__builtin__")
or
nd = MkUse(ref)
}

View File

@@ -0,0 +1 @@
semmle-extractor-options: --lang=2 --max-import-depth=1

View File

@@ -0,0 +1,3 @@
def python2_style():
from __builtin__ import open #$ use=moduleImport("builtins").getMember("open")
open("hello.txt") #$ use=moduleImport("builtins").getMember("open").getReturn()

View File

@@ -0,0 +1,30 @@
import python
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
import semmle.python.ApiGraphs
class ApiUseTest extends InlineExpectationsTest {
ApiUseTest() { this = "ApiUseTest" }
override string getARelevantTag() { result = "use" }
private predicate relevant_node(API::Node a, DataFlow::Node n, Location l) {
n = a.getAUse() and l = n.getLocation()
}
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(API::Node a, DataFlow::Node n | relevant_node(a, n, location) |
tag = "use" and
// Only report the longest path on this line:
value =
max(API::Node a2, Location l2 |
relevant_node(a2, _, l2) and
l2.getFile() = location.getFile() and
l2.getStartLine() = location.getStartLine()
|
a2.getPath()
) and
element = n.toString()
)
}
}

View File

@@ -100,3 +100,42 @@ def internal():
pass
int_instance = IntMyView() #$ use=moduleImport("pflask").getMember("views").getMember("View").getASubclass().getReturn()
# Built-ins
def use_of_builtins():
for x in range(5): #$ use=moduleImport("builtins").getMember("range").getReturn()
if x < len([]): #$ use=moduleImport("builtins").getMember("len").getReturn()
print("Hello") #$ use=moduleImport("builtins").getMember("print").getReturn()
raise Exception("Farewell") #$ use=moduleImport("builtins").getMember("Exception").getReturn()
def imported_builtins():
import builtins #$ use=moduleImport("builtins")
def open(f):
return builtins.open(f) #$ MISSING: use=moduleImport("builtins").getMember("open").getReturn()
def redefine_print():
def my_print(x):
import builtins #$ use=moduleImport("builtins")
builtins.print("I'm printing", x) #$ use=moduleImport("builtins").getMember("print").getReturn()
print = my_print
print("these words")
def local_redefine_range():
range = 5
return range
def global_redefine_range():
global range
range = 6
return range #$ SPURIOUS: use=moduleImport("builtins").getMember("range")
def obscured_print():
p = print #$ use=moduleImport("builtins").getMember("print")
p("Can you see me?") #$ use=moduleImport("builtins").getMember("print").getReturn()
def python2_style():
# In Python 3, `__builtin__` has no special meaning.
from __builtin__ import open #$ use=moduleImport("__builtin__").getMember("open")
open("hello.txt") #$ use=moduleImport("__builtin__").getMember("open").getReturn()

View File

@@ -2,5 +2,5 @@ import mypkg #$ use=moduleImport("mypkg")
print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // 42
try:
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar")
except AttributeError as e:
print(e) # module 'mypkg' has no attribute 'bar'
except AttributeError as e: #$ use=moduleImport("builtins").getMember("AttributeError")
print(e) #$ use=moduleImport("builtins").getMember("print").getReturn() // module 'mypkg' has no attribute 'bar'

View File

@@ -3,8 +3,8 @@ import mypkg #$ use=moduleImport("mypkg")
print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // 42
try:
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar")
except AttributeError as e:
print(e) # module 'mypkg' has no attribute 'bar'
except AttributeError as e: #$ use=moduleImport("builtins").getMember("AttributeError")
print(e) #$ use=moduleImport("builtins").getMember("print").getReturn() // module 'mypkg' has no attribute 'bar'
from mypkg import bar as _bar #$ use=moduleImport("mypkg").getMember("bar")
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar") // <module 'mypkg.bar' ...