Merge pull request #7246 from tausbn/python/import-star-flow

Python: Support flow through `import *`
This commit is contained in:
yoff
2021-12-10 16:34:32 +01:00
committed by GitHub
18 changed files with 268 additions and 122 deletions

View File

@@ -358,134 +358,26 @@ module API {
)
}
/** Gets the name of a known built-in. */
private string getBuiltInName() {
// These lists were created by inspecting the `builtins` and `__builtin__` modules in
// Python 3 and 2 respectively, using the `dir` built-in.
// Built-in functions and exceptions shared between Python 2 and 3
result in [
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
// Exceptions
"ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
"BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
"FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
"ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
"LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
"OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
"RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
"SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
// Added for compatibility
"exec"
]
or
// Built-in constants shared between Python 2 and 3
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
or
// Python 3 only
result in [
"ascii", "breakpoint", "bytes", "exec", "aiter", "anext",
// Exceptions
"BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
"ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
"FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
"NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
"ResourceWarning", "StopAsyncIteration", "TimeoutError"
]
or
// Python 2 only
result in [
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
"unichr", "unicode", "xrange"
]
}
/**
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
*
* Currently this is an over-approximation, and may not account for things like overwriting a
* built-in with a different value.
*/
private DataFlow::Node likely_builtin(string name) {
exists(Module m |
result.asCfgNode() =
any(NameNode n |
possible_builtin_accessed_in_module(n, name, m) and
not possible_builtin_defined_in_module(name, m)
)
)
}
/**
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
* a value in the module `m`.
*/
private predicate possible_builtin_defined_in_module(string name, Module m) {
global_name_defined_in_module(name, m) and
name = getBuiltInName()
}
/**
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
* built-in) inside the module `m`.
*/
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
n.isGlobal() and
n.isLoad() and
name = n.getId() and
name = getBuiltInName() and
m = n.getEnclosingModule()
}
/**
* Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
* built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
*/
private predicate name_possibly_defined_in_import_star(NameNode n, string name, Scope s) {
n.isLoad() and
name = n.getId() and
// Not already defined in an enclosing scope.
not exists(LocalVariable v |
v.getId() = name and v.getScope() = n.getScope().getEnclosingScope*()
) and
not name = getBuiltInName() and
s = n.getScope().getEnclosingScope*() and
exists(potential_import_star_base(s)) and
not global_name_defined_in_module(name, n.getEnclosingModule())
}
/** Holds if a global variable called `name` is assigned a value in the module `m`. */
private predicate global_name_defined_in_module(string name, Module m) {
exists(NameNode n |
not exists(LocalVariable v | n.defines(v)) and
n.isStore() and
name = n.getId() and
m = n.getEnclosingModule()
)
}
private import semmle.python.dataflow.new.internal.Builtins
private import semmle.python.dataflow.new.internal.ImportStar
/**
* Gets the API graph node for all modules imported with `from ... import *` inside the scope `s`.
*
* For example, given
*
* `from foo.bar import *`
* ```python
* from foo.bar import *
* ```
*
* this would be the API graph node with the path
*
* `moduleImport("foo").getMember("bar")`
*/
private TApiNode potential_import_star_base(Scope s) {
exists(DataFlow::Node ref |
ref.asCfgNode() = any(ImportStarNode n | n.getScope() = s).getModule() and
use(result, ref)
exists(DataFlow::Node n |
n.asCfgNode() = ImportStar::potentialImportStarBase(s) and
use(result, n)
)
}
@@ -529,14 +421,14 @@ module API {
or
// Built-ins, treated as members of the module `builtins`
base = MkModuleImport("builtins") and
lbl = Label::member(any(string name | ref = likely_builtin(name)))
lbl = Label::member(any(string name | ref = Builtins::likelyBuiltin(name)))
or
// Unknown variables that may belong to a module imported with `import *`
exists(Scope s |
base = potential_import_star_base(s) and
lbl =
Label::member(any(string name |
name_possibly_defined_in_import_star(ref.asCfgNode(), name, s)
ImportStar::namePossiblyDefinedInImportStar(ref.asCfgNode(), name, s)
))
)
}

View File

@@ -0,0 +1,93 @@
/** Provides predicates for reasoning about built-ins in Python. */
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.ImportStar
module Builtins {
/** Gets the name of a known built-in. */
string getBuiltinName() {
// These lists were created by inspecting the `builtins` and `__builtin__` modules in
// Python 3 and 2 respectively, using the `dir` built-in.
// Built-in functions and exceptions shared between Python 2 and 3
result in [
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
// Exceptions
"ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
"BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
"FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
"ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
"LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
"OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
"RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
"SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
// Added for compatibility
"exec"
]
or
// Built-in constants shared between Python 2 and 3
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
or
// Python 3 only
result in [
"ascii", "breakpoint", "bytes", "exec",
// Exceptions
"BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
"ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
"FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
"NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
"ResourceWarning", "StopAsyncIteration", "TimeoutError"
]
or
// Python 2 only
result in [
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", "unichr",
"unicode", "xrange"
]
}
/**
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
*
* Currently this is an over-approximation, and may not account for things like overwriting a
* built-in with a different value.
*/
DataFlow::Node likelyBuiltin(string name) {
exists(Module m |
result.asCfgNode() =
any(NameNode n |
possible_builtin_accessed_in_module(n, name, m) and
not possible_builtin_defined_in_module(name, m)
)
)
}
/**
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
* a value in the module `m`.
*/
private predicate possible_builtin_defined_in_module(string name, Module m) {
ImportStar::globalNameDefinedInModule(name, m) and
name = getBuiltinName()
}
/**
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
* built-in) inside the module `m`.
*/
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
n.isGlobal() and
n.isLoad() and
name = n.getId() and
name = getBuiltinName() and
m = n.getEnclosingModule()
}
}

View File

@@ -2,6 +2,7 @@ private import python
private import DataFlowPublic
import semmle.python.SpecialMethods
private import semmle.python.essa.SsaCompute
private import semmle.python.dataflow.new.internal.ImportStar
/** Gets the callable in which this node occurs. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
@@ -948,7 +949,7 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
private predicate module_export(Module m, string name, CfgNode defn) {
exists(EssaVariable v |
v.getName() = name and
v.getAUse() = m.getANormalExit()
v.getAUse() = ImportStar::getStarImported*(m).getANormalExit()
|
defn.getNode() = v.getDefinition().(AssignmentDefinition).getValue()
or

View File

@@ -8,6 +8,7 @@ import semmle.python.dataflow.new.TypeTracker
import Attributes
import LocalSources
private import semmle.python.essa.SsaCompute
private import semmle.python.dataflow.new.internal.ImportStar
/**
* IPA type for data flow nodes.
@@ -30,7 +31,15 @@ newtype TNode =
/** A synthetic node representing the value of an object after a state change. */
TSyntheticPostUpdateNode(NeedsSyntheticPostUpdateNode pre) or
/** A node representing a global (module-level) variable in a specific module. */
TModuleVariableNode(Module m, GlobalVariable v) { v.getScope() = m and v.escapes() } or
TModuleVariableNode(Module m, GlobalVariable v) {
v.getScope() = m and
(
v.escapes()
or
isAccessedThroughImportStar(m) and
ImportStar::globalNameDefinedInModule(v.getId(), m)
)
} or
/**
* A node representing the overflow positional arguments to a call.
* That is, `call` contains more positional arguments than there are
@@ -346,6 +355,8 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
result.asCfgNode() = var.getALoad().getAFlowNode() and
// Ignore reads that happen when the module is imported. These are only executed once.
not result.getScope() = mod
or
this = import_star_read(result)
}
/** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
@@ -358,6 +369,13 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
override Location getLocation() { result = mod.getLocation() }
}
private predicate isAccessedThroughImportStar(Module m) { m = ImportStar::getStarImported(_) }
private ModuleVariableNode import_star_read(Node n) {
ImportStar::importStarResolvesTo(n.asCfgNode(), result.getModule()) and
n.asCfgNode().(NameNode).getId() = result.getVariable().getId()
}
/**
* The node holding the extra positional arguments to a call. This node is passed as a tuple
* to the starred parameter of the callable.

View File

@@ -0,0 +1,95 @@
/** Provides predicates for reasoning about uses of `import *` in Python. */
private import python
private import semmle.python.dataflow.new.internal.Builtins
cached
module ImportStar {
/**
* Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
* built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
*/
cached
predicate namePossiblyDefinedInImportStar(NameNode n, string name, Scope s) {
n.isLoad() and
name = n.getId() and
s = n.getScope().getEnclosingScope*() and
exists(potentialImportStarBase(s)) and
// Not already defined in an enclosing scope.
not isDefinedLocally(n.getNode())
}
/** Holds if `n` refers to a variable that is defined in the module in which it occurs. */
cached
private predicate isDefinedLocally(Name n) {
// Defined in an enclosing scope
enclosing_scope_defines_name(n.getScope(), n.getId())
or
// Defined as a built-in
n.getId() = Builtins::getBuiltinName()
or
// Defined as a global in this module
globalNameDefinedInModule(n.getId(), n.getEnclosingModule())
or
// A non-built-in that still has file-specific meaning
n.getId() in ["__name__", "__package__"]
}
pragma[nomagic]
private predicate enclosing_scope_defines_name(Scope s, string name) {
exists(LocalVariable v |
v.getId() = name and v.getScope() = s and not name = Builtins::getBuiltinName()
)
or
enclosing_scope_defines_name(s.getEnclosingScope(), name)
}
/** Holds if a global variable called `name` is assigned a value in the module `m`. */
cached
predicate globalNameDefinedInModule(string name, Module m) {
exists(NameNode n |
not exists(LocalVariable v | n.defines(v)) and
n.isStore() and
name = n.getId() and
m = n.getEnclosingModule()
)
}
/**
* Holds if `n` may refer to a global variable of the same name in the module `m`, accessible
* from the scope of `n` by a chain of `import *` imports.
*/
cached
predicate importStarResolvesTo(NameNode n, Module m) {
m = getStarImported+(n.getEnclosingModule()) and
globalNameDefinedInModule(n.getId(), m) and
not isDefinedLocally(n.getNode())
}
/**
* Gets a module that is imported from `m` via `import *`.
*/
cached
Module getStarImported(Module m) {
exists(ImportStar i |
i.getScope() = m and result = i.getModule().pointsTo().(ModuleValue).getScope()
)
}
/**
* Gets the data-flow node for a module imported with `from ... import *` inside the scope `s`.
*
* For example, given
*
* ```python
* from foo.bar import *
* from quux import *
* ```
*
* this would return the data-flow nodes corresponding to `foo.bar` and `quux`.
*/
cached
ControlFlowNode potentialImportStarBase(Scope s) {
result = any(ImportStarNode n | n.getScope() = s).getModule()
}
}

View File

@@ -1 +1 @@
known_attr = [1000]
known_attr = [1000] #$ writes=known_attr

View File

@@ -0,0 +1,2 @@
from trois import *
print(foo)

View File

@@ -0,0 +1,15 @@
| test3.py:1:17:1:19 | ControlFlowNode for ImportMember | test3.py:2:7:2:9 | ControlFlowNode for foo |
| three.py:1:1:1:3 | ControlFlowNode for foo | test1.py:2:7:2:9 | ControlFlowNode for foo |
| three.py:1:1:1:3 | ControlFlowNode for foo | test3.py:1:17:1:19 | ControlFlowNode for ImportMember |
| three.py:1:1:1:3 | ControlFlowNode for foo | test3.py:2:7:2:9 | ControlFlowNode for foo |
| three.py:1:1:1:3 | ControlFlowNode for foo | two.py:2:7:2:9 | ControlFlowNode for foo |
| three.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | test1.py:2:7:2:9 | ControlFlowNode for foo |
| three.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | test3.py:1:17:1:19 | ControlFlowNode for ImportMember |
| three.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | test3.py:2:7:2:9 | ControlFlowNode for foo |
| three.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | two.py:2:7:2:9 | ControlFlowNode for foo |
| trois.py:1:1:1:3 | ControlFlowNode for foo | deux.py:2:7:2:9 | ControlFlowNode for foo |
| trois.py:1:1:1:3 | ControlFlowNode for foo | test2.py:2:7:2:9 | ControlFlowNode for foo |
| trois.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | deux.py:2:7:2:9 | ControlFlowNode for foo |
| trois.py:1:7:1:7 | ControlFlowNode for IntegerLiteral | test2.py:2:7:2:9 | ControlFlowNode for foo |
| two.py:2:7:2:9 | ControlFlowNode for foo | test3.py:1:17:1:19 | ControlFlowNode for ImportMember |
| two.py:2:7:2:9 | ControlFlowNode for foo | test3.py:2:7:2:9 | ControlFlowNode for foo |

View File

@@ -0,0 +1,19 @@
import semmle.python.dataflow.new.DataFlow
/**
* A configuration to find all flows.
* To be used on tiny programs.
*/
class AllFlowsConfig extends DataFlow::Configuration {
AllFlowsConfig() { this = "AllFlowsConfig" }
override predicate isSource(DataFlow::Node node) { any() }
override predicate isSink(DataFlow::Node node) { any() }
}
from DataFlow::CfgNode source, DataFlow::CfgNode sink
where
source != sink and
exists(AllFlowsConfig cfg | cfg.hasFlow(source, sink))
select source, sink

View File

@@ -0,0 +1 @@
from two import *

View File

@@ -0,0 +1,2 @@
from one import *
print(foo)

View File

@@ -0,0 +1,2 @@
from un import *
print(foo)

View File

@@ -0,0 +1,2 @@
from one import foo
print(foo)

View File

@@ -0,0 +1 @@
foo = 5

View File

@@ -0,0 +1 @@
foo = 6

View File

@@ -0,0 +1,2 @@
from three import *
print(foo)

View File

@@ -0,0 +1 @@
from deux import *

View File

@@ -15,4 +15,3 @@
| code/r_regressions.py:46:1:46:14 | ControlFlowNode for FunctionExpr | code/r_regressions.py:52:9:52:12 | ControlFlowNode for fail |
| code/t_type.py:3:1:3:16 | ControlFlowNode for ClassExpr | code/t_type.py:6:1:6:9 | ControlFlowNode for type() |
| code/t_type.py:3:1:3:16 | ControlFlowNode for ClassExpr | code/t_type.py:13:5:13:13 | ControlFlowNode for type() |
| code/test_package/module2.py:5:5:5:6 | ControlFlowNode for Dict | code/j_convoluted_imports.py:25:1:25:1 | ControlFlowNode for r |