Merge pull request #12244 from RasmusWL/import-refined

Python: Fix import of refined variable
This commit is contained in:
Taus
2023-03-24 13:22:19 +01:00
committed by GitHub
29 changed files with 645 additions and 41 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Fixed module resolution so we allow imports of definitions that have had an attribute assigned to them, such as `class Foo; Foo.bar = 42`.

View File

@@ -65,31 +65,75 @@ private import semmle.python.dataflow.new.internal.DataFlowPrivate
*/
module ImportResolution {
/**
* Holds if the module `m` defines a name `name` by assigning `defn` to it. This is an
* overapproximation, as `name` may not in fact be exported (e.g. by defining an `__all__` that does
* not include `name`).
* Holds if there is an ESSA step from `defFrom` to `defTo`, which should be allowed
* for import resolution.
*/
private predicate allowedEssaImportStep(EssaDefinition defFrom, EssaDefinition defTo) {
// to handle definitions guarded by if-then-else: every input of a phi-function
// steps to the phi-function itself
defFrom = defTo.(PhiFunction).getAnInput()
or
// refined variable: the definition of the refinement's input variable steps to the
// refinement (e.g. for `foo = ...; foo.bar = ...`, the definition from `foo = ...`
// steps to the refinement introduced by `foo.bar = ...`)
// example: https://github.com/nvbn/thefuck/blob/ceeaeab94b5df5a4fe9d94d61e4f6b0bbea96378/thefuck/utils.py#L25-L45
defFrom = defTo.(EssaNodeRefinement).getInput().getDefinition()
}
/**
* Holds if the module `m` defines a name `name` with the value `val`. The value
* represents the value `name` will have at the end of the module (the last place we
* have def-use flow to).
*
* Note: The handling of re-exporting imports is a bit simplistic. We assume that if
* an import is made, it will be re-exported (which will not be the case if a new
* value is assigned to the name, or it is deleted).
*/
pragma[nomagic]
predicate module_export(Module m, string name, DataFlow::CfgNode defn) {
exists(EssaVariable v, EssaDefinition essaDef |
v.getName() = name and
v.getAUse() = ImportStar::getStarImported*(m).getANormalExit() and
(
essaDef = v.getDefinition()
or
// to handle definitions guarded by if-then-else
essaDef = v.getDefinition().(PhiFunction).getAnInput()
)
predicate module_export(Module m, string name, DataFlow::Node val) {
// Definitions made inside `m` itself
//
// for code such as `foo = ...; foo.bar = ...` there will be TWO
// EssaDefinition/EssaVariable: one for `foo = ...` (AssignmentDefinition) and one
// for `foo.bar = ...` (EssaNodeRefinement). The EssaNodeRefinement is the one
// that will reach the end of the module (normal exit).
//
// However, we cannot just use the EssaNodeRefinement as the `val`, because the
// normal data-flow depends on use-use flow, and use-use flow targets CFG nodes not
// EssaNodes. So we need to go back from the EssaDefinition/EssaVariable that
// reaches the end of the module, to the first definition of the variable, and then
// track forwards using use-use flow to find a suitable CFG node that has flow into
// it from use-use flow.
exists(EssaVariable lastUseVar, EssaVariable firstDef |
lastUseVar.getName() = name and
// we ignore special variable $ introduced by our analysis (not used for anything)
// we ignore special variable * introduced by `from <pkg> import *` -- TODO: understand why we even have this?
not name in ["$", "*"] and
lastUseVar.getAUse() = m.getANormalExit() and
allowedEssaImportStep*(firstDef, lastUseVar) and
not allowedEssaImportStep(_, firstDef)
|
defn.getNode() = essaDef.(AssignmentDefinition).getValue()
not EssaFlow::defToFirstUse(firstDef, _) and
val.asVar() = firstDef
or
defn.getNode() = essaDef.(ArgumentRefinement).getArgument()
exists(ControlFlowNode mid, ControlFlowNode end |
EssaFlow::defToFirstUse(firstDef, mid) and
EssaFlow::useToNextUse*(mid, end) and
not EssaFlow::useToNextUse(end, _) and
val.asCfgNode() = end
)
)
or
// re-exports from `from <pkg> import *`
exists(Module importedFrom |
importedFrom = ImportStar::getStarImported(m) and
module_export(importedFrom, name, val) and
potential_module_export(importedFrom, name)
)
or
// re-exports from `import <pkg>` or `from <pkg> import <stuff>`
exists(Alias a |
defn.asExpr() = [a.getValue(), a.getValue().(ImportMember).getModule()] and
val.asExpr() = a.getValue() and
a.getAsname().(Name).getId() = name and
defn.getScope() = m
val.getScope() = m
)
}
@@ -263,9 +307,21 @@ module ImportResolution {
module_reexport(reexporter, attr_name, m)
)
or
// Submodules that are implicitly defined with relative imports of the form `from .foo import ...`.
// In practice, we create a definition for each module in a package, even if it is not imported.
// submodules of packages will be available as `<pkg>.<submodule>` after doing
// `import <pkg>.<submodule>` at least once in the program, or can be directly
// imported with `from <pkg> import <submodule>` (even with an empty
// `<pkg>.__init__` file).
//
// Until an import of `<pkg>.<submodule>` is executed, it is technically possible
// that `<pkg>.<submodule>` (or `from <pkg> import <submodule>`) can refer to an
// attribute set in `<pkg>.__init__`.
//
// Therefore, if there is an attribute defined in `<pkg>.__init__` with the same
// name as a submodule, we always consider that this attribute _could_ be a
// reference to the submodule, even if we don't know that the submodule has been
// imported yet.
exists(string submodule, Module package |
submodule = result.asVar().getName() and
SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
package.getEntryNode()) and
m = getModuleFromName(package.getPackageName() + "." + submodule)

View File

@@ -0,0 +1 @@
../coverage/argumentRoutingTest.ql

View File

@@ -0,0 +1,54 @@
# Python 2 specific tests, like the one in coverage/classes.py
#
# User-defined methods, both instance methods and class methods, can be called in many non-standard ways
# i.e. differently from simply `c.f()` or `C.f()`. For example, a user-defined `__await__` method on a
# class `C` will be called by the syntactic construct `await c` when `c` is an instance of `C`.
#
# These tests should cover all the class calls that we hope to support.
# It is based on https://docs.python.org/3/reference/datamodel.html, and headings refer there.
#
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
# This can be checked by running validTest.py.
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
def SINK1(x):
pass
def SINK2(x):
pass
def SINK3(x):
pass
def SINK4(x):
pass
def OK():
print("OK")
# 3.3.8. Emulating numeric types
# object.__index__(self)
class With_index:
def __index__(self):
SINK1(self)
OK() # Call not found
return 0
def test_index():
import operator
with_index = With_index() #$ MISSING: arg1="SSA variable with_index" func=With_index.__index__
operator.index(with_index)

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1 --lang=2

View File

@@ -0,0 +1 @@
../coverage/argumentRoutingTest.ql

View File

@@ -0,0 +1,72 @@
# Python 3 specific tests, like the one in coverage/classes.py
#
# User-defined methods, both instance methods and class methods, can be called in many non-standard ways
# i.e. differently from simply `c.f()` or `C.f()`. For example, a user-defined `__await__` method on a
# class `C` will be called by the syntactic construct `await c` when `c` is an instance of `C`.
#
# These tests should cover all the class calls that we hope to support.
# It is based on https://docs.python.org/3/reference/datamodel.html, and headings refer there.
#
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
# This can be checked by running validTest.py.
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
def SINK1(x):
pass
def SINK2(x):
pass
def SINK3(x):
pass
def SINK4(x):
pass
def OK():
print("OK")
# 3.3.7. Emulating container types
# object.__length_hint__(self)
class With_length_hint:
def __length_hint__(self):
SINK1(self)
OK()
return 0
def test_length_hint():
import operator
with_length_hint = With_length_hint() #$ arg1="SSA variable with_length_hint" func=With_length_hint.__length_hint__
operator.length_hint(with_length_hint)
# 3.3.8. Emulating numeric types
# object.__index__(self)
class With_index:
def __index__(self):
SINK1(self)
OK() # Call not found
return 0
def test_index():
import operator
with_index = With_index() #$ arg1="SSA variable with_index" func=With_index.__index__
operator.index(with_index)

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1 --lang=3

View File

@@ -535,21 +535,6 @@ def test_len_if():
pass
# object.__length_hint__(self)
class With_length_hint:
def __length_hint__(self):
SINK1(self)
OK() # Call not found
return 0
def test_length_hint():
import operator
with_length_hint = With_length_hint() #$ MISSING: arg1="SSA variable with_length_hint" func=With_length_hint.__length_hint__
operator.length_hint(with_length_hint)
# object.__getitem__(self, key)
class With_getitem:
def __getitem__(self, key):
@@ -1378,13 +1363,6 @@ class With_index:
return 0
def test_index():
import operator
with_index = With_index() #$ MISSING: arg1="SSA variable with_index" func=With_index.__index__
operator.index(with_index)
def test_index_slicing():
with_index = With_index() #$ MISSING: arg1="SSA variable with_index" func=With_index.__index__
[0][with_index:1]

View File

@@ -64,6 +64,8 @@ if __name__ == "__main__":
check_tests_valid("coverage.test")
check_tests_valid("coverage.argumentPassing")
check_tests_valid("coverage.datamodel")
check_tests_valid("coverage-py2.classes")
check_tests_valid("coverage-py3.classes")
check_tests_valid("variable-capture.in")
check_tests_valid("variable-capture.nonlocal")
check_tests_valid("variable-capture.dict")

View File

@@ -0,0 +1,160 @@
| attr_clash.__init__ | __file__ | attr_clash/__init__.py:6:6:6:13 | ControlFlowNode for __file__ |
| attr_clash.__init__ | __name__ | attr_clash/__init__.py:0:0:0:0 | GSSA Variable __name__ |
| attr_clash.__init__ | __package__ | attr_clash/__init__.py:0:0:0:0 | GSSA Variable __package__ |
| attr_clash.__init__ | clashing_attr | attr_clash/__init__.py:4:1:4:13 | GSSA Variable clashing_attr |
| attr_clash.__init__ | enter | attr_clash/__init__.py:2:1:2:5 | ControlFlowNode for enter |
| attr_clash.__init__ | exit | attr_clash/__init__.py:6:1:6:4 | ControlFlowNode for exit |
| attr_clash.clashing_attr | __file__ | attr_clash/clashing_attr.py:4:6:4:13 | ControlFlowNode for __file__ |
| attr_clash.clashing_attr | __name__ | attr_clash/clashing_attr.py:0:0:0:0 | GSSA Variable __name__ |
| attr_clash.clashing_attr | __package__ | attr_clash/clashing_attr.py:0:0:0:0 | GSSA Variable __package__ |
| attr_clash.clashing_attr | enter | attr_clash/clashing_attr.py:2:1:2:5 | ControlFlowNode for enter |
| attr_clash.clashing_attr | exit | attr_clash/clashing_attr.py:4:1:4:4 | ControlFlowNode for exit |
| attr_clash.non_clashing_submodule | __file__ | attr_clash/non_clashing_submodule.py:4:6:4:13 | ControlFlowNode for __file__ |
| attr_clash.non_clashing_submodule | __name__ | attr_clash/non_clashing_submodule.py:0:0:0:0 | GSSA Variable __name__ |
| attr_clash.non_clashing_submodule | __package__ | attr_clash/non_clashing_submodule.py:0:0:0:0 | GSSA Variable __package__ |
| attr_clash.non_clashing_submodule | enter | attr_clash/non_clashing_submodule.py:2:1:2:5 | ControlFlowNode for enter |
| attr_clash.non_clashing_submodule | exit | attr_clash/non_clashing_submodule.py:4:1:4:4 | ControlFlowNode for exit |
| bar | __file__ | bar.py:6:6:6:13 | ControlFlowNode for __file__ |
| bar | __name__ | bar.py:0:0:0:0 | GSSA Variable __name__ |
| bar | __package__ | bar.py:0:0:0:0 | GSSA Variable __package__ |
| bar | bar_attr | bar.py:4:1:4:8 | GSSA Variable bar_attr |
| bar | enter | bar.py:2:1:2:5 | ControlFlowNode for enter |
| bar | exit | bar.py:6:1:6:4 | ControlFlowNode for exit |
| baz | __file__ | baz.py:6:6:6:13 | ControlFlowNode for __file__ |
| baz | __name__ | baz.py:0:0:0:0 | GSSA Variable __name__ |
| baz | __package__ | baz.py:0:0:0:0 | GSSA Variable __package__ |
| baz | baz_attr | baz.py:4:1:4:8 | GSSA Variable baz_attr |
| baz | enter | baz.py:2:1:2:5 | ControlFlowNode for enter |
| baz | exit | baz.py:6:1:6:4 | ControlFlowNode for exit |
| block_flow_check | SOURCE | block_flow_check.py:12:25:12:30 | ControlFlowNode for SOURCE |
| block_flow_check | __file__ | block_flow_check.py:14:6:14:13 | ControlFlowNode for __file__ |
| block_flow_check | __name__ | block_flow_check.py:0:0:0:0 | GSSA Variable __name__ |
| block_flow_check | __package__ | block_flow_check.py:0:0:0:0 | GSSA Variable __package__ |
| block_flow_check | check | block_flow_check.py:12:1:12:5 | ControlFlowNode for check |
| block_flow_check | enter | block_flow_check.py:2:1:2:5 | ControlFlowNode for enter |
| block_flow_check | exit | block_flow_check.py:14:1:14:4 | ControlFlowNode for exit |
| block_flow_check | globals | block_flow_check.py:12:33:12:39 | ControlFlowNode for globals |
| block_flow_check | object | block_flow_check.py:4:14:4:19 | ControlFlowNode for object |
| block_flow_check | staticmethod | block_flow_check.py:0:0:0:0 | GSSA Variable staticmethod |
| foo | __file__ | foo.py:14:6:14:13 | ControlFlowNode for __file__ |
| foo | __name__ | foo.py:0:0:0:0 | GSSA Variable __name__ |
| foo | __package__ | foo.py:0:0:0:0 | GSSA Variable __package__ |
| foo | __private_foo_attr | foo.py:8:1:8:18 | GSSA Variable __private_foo_attr |
| foo | bar_reexported | foo.py:11:8:11:10 | ControlFlowNode for ImportExpr |
| foo | bar_reexported | foo.py:12:34:12:47 | ControlFlowNode for bar_reexported |
| foo | check | foo.py:12:1:12:5 | ControlFlowNode for check |
| foo | enter | foo.py:2:1:2:5 | ControlFlowNode for enter |
| foo | exit | foo.py:14:1:14:4 | ControlFlowNode for exit |
| foo | foo_attr | foo.py:5:1:5:8 | GSSA Variable foo_attr |
| foo | globals | foo.py:12:71:12:77 | ControlFlowNode for globals |
| generous_export | Exception | generous_export.py:16:11:16:19 | ControlFlowNode for Exception |
| generous_export | SOURCE | generous_export.py:15:11:15:16 | ControlFlowNode for SOURCE |
| generous_export | SOURCE | generous_export.py:20:25:20:30 | ControlFlowNode for SOURCE |
| generous_export | __file__ | generous_export.py:22:6:22:13 | ControlFlowNode for __file__ |
| generous_export | __name__ | generous_export.py:0:0:0:0 | GSSA Variable __name__ |
| generous_export | __package__ | generous_export.py:0:0:0:0 | GSSA Variable __package__ |
| generous_export | check | generous_export.py:20:1:20:5 | ControlFlowNode for check |
| generous_export | enter | generous_export.py:2:1:2:5 | ControlFlowNode for enter |
| generous_export | eval | generous_export.py:10:4:10:7 | ControlFlowNode for eval |
| generous_export | exit | generous_export.py:22:1:22:4 | ControlFlowNode for exit |
| generous_export | globals | generous_export.py:20:33:20:39 | ControlFlowNode for globals |
| generous_export | object | generous_export.py:4:14:4:19 | ControlFlowNode for object |
| generous_export | print | generous_export.py:15:5:15:9 | ControlFlowNode for print |
| generous_export | staticmethod | generous_export.py:0:0:0:0 | GSSA Variable staticmethod |
| has_defined_all | __all__ | has_defined_all.py:7:1:7:7 | GSSA Variable __all__ |
| has_defined_all | __file__ | has_defined_all.py:9:6:9:13 | ControlFlowNode for __file__ |
| has_defined_all | __name__ | has_defined_all.py:0:0:0:0 | GSSA Variable __name__ |
| has_defined_all | __package__ | has_defined_all.py:0:0:0:0 | GSSA Variable __package__ |
| has_defined_all | all_defined_bar | has_defined_all.py:5:1:5:15 | GSSA Variable all_defined_bar |
| has_defined_all | all_defined_foo | has_defined_all.py:4:1:4:15 | GSSA Variable all_defined_foo |
| has_defined_all | enter | has_defined_all.py:2:1:2:5 | ControlFlowNode for enter |
| has_defined_all | exit | has_defined_all.py:9:1:9:4 | ControlFlowNode for exit |
| has_defined_all_copy | __all__ | has_defined_all_copy.py:9:1:9:7 | GSSA Variable __all__ |
| has_defined_all_copy | __file__ | has_defined_all_copy.py:11:6:11:13 | ControlFlowNode for __file__ |
| has_defined_all_copy | __name__ | has_defined_all_copy.py:0:0:0:0 | GSSA Variable __name__ |
| has_defined_all_copy | __package__ | has_defined_all_copy.py:0:0:0:0 | GSSA Variable __package__ |
| has_defined_all_copy | all_defined_bar_copy | has_defined_all_copy.py:7:1:7:20 | GSSA Variable all_defined_bar_copy |
| has_defined_all_copy | all_defined_foo_copy | has_defined_all_copy.py:6:1:6:20 | GSSA Variable all_defined_foo_copy |
| has_defined_all_copy | enter | has_defined_all_copy.py:4:1:4:5 | ControlFlowNode for enter |
| has_defined_all_copy | exit | has_defined_all_copy.py:11:1:11:4 | ControlFlowNode for exit |
| has_defined_all_indirection | __file__ | has_defined_all_indirection.py:6:6:6:13 | ControlFlowNode for __file__ |
| has_defined_all_indirection | __name__ | has_defined_all_indirection.py:0:0:0:0 | GSSA Variable __name__ |
| has_defined_all_indirection | __package__ | has_defined_all_indirection.py:0:0:0:0 | GSSA Variable __package__ |
| has_defined_all_indirection | all_defined_foo_copy | has_defined_all_copy.py:6:1:6:20 | GSSA Variable all_defined_foo_copy |
| has_defined_all_indirection | enter | has_defined_all_indirection.py:2:1:2:5 | ControlFlowNode for enter |
| has_defined_all_indirection | exit | has_defined_all_indirection.py:6:1:6:4 | ControlFlowNode for exit |
| if_then_else | __file__ | if_then_else.py:16:6:16:13 | ControlFlowNode for __file__ |
| if_then_else | __name__ | if_then_else.py:0:0:0:0 | GSSA Variable __name__ |
| if_then_else | __package__ | if_then_else.py:0:0:0:0 | GSSA Variable __package__ |
| if_then_else | enter | if_then_else.py:2:1:2:5 | ControlFlowNode for enter |
| if_then_else | eval | if_then_else.py:11:8:11:11 | ControlFlowNode for eval |
| if_then_else | exit | if_then_else.py:16:1:16:4 | ControlFlowNode for exit |
| if_then_else | if_then_else_defined | if_then_else.py:7:5:7:24 | GSSA Variable if_then_else_defined |
| if_then_else | if_then_else_defined | if_then_else.py:12:9:12:28 | GSSA Variable if_then_else_defined |
| if_then_else | if_then_else_defined | if_then_else.py:14:9:14:28 | GSSA Variable if_then_else_defined |
| if_then_else_refined | SOURCE | if_then_else_refined.py:11:11:11:16 | ControlFlowNode for SOURCE |
| if_then_else_refined | SOURCE | if_then_else_refined.py:13:11:13:16 | ControlFlowNode for SOURCE |
| if_then_else_refined | __file__ | if_then_else_refined.py:19:6:19:13 | ControlFlowNode for __file__ |
| if_then_else_refined | __name__ | if_then_else_refined.py:0:0:0:0 | GSSA Variable __name__ |
| if_then_else_refined | __package__ | if_then_else_refined.py:0:0:0:0 | GSSA Variable __package__ |
| if_then_else_refined | check | if_then_else_refined.py:17:1:17:5 | ControlFlowNode for check |
| if_then_else_refined | enter | if_then_else_refined.py:4:1:4:5 | ControlFlowNode for enter |
| if_then_else_refined | eval | if_then_else_refined.py:10:4:10:7 | ControlFlowNode for eval |
| if_then_else_refined | exit | if_then_else_refined.py:19:1:19:4 | ControlFlowNode for exit |
| if_then_else_refined | globals | if_then_else_refined.py:17:24:17:30 | ControlFlowNode for globals |
| if_then_else_refined | src | if_then_else_refined.py:17:19:17:21 | ControlFlowNode for src |
| package.__init__ | __file__ | package/__init__.py:7:6:7:13 | ControlFlowNode for __file__ |
| package.__init__ | __name__ | package/__init__.py:0:0:0:0 | GSSA Variable __name__ |
| package.__init__ | __package__ | package/__init__.py:0:0:0:0 | GSSA Variable __package__ |
| package.__init__ | attr_used_in_subpackage | package/__init__.py:4:1:4:23 | GSSA Variable attr_used_in_subpackage |
| package.__init__ | enter | package/__init__.py:2:1:2:5 | ControlFlowNode for enter |
| package.__init__ | exit | package/__init__.py:7:1:7:4 | ControlFlowNode for exit |
| package.__init__ | package_attr | package/__init__.py:5:1:5:12 | GSSA Variable package_attr |
| package.subpackage2.__init__ | __file__ | package/subpackage2/__init__.py:6:6:6:13 | ControlFlowNode for __file__ |
| package.subpackage2.__init__ | __name__ | package/subpackage2/__init__.py:0:0:0:0 | GSSA Variable __name__ |
| package.subpackage2.__init__ | __package__ | package/subpackage2/__init__.py:0:0:0:0 | GSSA Variable __package__ |
| package.subpackage2.__init__ | enter | package/subpackage2/__init__.py:2:1:2:5 | ControlFlowNode for enter |
| package.subpackage2.__init__ | exit | package/subpackage2/__init__.py:6:1:6:4 | ControlFlowNode for exit |
| package.subpackage2.__init__ | subpackage2_attr | package/subpackage2/__init__.py:4:1:4:16 | GSSA Variable subpackage2_attr |
| package.subpackage.__init__ | __file__ | package/subpackage/__init__.py:14:6:14:13 | ControlFlowNode for __file__ |
| package.subpackage.__init__ | __name__ | package/subpackage/__init__.py:0:0:0:0 | GSSA Variable __name__ |
| package.subpackage.__init__ | __package__ | package/subpackage/__init__.py:0:0:0:0 | GSSA Variable __package__ |
| package.subpackage.__init__ | check | package/subpackage/__init__.py:12:1:12:5 | ControlFlowNode for check |
| package.subpackage.__init__ | enter | package/subpackage/__init__.py:2:1:2:5 | ControlFlowNode for enter |
| package.subpackage.__init__ | exit | package/subpackage/__init__.py:14:1:14:4 | ControlFlowNode for exit |
| package.subpackage.__init__ | globals | package/subpackage/__init__.py:12:79:12:85 | ControlFlowNode for globals |
| package.subpackage.__init__ | imported_attr | package/subpackage/__init__.py:7:16:7:55 | ControlFlowNode for ImportMember |
| package.subpackage.__init__ | imported_attr | package/subpackage/__init__.py:8:24:8:36 | ControlFlowNode for imported_attr |
| package.subpackage.__init__ | irrelevant_attr | package/subpackage/__init__.py:11:24:11:38 | ControlFlowNode for ImportMember |
| package.subpackage.__init__ | irrelevant_attr | package/subpackage/__init__.py:11:24:11:38 | GSSA Variable irrelevant_attr |
| package.subpackage.__init__ | submodule | package/subpackage/__init__.py:12:35:12:43 | ControlFlowNode for submodule |
| package.subpackage.__init__ | subpackage_attr | package/subpackage/__init__.py:4:1:4:15 | GSSA Variable subpackage_attr |
| package.subpackage.submodule | __file__ | package/subpackage/submodule.py:7:6:7:13 | ControlFlowNode for __file__ |
| package.subpackage.submodule | __name__ | package/subpackage/submodule.py:0:0:0:0 | GSSA Variable __name__ |
| package.subpackage.submodule | __package__ | package/subpackage/submodule.py:0:0:0:0 | GSSA Variable __package__ |
| package.subpackage.submodule | enter | package/subpackage/submodule.py:2:1:2:5 | ControlFlowNode for enter |
| package.subpackage.submodule | exit | package/subpackage/submodule.py:7:1:7:4 | ControlFlowNode for exit |
| package.subpackage.submodule | irrelevant_attr | package/subpackage/submodule.py:5:1:5:15 | GSSA Variable irrelevant_attr |
| package.subpackage.submodule | submodule_attr | package/subpackage/submodule.py:4:1:4:14 | GSSA Variable submodule_attr |
| refined | SOURCE | refined.py:12:25:12:30 | ControlFlowNode for SOURCE |
| refined | __file__ | refined.py:14:6:14:13 | ControlFlowNode for __file__ |
| refined | __name__ | refined.py:0:0:0:0 | GSSA Variable __name__ |
| refined | __package__ | refined.py:0:0:0:0 | GSSA Variable __package__ |
| refined | check | refined.py:12:1:12:5 | ControlFlowNode for check |
| refined | enter | refined.py:2:1:2:5 | ControlFlowNode for enter |
| refined | exit | refined.py:14:1:14:4 | ControlFlowNode for exit |
| refined | globals | refined.py:12:33:12:39 | ControlFlowNode for globals |
| refined | object | refined.py:4:14:4:19 | ControlFlowNode for object |
| simplistic_reexport | __file__ | simplistic_reexport.py:19:6:19:13 | ControlFlowNode for __file__ |
| simplistic_reexport | __name__ | simplistic_reexport.py:0:0:0:0 | GSSA Variable __name__ |
| simplistic_reexport | __package__ | simplistic_reexport.py:0:0:0:0 | GSSA Variable __package__ |
| simplistic_reexport | bar_attr | simplistic_reexport.py:6:17:6:24 | ControlFlowNode for ImportMember |
| simplistic_reexport | bar_attr | simplistic_reexport.py:10:19:10:26 | ControlFlowNode for bar_attr |
| simplistic_reexport | baz_attr | baz.py:4:1:4:8 | GSSA Variable baz_attr |
| simplistic_reexport | baz_attr | simplistic_reexport.py:17:19:17:26 | ControlFlowNode for baz_attr |
| simplistic_reexport | check | simplistic_reexport.py:17:1:17:5 | ControlFlowNode for check |
| simplistic_reexport | enter | baz.py:2:1:2:5 | ControlFlowNode for enter |
| simplistic_reexport | enter | simplistic_reexport.py:4:1:4:5 | ControlFlowNode for enter |
| simplistic_reexport | exit | baz.py:6:1:6:4 | ControlFlowNode for exit |
| simplistic_reexport | exit | simplistic_reexport.py:19:1:19:4 | ControlFlowNode for exit |
| simplistic_reexport | globals | simplistic_reexport.py:17:44:17:50 | ControlFlowNode for globals |

View File

@@ -0,0 +1,17 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.internal.ImportResolution
from Module m, string name, DataFlow::Node defn
where
ImportResolution::module_export(m, name, defn) and
exists(m.getLocation().getFile().getRelativePath()) and
not defn.getScope() = any(Module trace | trace.getName() = "trace") and
not m.getName() = "main" and
// Since we test on both Python 2 and Python 3, but `namespace_package` is not allowed
// on Python 2 because of the missing `__init__.py` files, we remove those results
// from Python 3 tests as well. One alternative is to only run these tests under
// Python 3, but that does not seem like a good solution -- we could easily miss a
// Python 2 only regression then :O
not m.getName() = "namespace_package.namespace_module"
select m.getName(), name, defn

View File

@@ -0,0 +1,6 @@
from trace import *
enter(__file__)
baz_attr = "baz_attr"
exit(__file__)

View File

@@ -0,0 +1,14 @@
from trace import *
enter(__file__)
class SOURCE(object):
@staticmethod
def block_flow(): pass
check("SOURCE", SOURCE, SOURCE, globals()) #$ prints=SOURCE
SOURCE.block_flow()
check("SOURCE", SOURCE, SOURCE, globals())
exit(__file__)

View File

@@ -0,0 +1,22 @@
from trace import *
enter(__file__)
class SOURCE(object):
@staticmethod
def block_flow(): pass
check("SOURCE", SOURCE, SOURCE, globals()) #$ prints=SOURCE
if eval("False"):
# With our current import resolution, this value for SOURCE will be considered to be
# a valid value at the end of this module, because it's the end of a use-use flow.
# This is clearly wrong, so our import resolution is a bit too generous on what is
# exported
print(SOURCE)
raise Exception()
SOURCE.block_flow()
check("SOURCE", SOURCE, SOURCE, globals())
exit(__file__)

View File

@@ -0,0 +1,9 @@
from trace import *
enter(__file__)
all_defined_foo = "all_defined_foo"
all_defined_bar = "all_defined_bar"
__all__ = ["all_defined_foo"]
exit(__file__)

View File

@@ -0,0 +1,11 @@
# a copy of `has_defined_all.py` that is imported by `has_defined_all_indirection.py`
# with its own names such that we can check both `import *` without any cross-talk
from trace import *
enter(__file__)
all_defined_foo_copy = "all_defined_foo_copy"
all_defined_bar_copy = "all_defined_bar_copy"
__all__ = ["all_defined_foo_copy"]
exit(__file__)

View File

@@ -0,0 +1,6 @@
from trace import *
enter(__file__)
from has_defined_all_copy import *
exit(__file__)

View File

@@ -0,0 +1,19 @@
# combination of refined and if_then_else
from trace import *
enter(__file__)
class SOURCE(): pass
# definition based on "random" choice; in this case it will always go to the if-branch,
# but our analysis is not able to figure this out
if eval("True"):
src = SOURCE
else:
src = SOURCE
src.foo = 42
check("src", src, src, globals()) #$ prints=SOURCE
exit(__file__)

View File

@@ -11,6 +11,9 @@ private class SourceString extends DataFlow::Node {
SourceString() {
this.asExpr().(StrConst).getText() = contents and
this.asExpr().getParent() instanceof Assign
or
this.asExpr().(ClassExpr).getInnerScope().getName() = "SOURCE" and
contents = "SOURCE"
}
string getContents() { result = contents }
@@ -63,6 +66,10 @@ private class ImportConfiguration extends DataFlow::Configuration {
override predicate isSink(DataFlow::Node sink) {
sink = API::moduleImport("trace").getMember("check").getACall().getArg(1)
}
override predicate isBarrier(DataFlow::Node node) {
exists(DataFlow::MethodCallNode call | call.calls(node, "block_flow"))
}
}
class ResolutionTest extends InlineExpectationsTest {

View File

@@ -84,6 +84,52 @@ from attr_clash import clashing_attr, non_clashing_submodule #$ imports=attr_cla
check("clashing_attr", clashing_attr, "clashing_attr", globals()) #$ prints=clashing_attr SPURIOUS: prints="<module attr_clash.clashing_attr>"
check("non_clashing_submodule", non_clashing_submodule, "<module attr_clash.non_clashing_submodule>", globals()) #$ prints="<module attr_clash.non_clashing_submodule>"
import attr_clash.clashing_attr as _doesnt_matter #$ imports=attr_clash.clashing_attr as=_doesnt_matter
from attr_clash import clashing_attr, non_clashing_submodule #$ imports=attr_clash.clashing_attr as=clashing_attr imports=attr_clash.non_clashing_submodule as=non_clashing_submodule
check("clashing_attr", clashing_attr, "<module attr_clash.clashing_attr>", globals()) #$ prints="<module attr_clash.clashing_attr>" SPURIOUS: prints=clashing_attr
# check that import * only imports the __all__ attributes
from has_defined_all import *
check("all_defined_foo", all_defined_foo, "all_defined_foo", globals()) #$ prints=all_defined_foo
try:
check("all_defined_bar", all_defined_bar, "all_defined_bar", globals()) #$ SPURIOUS: prints=all_defined_bar
raise Exception("Did not get expected NameError")
except NameError as e:
if "all_defined_bar" in str(e):
print("Got expected NameError:", e)
else:
raise
import has_defined_all # $ imports=has_defined_all as=has_defined_all
check("has_defined_all.all_defined_foo", has_defined_all.all_defined_foo, "all_defined_foo", globals()) #$ prints=all_defined_foo
check("has_defined_all.all_defined_bar", has_defined_all.all_defined_bar, "all_defined_bar", globals()) #$ prints=all_defined_bar
# same check as above, but going through one level of indirection (which can make a difference)
from has_defined_all_indirection import *
check("all_defined_foo_copy", all_defined_foo_copy, "all_defined_foo_copy", globals()) #$ prints=all_defined_foo_copy
try:
check("all_defined_bar_copy", all_defined_bar_copy, "all_defined_bar_copy", globals()) #$ SPURIOUS: prints=all_defined_bar_copy
raise Exception("Did not get expected NameError")
except NameError as e:
if "all_defined_bar_copy" in str(e):
print("Got expected NameError:", e)
else:
raise
# same check as above, but going through one level of indirection (which can make a difference)
import has_defined_all_indirection # $ imports=has_defined_all_indirection as=has_defined_all_indirection
check("has_defined_all_indirection.all_defined_foo_copy", has_defined_all_indirection.all_defined_foo_copy, "all_defined_foo_copy", globals()) #$ prints=all_defined_foo_copy
try:
check("has_defined_all_indirection.all_defined_bar_copy", has_defined_all_indirection.all_defined_bar_copy, "all_defined_bar_copy", globals())
raise Exception("Did not get expected AttributeError")
except AttributeError as e:
if "all_defined_bar_copy" in str(e):
print("Got expected AttributeError:", e)
else:
raise
# check that import * from an __init__ file works
from package.subpackage2 import *
@@ -93,6 +139,25 @@ check("subpackage2_attr", subpackage2_attr, "subpackage2_attr", globals()) #$ pr
# a name imported from if_then_else: all three annotated values are expected for it
from if_then_else import if_then_else_defined
check("if_then_else_defined", if_then_else_defined, "if_defined", globals()) #$ prints=if_defined prints=else_defined_1 prints=else_defined_2
# check that refined definitions (e.g. a class that has attributes assigned to it
# after being defined, as in refined.py) are handled correctly
import refined # $ imports=refined as=refined
check("refined.SOURCE", refined.SOURCE, refined.SOURCE, globals()) #$ prints=SOURCE
import if_then_else_refined # $ imports=if_then_else_refined as=if_then_else_refined
check("if_then_else_refined.src", if_then_else_refined.src, if_then_else_refined.src, globals()) #$ prints=SOURCE
# simplistic_reexport imports bar_attr/baz_attr and then overwrites them; the SPURIOUS
# annotations mark that the originally imported values are still reported as well
import simplistic_reexport # $ imports=simplistic_reexport as=simplistic_reexport
check("simplistic_reexport.bar_attr", simplistic_reexport.bar_attr, "overwritten", globals()) #$ prints=overwritten SPURIOUS: prints=bar_attr
check("simplistic_reexport.baz_attr", simplistic_reexport.baz_attr, "overwritten", globals()) #$ prints=overwritten SPURIOUS: prints=baz_attr
# check that we don't treat all assignments as being exports
import block_flow_check #$ imports=block_flow_check as=block_flow_check
check("block_flow_check.SOURCE", block_flow_check.SOURCE, block_flow_check.SOURCE, globals())
# show that import resolution is a bit too generous with definitions
import generous_export #$ imports=generous_export as=generous_export
check("generous_export.SOURCE", generous_export.SOURCE, generous_export.SOURCE, globals()) #$ SPURIOUS: prints=SOURCE
exit(__file__)
print()

View File

@@ -0,0 +1,14 @@
# Fixture for a "refined" definition: `SOURCE` is defined as a class and then has
# attributes assigned to it; it is imported elsewhere as `refined.SOURCE`.
from trace import *
enter(__file__)
class SOURCE(object): pass
check("SOURCE", SOURCE, SOURCE, globals()) #$ prints=SOURCE
# attribute assignments after the class definition; the check that follows
# expects the same `prints=SOURCE` result as the one before them
SOURCE.foo = 42
SOURCE.bar = 43
SOURCE.baz = 44
check("SOURCE", SOURCE, SOURCE, globals()) #$ prints=SOURCE
exit(__file__)

View File

@@ -0,0 +1,19 @@
# we might consider anything imported to also be exported, but this is not the case:
# both imported names below are overwritten before the end of the module
from trace import *
enter(__file__)
# explicit import, followed by an overwrite of the same name
from bar import bar_attr
check("bar_attr", bar_attr, "bar_attr", globals()) #$ prints=bar_attr
bar_attr = "overwritten"
check("bar_attr", bar_attr, "overwritten", globals()) #$ prints=overwritten
# star import, followed by an overwrite of a name it provided
from baz import *
check("baz_attr", baz_attr, "baz_attr", globals()) #$ MISSING: prints=baz_attr
baz_attr = "overwritten"
check("baz_attr", baz_attr, "overwritten", globals()) #$ prints=overwritten
exit(__file__)

View File

@@ -10,6 +10,12 @@
| InsecureProtocol.py:19:1:19:19 | ControlFlowNode for Attribute() | Insecure SSL/TLS protocol version SSLv2 specified by $@. | InsecureProtocol.py:19:1:19:19 | ControlFlowNode for Attribute() | call to SSL.Context |
| InsecureProtocol.py:23:1:23:43 | ControlFlowNode for Attribute() | Insecure SSL/TLS protocol version SSLv2 specified by $@. | InsecureProtocol.py:23:1:23:43 | ControlFlowNode for Attribute() | call to ssl.wrap_socket |
| InsecureProtocol.py:24:1:24:35 | ControlFlowNode for SSLContext() | Insecure SSL/TLS protocol version SSLv2 specified by $@. | InsecureProtocol.py:24:1:24:35 | ControlFlowNode for SSLContext() | call to SSLContext |
| import_all_one_file.py:25:14:25:45 | ControlFlowNode for copy_completely_insecure_context | Insecure SSL/TLS protocol version TLSv1 allowed by $@. | import_all_one_file.py:9:36:9:67 | ControlFlowNode for Attribute() | call to ssl.SSLContext |
| import_all_one_file.py:25:14:25:45 | ControlFlowNode for copy_completely_insecure_context | Insecure SSL/TLS protocol version TLSv1_1 allowed by $@. | import_all_one_file.py:9:36:9:67 | ControlFlowNode for Attribute() | call to ssl.SSLContext |
| import_all_one_file.py:29:14:29:39 | ControlFlowNode for copy_also_insecure_context | Insecure SSL/TLS protocol version TLSv1_1 allowed by $@. | import_all_one_file.py:12:30:12:61 | ControlFlowNode for Attribute() | call to ssl.SSLContext |
| import_use.py:13:14:13:40 | ControlFlowNode for completely_insecure_context | Insecure SSL/TLS protocol version TLSv1 allowed by $@. | import_def.py:7:31:7:62 | ControlFlowNode for Attribute() | call to ssl.SSLContext |
| import_use.py:13:14:13:40 | ControlFlowNode for completely_insecure_context | Insecure SSL/TLS protocol version TLSv1_1 allowed by $@. | import_def.py:7:31:7:62 | ControlFlowNode for Attribute() | call to ssl.SSLContext |
| import_use.py:17:14:17:34 | ControlFlowNode for also_insecure_context | Insecure SSL/TLS protocol version TLSv1_1 allowed by $@. | import_def.py:10:25:10:56 | ControlFlowNode for Attribute() | call to ssl.SSLContext |
| pyOpenSSL_fluent.py:8:27:8:33 | ControlFlowNode for context | Insecure SSL/TLS protocol version SSLv2 allowed by $@. | pyOpenSSL_fluent.py:6:15:6:44 | ControlFlowNode for Attribute() | call to SSL.Context |
| pyOpenSSL_fluent.py:8:27:8:33 | ControlFlowNode for context | Insecure SSL/TLS protocol version SSLv3 allowed by $@. | pyOpenSSL_fluent.py:6:15:6:44 | ControlFlowNode for Attribute() | call to SSL.Context |
| pyOpenSSL_fluent.py:18:27:18:33 | ControlFlowNode for context | Insecure SSL/TLS protocol version SSLv2 allowed by $@. | pyOpenSSL_fluent.py:15:15:15:44 | ControlFlowNode for Attribute() | call to SSL.Context |

View File

@@ -0,0 +1,30 @@
# used to compare alerts without import: the same three contexts as in import_def.py, defined and used in one file (the expected query output references line numbers in this file, so keep the line count stable)
import ssl
copy_secure_context = ssl.SSLContext(ssl.PROTOCOL_TLS)
copy_secure_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1  # sets both the OP_NO_TLSv1 and OP_NO_TLSv1_1 flags
# this is just to allow us to see how un-altered exports work
copy_completely_insecure_context = ssl.SSLContext(ssl.PROTOCOL_TLS)  # no options changed; expected alerts name TLSv1 and TLSv1_1
# and an insecure export that is refined
copy_also_insecure_context = ssl.SSLContext(ssl.PROTOCOL_TLS)
copy_also_insecure_context.options |= ssl.OP_NO_TLSv1  # only OP_NO_TLSv1 is set; expected alert names TLSv1_1
import socket
hostname = 'www.python.org'
with socket.create_connection((hostname, 443)) as sock:
with copy_secure_context.wrap_socket(sock, server_hostname=hostname) as ssock:
print(ssock.version())
with socket.create_connection((hostname, 443)) as sock:
with copy_completely_insecure_context.wrap_socket(sock, server_hostname=hostname) as ssock:
print(ssock.version())
with socket.create_connection((hostname, 443)) as sock:
with copy_also_insecure_context.wrap_socket(sock, server_hostname=hostname) as ssock:
print(ssock.version())

View File

@@ -0,0 +1,11 @@
import ssl  # contexts defined here are imported by import_use.py; the expected query output references line numbers in this file
secure_context = ssl.SSLContext(ssl.PROTOCOL_TLS)
secure_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1  # sets both the OP_NO_TLSv1 and OP_NO_TLSv1_1 flags
# this is just to allow us to see how un-altered exports work
completely_insecure_context = ssl.SSLContext(ssl.PROTOCOL_TLS)  # no options changed; expected alerts name TLSv1 and TLSv1_1
# and an insecure export that is refined
also_insecure_context = ssl.SSLContext(ssl.PROTOCOL_TLS)
also_insecure_context.options |= ssl.OP_NO_TLSv1  # only OP_NO_TLSv1 is set; expected alert names TLSv1_1

View File

@@ -0,0 +1,18 @@
# check that query works properly with imports: the contexts are defined in import_def.py and only used here (the expected query output references line numbers in this file, so keep the line count stable)
import socket
from import_def import secure_context, completely_insecure_context, also_insecure_context
hostname = 'www.python.org'
with socket.create_connection((hostname, 443)) as sock:
with secure_context.wrap_socket(sock, server_hostname=hostname) as ssock:
print(ssock.version())
with socket.create_connection((hostname, 443)) as sock:
with completely_insecure_context.wrap_socket(sock, server_hostname=hostname) as ssock:  # expected alerts: TLSv1 and TLSv1_1 allowed
print(ssock.version())
with socket.create_connection((hostname, 443)) as sock:
with also_insecure_context.wrap_socket(sock, server_hostname=hostname) as ssock:  # expected alert: TLSv1_1 allowed
print(ssock.version())