Python points-to: Add objects representing missing modules and their attributes.

This commit is contained in:
Mark Shannon
2019-04-26 15:53:39 +01:00
parent b10a7cd3a4
commit bf78c62594
19 changed files with 252 additions and 18 deletions

View File

@@ -250,3 +250,156 @@ class PythonModuleObjectInternal extends ModuleObjectInternal, TPythonModule {
}
/**
 * A module that is referred to by an import but cannot be used as a normal module.
 * One such object exists for each `name` wrapped by `TAbsentModule(name)`.
 * Its `toString()` distinguishes a module whose source file has a syntax error
 * ("Unparsable module") from any other absent module ("Missing module").
 */
class AbsentModuleObjectInternal extends ModuleObjectInternal, TAbsentModule {
/** An absent module has no corresponding builtin object. */
override Builtin getBuiltin() {
none()
}
/**
 * Gets a description of this module: "Unparsable module <name>" if some `Module`
 * with this name lives in a file that has a `SyntaxError`, otherwise "Missing module <name>".
 */
override string toString() {
if exists(Module m, SyntaxError se | se.getFile() = m.getFile() and m.getName() = this.getName()) then
result = "Unparsable module " + this.getName()
else
result = "Missing module " + this.getName()
}
/** Gets the name of this module, that is, the string wrapped by `TAbsentModule`. */
override string getName() {
this = TAbsentModule(result)
}
/** Holds if `node` in `context` is an import for which `missing_imported_module` holds with this module's name. */
override predicate introduced(ControlFlowNode node, PointsToContext context) {
missing_imported_module(node, context, this.getName())
}
/** An absent module has no class declaration. */
override ClassDecl getClassDeclaration() {
none()
}
/** An absent module has no source module. */
override Module getSourceModule() {
none()
}
/** An absent module has no `__init__` module. Note that this member is not declared `override`. */
PythonModuleObjectInternal getInitModule() {
none()
}
/** An absent module has no integer value. */
override int intValue() {
none()
}
/** An absent module has no string value. */
override string strValue() {
none()
}
/** An absent module is never a callee. */
override predicate calleeAndOffset(Function scope, int paramOffset) {
none()
}
/**
 * Holds if `value` is the `TAbsentModuleAttribute` placeholder for attribute `name`
 * of this module. The origin is always `CfgOrigin::unknown()`.
 */
pragma [noinline] override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
value = TAbsentModuleAttribute(this, name) and origin = CfgOrigin::unknown()
}
/** Never holds: attributes are modelled by `attribute` above rather than treated as unknown. */
pragma [noinline] override predicate attributesUnknown() { none() }
/** An absent module has no origin in the control flow graph. */
override ControlFlowNode getOrigin() {
none()
}
/** Always holds: this object stands for a module that is missing. */
override predicate isMissing() {
any()
}
}
/**
 * An attribute of an absent module.
 * One such object exists for each `(mod, name)` pair wrapped by `TAbsentModuleAttribute(mod, name)`.
 * Nothing is known about the attribute itself, so most predicates either do not hold
 * or report `maybe()`.
 */
class AbsentModuleAttributeObjectInternal extends ObjectInternal, TAbsentModuleAttribute {
/** A missing attribute has no corresponding builtin object. */
override Builtin getBuiltin() {
none()
}
/** Gets a description of the form "Missing module attribute <module>.<attribute>". */
override string toString() {
exists(ModuleObjectInternal mod, string name |
this = TAbsentModuleAttribute(mod, name) and
result = "Missing module attribute " + mod.getName() + "." + name
)
}
/**
 * Holds if `node` in `context` is either an attribute access `x.name` or an
 * import member `from x import name`, where `x` points to the module `mod`
 * and this object is `TAbsentModuleAttribute(mod, name)`.
 */
override predicate introduced(ControlFlowNode node, PointsToContext context) {
exists(ModuleObjectInternal mod, string name |
this = TAbsentModuleAttribute(mod, name) |
PointsToInternal::pointsTo(node.(AttrNode).getObject(name), context, mod, _)
or
PointsToInternal::pointsTo(node.(ImportMemberNode).getModule(name), context, mod, _)
)
}
/** A missing attribute has no class declaration. */
override ClassDecl getClassDeclaration() {
none()
}
/**
 * Has no result. This member is not declared `override`.
 * NOTE(review): this looks like it was carried over from the module class above;
 * confirm whether it is needed on a non-module object.
 */
PythonModuleObjectInternal getInitModule() {
none()
}
/** A missing attribute has no integer value. */
override int intValue() {
none()
}
/** A missing attribute has no string value. */
override string strValue() {
none()
}
/** A missing attribute is never a callee. */
override predicate calleeAndOffset(Function scope, int paramOffset) {
none()
}
/** No specific attribute is known for a missing attribute. */
pragma [noinline] override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
none()
}
/** Always holds: the attributes of a missing attribute are unknown. */
pragma [noinline] override predicate attributesUnknown() { any() }
/** A missing attribute has no origin in the control flow graph. */
override ControlFlowNode getOrigin() {
none()
}
/** Never holds: no result is inferred for calling a missing attribute. */
override predicate callResult(ObjectInternal obj, CfgOrigin origin) {
// Don't know, assume not callable.
none()
}
/** Never holds: no context-sensitive call result is inferred either. */
override predicate callResult(PointsToContext callee, ObjectInternal obj, CfgOrigin origin) {
// Don't know, assume not callable.
none()
}
/** It is unknown whether the attribute is a class. */
override boolean isClass() { result = maybe() }
override boolean isComparable() { result = false }
/** The truth value of the attribute is unknown. */
override boolean booleanValue() {
result = maybe()
}
/** Gets the unknown class, as the type of the attribute cannot be determined. */
override ObjectInternal getClass() {
result = ObjectInternal::unknownClass()
}
override boolean isDescriptor() { result = false }
override predicate descriptorGetClass(ObjectInternal cls, ObjectInternal value, CfgOrigin origin) { none() }
override predicate descriptorGetInstance(ObjectInternal instance, ObjectInternal value, CfgOrigin origin) { none() }
override predicate binds(ObjectInternal instance, string name, ObjectInternal descriptor) { none() }
/** A missing attribute has no known length. */
override int length() { none() }
/** Always holds: the result of subscripting a missing attribute is unknown. */
override predicate subscriptUnknown() { any() }
/** Always holds: this object stands for an attribute of a module that is missing. */
override predicate isMissing() {
any()
}
}

View File

@@ -56,6 +56,15 @@ class Value extends TObject {
predicate isBuiltin() {
this.(ObjectInternal).isBuiltin()
}
/**
 * Holds if this value represents an entity that is inferred to exist,
 * but is missing from the database.
 * Most commonly, this is a module that is imported, but wasn't present during extraction.
 */
predicate isMissing() {
this.(ObjectInternal).isMissing()
}
}
class ModuleValue extends Value {

View File

@@ -111,6 +111,12 @@ class ObjectInternal extends TObject {
predicate functionAndOffset(CallableObjectInternal function, int offset) { none() }
/**
 * Holds if this 'object' represents an entity that is inferred to exist
 * but is missing from the database.
 * Does not hold by default; subclasses that model missing entities override it.
 */
predicate isMissing() {
none()
}
}

View File

@@ -39,7 +39,9 @@ newtype TObject =
exists(moduleNameFromFile(f))
}
or
TPythonModule(Module m) { not m.isPackage() }
TPythonModule(Module m) {
not m.isPackage() and not exists(SyntaxError se | se.getFile() = m.getFile())
}
or
TTrue()
or
@@ -160,6 +162,23 @@ newtype TObject =
}
or
TSysVersionInfo()
or
TAbsentModule(string name) {
missing_imported_module(_, _, name)
}
or
TAbsentModuleAttribute(AbsentModuleObjectInternal mod, string attrname) {
(
PointsToInternal::pointsTo(any(AttrNode attr).getObject(attrname), _, mod, _)
or
PointsToInternal::pointsTo(any(ImportMemberNode imp).getModule(attrname), _, mod, _)
)
and
exists(string modname |
modname = mod.getName() and
not common_module_name(modname + "." + attrname)
)
}
private predicate is_power_2(int n) {
n = 1 or
@@ -327,6 +346,30 @@ private predicate neither_class_nor_static_method(Function f) {
)
}
/**
 * Holds if `imp`, analysed in the import context `ctx`, imports a module called `name`
 * that has to be modelled as an absent module. This is the case when either:
 *
 *  - `imp` is an import expression for `name`, and there is neither a source module
 *    nor a builtin module with that name, or a source module with that name exists
 *    but its file has a syntax error; or
 *  - `imp` is an import member (`from mod import attrname`) whose module points to an
 *    absent module `mod`, and `mod.attrname` is a common module name. In that case
 *    `name` is the fully qualified name `mod.attrname`, for example `six.moves`.
 */
predicate missing_imported_module(ControlFlowNode imp, Context ctx, string name) {
    ctx.isImport() and imp.(ImportExprNode).getNode().getAnImportedModuleName() = name and
    (
        not exists(Module m | m.getName() = name) and
        not exists(Builtin b | b.isModule() and b.getName() = name)
        or
        exists(Module m, SyntaxError se |
            m.getName() = name and
            se.getFile() = m.getFile()
        )
    )
    or
    exists(AbsentModuleObjectInternal mod, string attrname |
        PointsToInternal::pointsTo(imp.(ImportMemberNode).getModule(attrname), ctx, mod, _) and
        common_module_name(mod.getName() + "." + attrname) and
        // The missing module is the qualified submodule, not the bare member name.
        name = mod.getName() + "." + attrname
    )
}
/**
 * Holds if `name` is one of a fixed list of fully qualified module names
 * ("zope.interface" and "six.moves").
 * Callers in this file test `<absent module name>.<attribute name>` against this list
 * to decide whether an attribute of an absent module is modelled as a module.
 */
predicate common_module_name(string name) {
name = "zope.interface"
or
name = "six.moves"
}
library class ClassDecl extends @py_object {
ClassDecl() {

View File

@@ -130,7 +130,7 @@ module PointsTo {
PointsToInternal::pointsTo(f, context, value, origin) and
cls = value.getClass().getSource() |
obj = value.getSource() or
not exists(value.getSource()) and obj = origin
not exists(value.getSource()) and not value.isMissing() and obj = origin
)
or
/* Backwards compatibility for *args and **kwargs */

View File

@@ -7,12 +7,8 @@ import python
import semmle.python.security.TaintTracking
import semmle.python.security.strings.Basic
private ModuleObject theTracebackModule() {
result.getName() = "traceback"
}
private FunctionObject traceback_function(string name) {
result = theTracebackModule().attr(name)
private Value traceback_function(string name) {
result = Module::named("traceback").attr(name)
}
/**

View File

@@ -0,0 +1,7 @@
| absent.py:3:8:3:11 | ControlFlowNode for ImportExpr | Missing module xxxx |
| absent.py:4:1:4:4 | ControlFlowNode for xxxx | Missing module xxxx |
| absent.py:6:6:6:9 | ControlFlowNode for ImportExpr | Missing module xxxx |
| absent.py:6:18:6:21 | ControlFlowNode for ImportMember | Missing module attribute xxxx.open |
| absent.py:8:1:8:4 | ControlFlowNode for open | Missing module attribute xxxx.open |
| absent.py:12:8:12:13 | ControlFlowNode for ImportExpr | Module module |
| absent.py:14:1:14:6 | ControlFlowNode for module | Module module |

View File

@@ -0,0 +1,9 @@
import python
import semmle.python.objects.Modules
// Selects every control flow node `f` together with each value `val` that it points to.
from Value val, ControlFlowNode f
where //val = Value::named(name) and
f.pointsTo(val)
select f, val

View File

@@ -0,0 +1,14 @@
#There is no xxxx, rely on AbsentModule
import xxxx
xxxx
from xxxx import open
open()
#This module is present, so it shouldn't be reported as missing
import module
module

View File

@@ -1,2 +1,3 @@
| 41 | ControlFlowNode for func1 | Function func1 | test.py:23 |
| 42 | ControlFlowNode for func2 | Function wrapper | test.py:10 |
| 43 | ControlFlowNode for func3 | Function wrapper | test.py:17 |

View File

@@ -341,7 +341,6 @@
| 263 | ControlFlowNode for Attribute | int 3 |
| 263 | ControlFlowNode for IntegerLiteral | int 3 |
| 263 | ControlFlowNode for self | self |
| 264 | ControlFlowNode for Attribute | int 0 |
| 264 | ControlFlowNode for Attribute | int 3 |
| 264 | ControlFlowNode for self | self |
| 267 | ControlFlowNode for Derived4 | class Derived4 |

View File

@@ -344,7 +344,6 @@
| 263 | ControlFlowNode for Attribute | int 3 | builtin-class int |
| 263 | ControlFlowNode for IntegerLiteral | int 3 | builtin-class int |
| 263 | ControlFlowNode for self | self | class G |
| 264 | ControlFlowNode for Attribute | int 0 | builtin-class int |
| 264 | ControlFlowNode for Attribute | int 3 | builtin-class int |
| 264 | ControlFlowNode for self | self | class G |
| 267 | ControlFlowNode for Derived4 | class Derived4 | builtin-class type |

View File

@@ -51,8 +51,6 @@
| test.py | 314 | ControlFlowNode for b | NoneType None | 311 |
| test.py | 332 | ControlFlowNode for Attribute | NoneType None | 322 |
| test.py | 332 | ControlFlowNode for Attribute | int 4 | 322 |
| test.py | 337 | ControlFlowNode for Attribute | NoneType None | 324 |
| test.py | 337 | ControlFlowNode for Attribute | int 3 | 324 |
| test.py | 347 | ControlFlowNode for Attribute | NoneType None | 322 |
| test.py | 347 | ControlFlowNode for Attribute | int 4 | 322 |
| test.py | 357 | ControlFlowNode for g1 | float 7.0 | 356 |

View File

@@ -51,8 +51,6 @@
| test.py | 314 | ControlFlowNode for b | NoneType None | builtin-class NoneType | 311 |
| test.py | 332 | ControlFlowNode for Attribute | NoneType None | builtin-class NoneType | 322 |
| test.py | 332 | ControlFlowNode for Attribute | int 4 | builtin-class int | 322 |
| test.py | 337 | ControlFlowNode for Attribute | NoneType None | builtin-class NoneType | 324 |
| test.py | 337 | ControlFlowNode for Attribute | int 3 | builtin-class int | 324 |
| test.py | 347 | ControlFlowNode for Attribute | NoneType None | builtin-class NoneType | 322 |
| test.py | 347 | ControlFlowNode for Attribute | int 4 | builtin-class int | 322 |
| test.py | 357 | ControlFlowNode for g1 | float 7.0 | builtin-class float | 356 |

View File

@@ -1074,6 +1074,7 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P
| t_type.py:7 | ControlFlowNode for type | builtin-class type | builtin-class type | 7 | import |
| t_type.py:7 | ControlFlowNode for type() | builtin-class module | builtin-class type | 7 | import |
| t_type.py:9 | ControlFlowNode for type | builtin-class type | builtin-class type | 9 | import |
| t_type.py:9 | ControlFlowNode for type() | *UNKNOWN TYPE* | *UNKNOWN TYPE* | 9 | import |
| t_type.py:10 | ControlFlowNode for Dict | Dict | builtin-class dict | 10 | import |
| t_type.py:10 | ControlFlowNode for Tuple | Tuple | builtin-class tuple | 10 | import |
| t_type.py:10 | ControlFlowNode for object | builtin-class object | builtin-class type | 10 | import |

View File

@@ -827,6 +827,7 @@
| t_type.py:7 | ControlFlowNode for sys | import | Module sys | builtin-class module |
| t_type.py:7 | ControlFlowNode for type | import | builtin-class type | builtin-class type |
| t_type.py:7 | ControlFlowNode for type() | import | builtin-class module | builtin-class type |
| t_type.py:8 | ControlFlowNode for ImportExpr | import | Missing module module | builtin-class module |
| t_type.py:9 | ControlFlowNode for type | import | builtin-class type | builtin-class type |
| t_type.py:10 | ControlFlowNode for Dict | import | Dict | builtin-class dict |
| t_type.py:10 | ControlFlowNode for Tuple | import | (builtin-class object) | builtin-class tuple |

View File

@@ -1,2 +1,2 @@
| 0 | 53 | 53 | 100.0 |
| 1 | 3 | 39 | 7.6923076923076925 |
| 0 | 61 | 61 | 100.0 |
| 1 | 5 | 43 | 11.627906976744185 |

View File

@@ -1,2 +1,2 @@
| 0 | 53 | 53 | 100.0 |
| 1 | 3 | 39 | 7.6923076923076925 |
| 0 | 61 | 61 | 100.0 |
| 1 | 3 | 43 | 6.976744186046512 |