Python points-to: Add generic instances and handle returns for builtin functions. Move attribute lookup handling to objects.

This commit is contained in:
Mark Shannon
2019-03-21 11:19:32 +00:00
parent ce9d0f1a06
commit 84c9866c50
10 changed files with 337 additions and 87 deletions

View File

@@ -34,6 +34,13 @@ abstract class CallableObjectInternal extends ObjectInternal {
}
abstract string getName();
/** Never holds: no attribute values are provided for this kind of object. */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
none()
}
/** Never holds: the attributes of this kind of object are not treated as unknown. */
override predicate attributesUnknown() { none() }
}
@@ -108,8 +115,17 @@ class BuiltinFunctionObjectInternal extends CallableObjectInternal, TBuiltinFunc
override boolean isComparable() { result = true }
override predicate callResult(PointsToContext2 callee, ObjectInternal obj, CfgOrigin origin) {
// TO DO .. Result should be an unknown value of a known class if the return type is known, or just an unknown value otherwise.
none()
exists(Builtin func, ClassObjectInternal cls |
func = this.getBuiltin() and
func != Builtin::builtin("isinstance") and
func != Builtin::builtin("issubclass") and
func != Builtin::builtin("callable")
|
cls = ObjectInternal::fromBuiltin(this.getReturnType()) and
obj = TUnknownInstance(cls)
) and
origin = CfgOrigin::unknown() and
callee_for_object(callee, this)
}
override ControlFlowNode getOrigin() {
@@ -124,6 +140,30 @@ class BuiltinFunctionObjectInternal extends CallableObjectInternal, TBuiltinFunc
result = this.getBuiltin().getName()
}
/** Gets the class of the value returned by this builtin function, where known.
 *
 * The class is taken either from a short hand-written table or from the
 * `ext_rettype` relation, with a few entries of that relation ignored.
 */
Builtin getReturnType() {
    exists(Builtin callee | callee = this.getBuiltin() |
        /* Return types of a few builtin functions that the CPython analysis misses. */
        result = Builtin::special("str") and
        (
            callee = Builtin::builtin("hex") or
            callee = Builtin::builtin("oct") or
            callee = Builtin::builtin("intern")
        )
        or
        /* Otherwise use the CPython analysis, skipping a few minor inaccuracies. */
        ext_rettype(callee, result) and
        not callee = Builtin::builtin("sum") and
        not callee = Builtin::builtin("filter") and
        not (
            result = Builtin::special("NoneType") and
            (
                callee = Builtin::builtin("__import__") or
                callee = Builtin::builtin("compile")
            )
        )
    )
}
}

View File

@@ -31,11 +31,10 @@ abstract class ClassObjectInternal extends ObjectInternal {
result = this.getClassDeclaration().getName()
}
abstract predicate attribute(string name, ObjectInternal value, CfgOrigin origin);
boolean isSpecial() {
result = Types::getMro(this).isSpecial()
}
}
class PythonClassObjectInternal extends ClassObjectInternal, TPythonClassObject {
@@ -86,6 +85,8 @@ class PythonClassObjectInternal extends ClassObjectInternal, TPythonClassObject
)
}
/** Never holds: the attributes of this kind of object are not treated as unknown. */
override predicate attributesUnknown() { none() }
override predicate callResult(PointsToContext2 callee, ObjectInternal obj, CfgOrigin origin) {
// TO DO .. Result should (in most cases) be an instance
none()
@@ -126,9 +127,12 @@ class BuiltinClassObjectInternal extends ClassObjectInternal, TBuiltinClassObjec
}
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
value.getBuiltin() = this.getBuiltin().getMember(name) and origin = CfgOrigin::unknown()
value = ObjectInternal::fromBuiltin(this.getBuiltin().getMember(name)) and
origin = CfgOrigin::unknown()
}
/** Never holds: the attributes of this kind of object are not treated as unknown. */
override predicate attributesUnknown() { none() }
override predicate callResult(PointsToContext2 callee, ObjectInternal obj, CfgOrigin origin) {
// TO DO .. Result should (in most cases) be an instance
none()
@@ -180,6 +184,8 @@ class UnknownClassInternal extends ClassObjectInternal, TUnknownClass {
none()
}
/** Always holds: the attributes of this kind of object are treated as unknown. */
override predicate attributesUnknown() { any() }
}

View File

@@ -45,6 +45,12 @@ abstract class BooleanObjectInternal extends ObjectInternal {
none()
}
/** Never holds: no attribute values are provided for this kind of object. */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
none()
}
/** Never holds: the attributes of this kind of object are not treated as unknown. */
override predicate attributesUnknown() { none() }
}
class TrueObjectInternal extends BooleanObjectInternal, TTrue {
@@ -149,6 +155,12 @@ class NoneObjectInternal extends ObjectInternal, TNone {
none()
}
/** Never holds: no attribute values are provided for this kind of object. */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
none()
}
/** Never holds: the attributes of this kind of object are not treated as unknown. */
override predicate attributesUnknown() { none() }
}
@@ -209,6 +221,12 @@ class IntObjectInternal extends ObjectInternal, TInt {
none()
}
/** Never holds: no attribute values are provided for this kind of object. */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
none()
}
/** Never holds: the attributes of this kind of object are not treated as unknown. */
override predicate attributesUnknown() { none() }
}
@@ -268,6 +286,12 @@ class StringObjectInternal extends ObjectInternal, TString {
none()
}
/** Never holds: no attribute values are provided for this kind of object. */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
none()
}
/** Never holds: the attributes of this kind of object are not treated as unknown. */
override predicate attributesUnknown() { none() }
}

View File

@@ -7,7 +7,7 @@ private import semmle.python.pointsto.PointsTo2
private import semmle.python.pointsto.PointsToContext2
private import semmle.python.types.Builtins
class InstanceInternal extends TInstance, ObjectInternal {
class SpecificInstanceInternal extends TSpecificInstance, ObjectInternal {
override string toString() {
result = "instance of " + this.getClass().(ClassObjectInternal).getName()
@@ -26,7 +26,7 @@ class InstanceInternal extends TInstance, ObjectInternal {
}
override predicate introduced(ControlFlowNode node, PointsToContext2 context) {
this = TInstance(node, _, context)
this = TSpecificInstance(node, _, context)
}
/** Gets the class declaration for this object, if it is a declared class. */
@@ -39,7 +39,7 @@ class InstanceInternal extends TInstance, ObjectInternal {
override boolean isComparable() { result = false }
override ObjectInternal getClass() {
this = TInstance(_, result, _)
this = TSpecificInstance(_, result, _)
}
/** Gets the `Builtin` for this object, if any.
@@ -56,7 +56,7 @@ class InstanceInternal extends TInstance, ObjectInternal {
* exactly one result for either this method or `getBuiltin()`.
*/
override ControlFlowNode getOrigin() {
this = TInstance(result, _, _)
this = TSpecificInstance(result, _, _)
}
/** Holds if `obj` is the result of calling `this` and `origin` is
@@ -80,4 +80,92 @@ class InstanceInternal extends TInstance, ObjectInternal {
none()
}
/** Never holds: no specific attribute values are provided for this kind of object. */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
none()
}
/** Always holds: the attributes of this kind of object are treated as unknown. */
override predicate attributesUnknown() { any() }
}
/** Represents a value that has a known class, but no other information.
 * Apart from `getClass()`, `isClass()`, `isComparable()`, `maybe()` and
 * `attributesUnknown()`, every predicate in this class has no result.
 */
class UnknownInstanceInternal extends TUnknownInstance, ObjectInternal {
/** Gets a textual description of the form "instance of <class name>". */
override string toString() {
result = "instance of " + this.getClass().(ClassObjectInternal).getName()
}
/** The boolean value of this object, if it has one.
 * Currently has no result; the commented-out line sketches deriving it from the class.
 */
override boolean booleanValue() {
//this.getClass().instancesAlways(result)
none()
}
/** Holds if this object may be true or false when evaluated as a bool.
 * Currently always holds; the commented-out line sketches deriving it from the class.
 */
override predicate maybe() {
// this.getClass().instancesMaybe()
any()
}
/** Never holds: this object is not tied to any control flow node or context. */
override predicate introduced(ControlFlowNode node, PointsToContext2 context) {
none()
}
/** Gets the class declaration for this object, if it is a declared class.
 * An unknown instance is not a class, so there is no result.
 */
override ClassDecl getClassDeclaration() {
none()
}
/** Gets `false`: an instance is not itself a class. */
override boolean isClass() { result = false }
/** Gets `false`: unknown instances are not comparable. */
override boolean isComparable() { result = false }
/** Gets the class that this object is an instance of. */
override ObjectInternal getClass() {
this = TUnknownInstance(result)
}
/** Gets the `Builtin` for this object, if any.
* All objects (except unknown and undefined values) should return
* exactly one result for either this method or `getOrigin()`.
* NOTE(review): this class has no result for either method -- confirm
* that unknown instances are meant to be exempt from that rule.
*/
override Builtin getBuiltin() {
none()
}
/** Gets a control flow node that represents the source origin of this
* object.
* All objects (except unknown and undefined values) should return
* exactly one result for either this method or `getBuiltin()`.
* NOTE(review): this class has no result for either method -- confirm
* that unknown instances are meant to be exempt from that rule.
*/
override ControlFlowNode getOrigin() {
none()
}
/** Holds if `obj` is the result of calling `this` and `origin` is
* the origin of `obj`.
* Currently never holds.
*/
override predicate callResult(PointsToContext2 callee, ObjectInternal obj, CfgOrigin origin) {
// In general instances aren't callable, but some are...
// TO DO -- Handle cases where class overrides __call__
none()
}
/** Has no result: no integer value is known for an unknown instance. */
override int intValue() {
none()
}
/** Has no result: no string value is known for an unknown instance. */
override string strValue() {
none()
}
/** Never holds for an unknown instance. */
override predicate calleeAndOffset(Function scope, int paramOffset) {
none()
}
/** Never holds: no specific attribute values are provided for an unknown instance. */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
none()
}
/** Always holds: the attributes of an unknown instance are treated as unknown. */
override predicate attributesUnknown() { any() }
}

View File

@@ -75,6 +75,13 @@ class BuiltinModuleObjectInternal extends ModuleObjectInternal, TBuiltinModuleOb
none()
}
/** Holds if the underlying builtin has a member called `name` and `value`
 * is the object representing that member. The origin is always unknown.
 */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
    origin = CfgOrigin::unknown() and
    exists(Builtin member |
        member = this.getBuiltin().getMember(name) and
        value = ObjectInternal::fromBuiltin(member)
    )
}

/** Never holds: the attributes of this kind of object are not treated as unknown. */
override predicate attributesUnknown() {
    none()
}
}
class PackageObjectInternal extends ModuleObjectInternal, TPackageObject {
@@ -151,6 +158,40 @@ class PackageObjectInternal extends ModuleObjectInternal, TPackageObject {
none()
}
/** Holds if the attribute `name` of this package refers to `(value, origin)`.
 *
 * Attributes come from the package's init module or, when
 * `hasNoInitModule()` holds, from the submodule called `name`.
 */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
    /* No init module: the only attributes are the submodules. */
    exists(ModuleObjectInternal sub |
        this.hasNoInitModule() and
        sub = this.submodule(name) and
        value = sub and
        origin = CfgOrigin::fromModule(sub)
    )
    or
    /* Attributes of the init module are attributes of the package. */
    this.getInitModule().attribute(name, value, origin)
    // TO DO, dollar variable...
    //or
    //exists(Module init |
    // init = this.getSourceModule() and
    // not exists(EssaVariable var | var.getAUse() = init.getANormalExit() and var.getSourceVariable().getName() = name) and
    // exists(EssaVariable var, Context context |
    // isModuleStateVariable(var) and var.getAUse() = init.getANormalExit() and
    // context.isImport() and
    // SSA::ssa_variable_named_attribute_points_to(var, context, name, undefinedVariable(), _, origin) and
    // value = this.submodule(name)
    // )
    //)
}

/** Never holds: the attributes of this kind of object are not treated as unknown. */
override predicate attributesUnknown() {
    none()
}
}
/** Holds if `var` is the ESSA pseudo-variable, named "$", that retains
 * module state during module initialization. Module attributes are handled
 * as attributes of this variable, allowing the SSA form to track
 * mutations of the module during its creation.
 */
private predicate isModuleStateVariable(EssaVariable var) {
    var.getScope() instanceof Module and
    var.getName() = "$"
}
class PythonModuleObjectInternal extends ModuleObjectInternal, TPythonModule {
@@ -205,5 +246,25 @@ class PythonModuleObjectInternal extends ModuleObjectInternal, TPythonModule {
none()
}
/** Holds if the attribute `name` of this module refers to `(value, origin)`.
 *
 * The value is whatever the ESSA variable called `name` points to at a
 * normal exit of the source module, in an import context. Undefined
 * values are excluded.
 */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
    value != ObjectInternal::undefined() and
    exists(EssaVariable v, PointsToContext2 importCtx, ControlFlowNode moduleExit |
        moduleExit = this.getSourceModule().getANormalExit() and
        v.getAUse() = moduleExit and
        v.getSourceVariable().getName() = name and
        importCtx.isImport() and
        PointsTo2::ssa_variable_points_to(v, importCtx, value, origin)
    )
    // TO DO, dollar variable...
    //or
    //not exists(EssaVariable var | var.getAUse() = m.getANormalExit() and var.getSourceVariable().getName() = name) and
    //exists(EssaVariable var, PointsToContext2 imp |
    // var.getAUse() = m.getANormalExit() and isModuleStateVariable(var) |
    // PointsTo2::ssa_variable_named_attribute_points_to(var, imp, name, obj, origin) and
    // imp.isImport() and obj != ObjectInternal::undefined()
    //)
}

/** Never holds: the attributes of this kind of object are not treated as unknown. */
override predicate attributesUnknown() {
    none()
}
}

View File

@@ -0,0 +1,25 @@
import python
private import TObject
private import ObjectInternal
private import semmle.python.pointsto.PointsTo2
private import semmle.python.pointsto.PointsToContext2
/** A value tracked by the points-to analysis.
 * This is a thin view over `TObject` whose members delegate to
 * `ObjectInternal` and `PointsTo2`.
 */
class Value extends TObject {
/** Gets a textual representation of this value, as given by `ObjectInternal`. */
string toString() {
result = this.(ObjectInternal).toString()
}
/** Gets a control flow node that points to this value, in any context and with any origin. */
ControlFlowNode getAReferent() {
PointsTo2::points_to(result, _, this, _)
}
/** Holds if `referent` points to this value in `context`, with origin `origin`. */
predicate pointsTo(ControlFlowNode referent, PointsToContext2 context, ControlFlowNode origin) {
PointsTo2::points_to(referent, context, this, origin)
}
/** Gets the class of this value, as given by `ObjectInternal`. */
Value getClass() {
result = this.(ObjectInternal).getClass()
}
}

View File

@@ -74,6 +74,10 @@ class ObjectInternal extends TObject {
exists(this.getBuiltin())
}
/** Holds if the attribute `name` of this object refers to `value`, with `origin` as the origin of that value. */
abstract predicate attribute(string name, ObjectInternal value, CfgOrigin origin);
/** Holds if the attributes of this object are unknown, in which case attribute loads on it are given the unknown value. */
abstract predicate attributesUnknown();
}
@@ -84,7 +88,7 @@ class BuiltinOpaqueObjectInternal extends ObjectInternal, TBuiltinOpaqueObject {
}
override string toString() {
none()
result = this.getBuiltin().toString()
}
override boolean booleanValue() {
@@ -134,6 +138,13 @@ class BuiltinOpaqueObjectInternal extends ObjectInternal, TBuiltinOpaqueObject {
none()
}
/** Holds if the underlying builtin has a member called `name` and `value`
 * is the object representing that member. The origin is always unknown.
 */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
    exists(Builtin self, Builtin member |
        self = this.getBuiltin() and
        member = self.getMember(name)
    |
        value = ObjectInternal::fromBuiltin(member)
    ) and
    origin = CfgOrigin::unknown()
}

/** Never holds: the attributes of this kind of object are not treated as unknown. */
override predicate attributesUnknown() {
    none()
}
}
@@ -190,6 +201,12 @@ class UnknownInternal extends ObjectInternal, TUnknown {
none()
}
/** Never holds: no specific attribute values are provided for this kind of object. */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
none()
}
/** Always holds: the attributes of this kind of object are treated as unknown. */
override predicate attributesUnknown() { any() }
}
class UndefinedInternal extends ObjectInternal, TUndefined {
@@ -247,6 +264,12 @@ class UndefinedInternal extends ObjectInternal, TUndefined {
none()
}
/** Never holds: no attribute values are provided for this kind of object. */
override predicate attribute(string name, ObjectInternal value, CfgOrigin origin) {
none()
}
/** Never holds: the attributes of this kind of object are not treated as unknown. */
override predicate attributesUnknown() { none() }
}
module ObjectInternal {
@@ -290,5 +313,18 @@ module ObjectInternal {
result = TInt(n)
}
/** Gets an internal object representing the builtin `b`.
 *
 * This is the class, function, module or opaque-object wrapper that exists
 * for `b`, or the integer/string constant given by `b.intValue()` /
 * `b.strValue()`.
 */
ObjectInternal fromBuiltin(Builtin b) {
    /* Wrappers that carry the builtin itself. */
    result = TBuiltinModuleObject(b)
    or
    result = TBuiltinClassObject(b)
    or
    result = TBuiltinFunctionObject(b)
    or
    result = TBuiltinOpaqueObject(b)
    or
    /* Constants are represented by their value. */
    exists(int n | n = b.intValue() | result = TInt(n))
    or
    exists(string s | s = b.strValue() | result = TString(s))
}
}

View File

@@ -18,6 +18,8 @@ newtype TObject =
TBuiltinOpaqueObject(Builtin bltn) {
not bltn.isClass() and not bltn.isFunction() and
not bltn.isMethod() and not bltn.isModule() and
not exists(bltn.intValue()) and
not exists(bltn.strValue()) and
not py_special_objects(bltn, _)
}
or
@@ -56,6 +58,8 @@ newtype TObject =
exists(UnaryExpr neg | neg.getOp() instanceof USub and neg.getOperand() = num)
and n = -num.getN().toInt()
)
or
n = any(Builtin b).intValue()
}
or
TString(string s) {
@@ -65,14 +69,12 @@ newtype TObject =
)
or
// Any string from the library put in the DB by the extractor.
exists(string quoted_string, Builtin bltn |
quoted_string = bltn.getName() and
s = quoted_string.regexpCapture("[bu]'([\\s\\S]*)'", 1)
)
or s = "__main__"
s = any(Builtin b).strValue()
or
s = "__main__"
}
or
TInstance(CallNode instantiation, ClassObjectInternal cls, PointsToContext2 context) {
TSpecificInstance(CallNode instantiation, ClassObjectInternal cls, PointsToContext2 context) {
PointsTo2::points_to(instantiation.getFunction(), context, cls, _) and
cls.isSpecial() = false
}
@@ -84,6 +86,8 @@ newtype TObject =
self.getClass().(ClassObjectInternal).attribute(name, function, _)
)
}
or
TUnknownInstance(ClassObjectInternal cls) { cls != TUnknownClass() }
private predicate is_power_2(int n) {
n = 1 or

View File

@@ -104,6 +104,8 @@ module PointsTo2 {
predicate points_to_candidate(ControlFlowNode f, PointsToContext2 context, ObjectInternal value, ControlFlowNode origin) {
use_points_to(f, context, value, origin)
or
attribute_load_points_to(f, context, value, origin)
or
subscript_points_to(f, context, value, origin)
or
binary_expr_points_to(f, context, value, origin)
@@ -242,6 +244,23 @@ module PointsTo2 {
result.getSourceVariable() instanceof GlobalVariable
}
/** Holds if `f` is an attribute load `x.attr` that points to `(value, origin)` in `context`.
 *
 * The value comes from the `attribute` predicate of an object that `x`
 * points to. If that object's attributes are unknown, the load points to
 * the unknown value, with `f` itself as the origin.
 */
private predicate attribute_load_points_to(AttrNode f, PointsToContext2 context, ObjectInternal value, ControlFlowNode origin) {
    exists(ObjectInternal base, string attr |
        points_to(f.getObject(attr), context, base, _) and
        (
            exists(CfgOrigin attrOrigin |
                base.attribute(attr, value, attrOrigin) and
                origin = attrOrigin.fix(f)
            )
            or
            base.attributesUnknown() and
            value = ObjectInternal::unknown() and
            origin = f
        )
    )
    // TO DO -- Support CustomPointsToAttribute
    //or
    //exists(CustomPointsToAttribute object, string name |
    // points_to(f.getObject(name), context, object, _, _) and
    // object.attributePointsTo(name, value, cls, origin)
    //)
}
/** Holds if the ESSA definition `def` refers to `(value, origin)` given the context `context`. */
predicate ssa_definition_points_to(EssaDefinition def, PointsToContext2 context, ObjectInternal value, CfgOrigin origin) {
ssa_phi_points_to(def, context, value, origin)
@@ -514,7 +533,7 @@ module InterModulePointsTo {
//)
//or
(mod.getSourceModule() != f.getEnclosingModule() or mod.isBuiltin()) and
module_attribute_points_to(mod, name, value, orig)
mod.attribute(name, value, origin)
)
or
exists(EssaVariable var, CfgOrigin orig |
@@ -551,60 +570,6 @@ module InterModulePointsTo {
)
}
/** Holds if `mod.name` points to `(value, origin)`, where `mod` is a module object.
 *
 * Combines three sources: members of a builtin module (with unknown
 * origin), package attributes, and attributes of a source module.
 */
predicate module_attribute_points_to(ModuleObjectInternal mod, string name, ObjectInternal value, CfgOrigin origin) {
    exists(Builtin member |
        member = mod.getBuiltin().getMember(name) and
        member = value.getBuiltin()
    ) and
    origin = CfgOrigin::unknown()
    or
    package_attribute_points_to(mod, name, value, origin)
    or
    py_module_attributes(mod.getSourceModule(), name, value, origin)
}
/** Holds if `m.name` points to `(obj, origin)`, where `m` is a (source) module.
 *
 * The value is whatever the ESSA variable called `name` points to at a
 * normal exit of `m`, in an import context. Undefined values are excluded.
 */
cached predicate py_module_attributes(Module m, string name, ObjectInternal obj, CfgOrigin origin) {
    obj != ObjectInternal::undefined() and
    exists(EssaVariable v, PointsToContext2 importCtx, ControlFlowNode moduleExit |
        moduleExit = m.getANormalExit() and
        v.getAUse() = moduleExit and
        v.getSourceVariable().getName() = name and
        importCtx.isImport() and
        PointsTo2::ssa_variable_points_to(v, importCtx, obj, origin)
    )
    // TO DO, dollar variable...
    //or
    //not exists(EssaVariable var | var.getAUse() = m.getANormalExit() and var.getSourceVariable().getName() = name) and
    //exists(EssaVariable var, PointsToContext2 imp |
    // var.getAUse() = m.getANormalExit() and isModuleStateVariable(var) |
    // PointsTo2::ssa_variable_named_attribute_points_to(var, imp, name, obj, origin) and
    // imp.isImport() and obj != ObjectInternal::undefined()
    //)
}
/** Holds if `package.name` points to `(value, origin)`, where `package` is a package object.
 *
 * Attributes come from the source of the package's init module or, when
 * `hasNoInitModule()` holds, from the submodule called `name`.
 */
cached predicate package_attribute_points_to(PackageObjectInternal package, string name, ObjectInternal value, CfgOrigin origin) {
    /* No init module: the only attributes are the submodules. */
    exists(ModuleObjectInternal sub |
        package.hasNoInitModule() and
        sub = package.submodule(name) and
        value = sub and
        origin = CfgOrigin::fromModule(sub)
    )
    or
    /* Attributes of the init module are attributes of the package. */
    py_module_attributes(package.getInitModule().getSourceModule(), name, value, origin)
    // TO DO
    //or
    //exists(Module init |
    // init = package.getInitModule().getModule() and
    // not exists(EssaVariable var | var.getAUse() = init.getANormalExit() and var.getSourceVariable().getName() = name) and
    // exists(EssaVariable var, Context context |
    // isModuleStateVariable(var) and var.getAUse() = init.getANormalExit() and
    // context.isImport() and
    // SSA::ssa_variable_named_attribute_points_to(var, context, name, undefinedVariable(), _, _) and
    // origin = value and
    // value = package.submodule(name)
    // )
    //)
}
/** Implicit "definition" of the names of submodules at the start of an `__init__.py` file.
*
* PointsTo isn't exactly how the interpreter works, but is the best approximation we can manage statically.
@@ -761,15 +726,6 @@ private predicate potential_builtin_points_to(NameNode f, ObjectInternal value,
)
}
/** Holds if `var` is the ESSA pseudo-variable, named "$", that retains
 * module state during module initialization. Module attributes are handled
 * as attributes of this variable, allowing the SSA form to track
 * mutations of the module during its creation.
 */
private predicate isModuleStateVariable(EssaVariable var) {
    var.getScope() instanceof Module and
    var.getName() = "$"
}
module Conditionals {
/** Holds if `expr` is the operand of a unary `not` expression. */
@@ -920,7 +876,7 @@ module Types {
}
predicate declaredAttribute(ClassObjectInternal cls, string name, ObjectInternal value, CfgOrigin origin) {
value.getBuiltin() = cls.getBuiltin().getMember(name) and origin = CfgOrigin::unknown()
value = ObjectInternal::fromBuiltin(cls.getBuiltin().getMember(name)) and origin = CfgOrigin::unknown()
or
value != ObjectInternal::undefined() and
exists(EssaVariable var |
@@ -944,12 +900,6 @@ module Types {
obj = ObjectInternal::unknown() and result = ObjectInternal::unknownClass()
)
or
exists(Builtin meta |
result.getBuiltin() = meta and
meta = cls.getBuiltin().getClass() and
meta.inheritsFromType()
)
or
exists(ControlFlowNode meta |
six_add_metaclass(_, cls, meta) and
PointsTo2::points_to(meta, _, result, _)

View File

@@ -81,6 +81,22 @@ class Builtin extends @py_cobject {
this.getClass().getName() = "wrapper_descriptor"
}
/** Gets the integer value of this builtin, if its class is the special
 * `int` or `long` class and its name converts to an integer.
 */
int intValue() {
    exists(Builtin cls | cls = this.getClass() |
        cls = Builtin::special("int") or
        cls = Builtin::special("long")
    ) and
    result = this.getName().toInt()
}
/** Gets the string value of this builtin, if its class is the special
 * `unicode` or `bytes` class. The value is the text between the quotes
 * of a name of the form `b'...'` or `u'...'`.
 */
string strValue() {
    exists(Builtin cls | cls = this.getClass() |
        cls = Builtin::special("unicode") or
        cls = Builtin::special("bytes")
    ) and
    result = this.getName().regexpCapture("[bu]'([\\s\\S]*)'", 1)
}
}
module Builtin {