Files
codeql/python/ql/lib/semmle/python/objects/TObject.qll
2022-09-07 07:39:16 +02:00

526 lines
18 KiB
Plaintext

/** Contains the internal algebraic datatype backing the various values tracked by the points-to implementation. */
import python
private import semmle.python.types.Builtins
private import semmle.python.objects.ObjectInternal
private import semmle.python.pointsto.PointsTo
private import semmle.python.pointsto.PointsToContext
private import semmle.python.internal.CachedStages
/**
* Internal type backing `ObjectInternal` and `Value`
* See `ObjectInternal.qll` for an explanation of the API.
*/
cached
newtype TObject =
/** Builtin class objects */
TBuiltinClassObject(Builtin bltn) {
bltn.isClass() and
not bltn = Builtin::unknownType() and
not bltn = Builtin::special("type")
} or
/** Builtin function objects (module members) */
TBuiltinFunctionObject(Builtin bltn) { bltn.isFunction() } or
/** Builtin method objects (class members) */
TBuiltinMethodObject(Builtin bltn) { bltn.isMethod() } or
/** Builtin module objects */
TBuiltinModuleObject(Builtin bltn) { bltn.isModule() } or
/** Other builtin objects from the interpreter */
TBuiltinOpaqueObject(Builtin bltn) {
not bltn.isClass() and
not bltn.isFunction() and
not bltn.isMethod() and
not bltn.isModule() and
not bltn.getClass() = Builtin::special("tuple") and
not exists(bltn.intValue()) and
not exists(bltn.floatValue()) and
not exists(bltn.strValue()) and
not py_special_objects(bltn, _)
} or
/** Python function objects (including lambdas) */
TPythonFunctionObject(ControlFlowNode callable) { callable.getNode() instanceof CallableExpr } or
/** Python class objects */
TPythonClassObject(ControlFlowNode classexpr) { classexpr.getNode() instanceof ClassExpr } or
/** Package objects */
TPackageObject(Folder f) { isPreferredModuleForName(f, _) } or
/** Python module objects */
TPythonModule(Module m) {
not m.isPackage() and
isPreferredModuleForName(m.getFile(), _) and
not exists(SyntaxError se | se.getFile() = m.getFile())
} or
/** `True` */
TTrue() or
/** `False` */
TFalse() or
/** `None` */
TNone() or
/** Represents any value about which nothing useful is known */
TUnknown() or
/** Represents any value known to be a class, but not known to be any specific class */
TUnknownClass() or
/** Represents the absence of a value. Used by points-to for tracking undefined variables */
TUndefined() or
/** The integer `n` */
TInt(int n) {
// Powers of 2 are used for flags
is_power_2(n)
or
// And all combinations of flags up to 2^8
n in [0 .. 511]
or
// Any number explicitly mentioned in the source code.
exists(IntegerLiteral num |
n = num.getValue()
or
exists(UnaryExpr neg | neg.getOp() instanceof USub and neg.getOperand() = num) and
n = -num.getN().toInt()
)
or
n = any(Builtin b).intValue()
} or
/** The float `f` */
TFloat(float f) { f = any(FloatLiteral num).getValue() } or
/** The unicode string `s` */
TUnicode(string s) {
// Any string explicitly mentioned in the source code.
exists(StrConst str |
s = str.getText() and
str.isUnicode()
)
or
// Any string from the library put in the DB by the extractor.
exists(Builtin b |
s = b.strValue() and
b.getClass() = Builtin::special("unicode")
)
or
s = "__main__"
} or
/** The byte string `s` */
TBytes(string s) {
// Any string explicitly mentioned in the source code.
exists(StrConst str |
s = str.getText() and
not str.isUnicode()
)
or
// Any string from the library put in the DB by the extractor.
exists(Builtin b |
s = b.strValue() and
b.getClass() = Builtin::special("bytes")
)
or
s = "__main__"
} or
/** An instance of `cls`, instantiated at `instantiation` given the `context`. */
TSpecificInstance(ControlFlowNode instantiation, ClassObjectInternal cls, PointsToContext context) {
PointsToInternal::pointsTo(instantiation.(CallNode).getFunction(), context, cls, _) and
cls.isSpecial() = false
or
literal_instantiation(instantiation, cls, context)
} or
/** A non-specific instance `cls` which enters the scope at `def` given the callee `context`. */
TSelfInstance(ParameterDefinition def, PointsToContext context, PythonClassObjectInternal cls) {
self_parameter(def, context, cls)
} or
/** A bound method */
TBoundMethod(ObjectInternal self, CallableObjectInternal function) {
any(ObjectInternal obj).binds(self, _, function) and
function.isDescriptor() = true
} or
/** Represents any value whose class is known, but nothing else */
TUnknownInstance(BuiltinClassObjectInternal cls) {
cls != ObjectInternal::superType() and
cls != ObjectInternal::builtin("bool") and
cls != ObjectInternal::noneType()
} or
/** Represents an instance of `super` */
TSuperInstance(ObjectInternal self, ClassObjectInternal startclass) {
super_instantiation(_, self, startclass, _)
} or
/** Represents an instance of `classmethod` */
TClassMethod(CallNode instantiation, CallableObjectInternal function) {
class_method(instantiation, function, _)
} or
/** Represents an instance of `staticmethod` */
TStaticMethod(CallNode instantiation, CallableObjectInternal function) {
static_method(instantiation, function, _)
} or
/** Represents a builtin tuple */
TBuiltinTuple(Builtin bltn) { bltn.getClass() = Builtin::special("tuple") } or
/** Represents a tuple in the Python source */
TPythonTuple(TupleNode origin, PointsToContext context) {
exists(Scope s |
context.appliesToScope(s) and
scope_loads_tuplenode(s, origin)
)
} or
/** Varargs tuple */
TVarargsTuple(CallNode call, PointsToContext context, int offset, int length) {
InterProceduralPointsTo::varargs_tuple(call, context, _, _, offset, length)
} or
/** `type` */
TType() or
/** Represents an instance of `property` */
TProperty(CallNode call, Context ctx, CallableObjectInternal getter) {
PointsToInternal::pointsTo(call.getFunction(), ctx, ObjectInternal::property(), _) and
PointsToInternal::pointsTo(call.getArg(0), ctx, getter, _)
} or
/** Represents the `setter` or `deleter` method of a property object. */
TPropertySetterOrDeleter(PropertyInternal property, string method) {
exists(AttrNode attr | PointsToInternal::pointsTo(attr.getObject(method), _, property, _)) and
(method = "setter" or method = "deleter")
} or
/** Represents a dynamically created class */
TDynamicClass(CallNode instantiation, ClassObjectInternal metacls, PointsToContext context) {
PointsToInternal::pointsTo(instantiation.getFunction(), context, metacls, _) and
not count(instantiation.getAnArg()) = 1 and
Types::getMro(metacls).contains(TType())
} or
/** Represents `sys.version_info`. Acts like a tuple with a range of values depending on the version being analyzed. */
TSysVersionInfo() or
/** Represents a module that is inferred to perhaps exist, but is not present in the database. */
TAbsentModule(string name) { missing_imported_module(_, _, name) } or
/** Represents an attribute of a module that is inferred to perhaps exist, but is not present in the database. */
TAbsentModuleAttribute(AbsentModuleObjectInternal mod, string attrname) {
(
PointsToInternal::pointsTo(any(AttrNode attr).getObject(attrname), _, mod, _)
or
PointsToInternal::pointsTo(any(ImportMemberNode imp).getModule(attrname), _, mod, _)
) and
exists(string modname |
modname = mod.getName() and
not common_module_name(modname + "." + attrname)
)
} or
/** Opaque object representing the result of calling a decorator on a function that we don't understand */
TDecoratedFunction(CallNode call) { call.isFunctionDecoratorCall() } or
/** Represents a subscript operation applied to a type. For type-hint analysis */
TSubscriptedType(ObjectInternal generic, ObjectInternal index) {
isType(generic) and
generic.isNotSubscriptedType() and
index.isNotSubscriptedType() and
Expressions::subscriptPartsPointsTo(_, _, generic, index)
}
/** Join-order helper for TPythonTuple */
pragma[nomagic]
private predicate scope_loads_tuplenode(Scope s, TupleNode origin) {
origin.isLoad() and
origin.getScope() = s
}
/** Holds if the object `t` is a type. */
predicate isType(ObjectInternal t) {
t.isClass() = true
or
t.getOrigin().getEnclosingModule().getName().matches("%typing")
}
private predicate is_power_2(int n) {
n = 1
or
exists(int half | is_power_2(half) and n = half * 2)
}
predicate static_method(
CallNode instantiation, CallableObjectInternal function, PointsToContext context
) {
PointsToInternal::pointsTo(instantiation.getFunction(), context,
ObjectInternal::builtin("staticmethod"), _) and
PointsToInternal::pointsTo(instantiation.getArg(0), context, function, _)
}
predicate class_method(
CallNode instantiation, CallableObjectInternal function, PointsToContext context
) {
PointsToInternal::pointsTo(instantiation.getFunction(), context, ObjectInternal::classMethod(), _) and
PointsToInternal::pointsTo(instantiation.getArg(0), context, function, _)
}
/**
* Holds if the literal corresponding to the control flow node `n` has class `cls`.
*
* Helper predicate for `literal_instantiation`. Prevents a bad join with
* `PointsToContext::appliesTo` from occurring.
*/
pragma[nomagic]
private predicate literal_node_class(ControlFlowNode n, ClassObjectInternal cls) {
n instanceof ListNode and cls = ObjectInternal::builtin("list")
or
n instanceof DictNode and cls = ObjectInternal::builtin("dict")
or
n instanceof SetNode and cls = ObjectInternal::builtin("set")
or
n.getNode() instanceof ImaginaryLiteral and cls = ObjectInternal::builtin("complex")
or
n.getNode() instanceof ListComp and cls = ObjectInternal::builtin("list")
or
n.getNode() instanceof SetComp and cls = ObjectInternal::builtin("set")
or
n.getNode() instanceof DictComp and cls = ObjectInternal::builtin("dict")
}
predicate literal_instantiation(ControlFlowNode n, ClassObjectInternal cls, PointsToContext context) {
context.appliesTo(n) and
literal_node_class(n, cls)
}
predicate super_instantiation(
CallNode instantiation, ObjectInternal self, ClassObjectInternal startclass,
PointsToContext context
) {
super_2args(instantiation, self, startclass, context)
or
super_noargs(instantiation, self, startclass, context)
}
pragma[noinline]
private predicate super_2args(
CallNode instantiation, ObjectInternal self, ClassObjectInternal startclass,
PointsToContext context
) {
exists(ControlFlowNode arg0, ControlFlowNode arg1 |
super_call2(instantiation, arg0, arg1, context) and
PointsToInternal::pointsTo(arg0, context, startclass, _) and
PointsToInternal::pointsTo(arg1, context, self, _)
)
}
pragma[noinline]
private predicate super_call2(
CallNode call, ControlFlowNode arg0, ControlFlowNode arg1, PointsToContext context
) {
exists(ControlFlowNode func |
call2(call, func, arg0, arg1) and
PointsToInternal::pointsTo(func, context, ObjectInternal::superType(), _)
)
}
pragma[noinline]
private predicate super_noargs(
CallNode instantiation, ObjectInternal self, ClassObjectInternal startclass,
PointsToContext context
) {
PointsToInternal::pointsTo(instantiation.getFunction(), context, ObjectInternal::builtin("super"),
_) and
not exists(instantiation.getArg(0)) and
exists(Function func |
instantiation.getScope() = func and
/* Implicit class argument is lexically enclosing scope */
func.getScope() = startclass.(PythonClassObjectInternal).getScope() and
/* Implicit 'self' is the `self` parameter of the enclosing function */
self.(SelfInstanceInternal).getParameter().getParameter() = func.getArg(0)
)
}
predicate call2(CallNode call, ControlFlowNode func, ControlFlowNode arg0, ControlFlowNode arg1) {
not exists(call.getArg(2)) and
func = call.getFunction() and
arg0 = call.getArg(0) and
arg1 = call.getArg(1)
}
predicate call3(
CallNode call, ControlFlowNode func, ControlFlowNode arg0, ControlFlowNode arg1,
ControlFlowNode arg2
) {
not exists(call.getArg(3)) and
func = call.getFunction() and
arg0 = call.getArg(0) and
arg1 = call.getArg(1) and
arg2 = call.getArg(2)
}
bindingset[self, function]
deprecated predicate method_binding(
AttrNode instantiation, ObjectInternal self, CallableObjectInternal function,
PointsToContext context
) {
exists(ObjectInternal obj, string name | receiver(instantiation, context, obj, name) |
exists(ObjectInternal cls |
cls = obj.getClass() and
cls != ObjectInternal::superType() and
cls.attribute(name, function, _) and
self = obj
)
or
exists(SuperInstance sup, ClassObjectInternal decl |
sup = obj and
decl = Types::getMro(self.getClass()).startingAt(sup.getStartClass()).findDeclaringClass(name) and
Types::declaredAttribute(decl, name, function, _) and
self = sup.getSelf()
)
)
}
/** Helper for method_binding */
pragma[noinline]
deprecated predicate receiver(
AttrNode instantiation, PointsToContext context, ObjectInternal obj, string name
) {
PointsToInternal::pointsTo(instantiation.getObject(name), context, obj, _)
}
/** Helper self parameters: `def meth(self, ...): ...`. */
pragma[noinline]
private predicate self_parameter(
ParameterDefinition def, PointsToContext context, PythonClassObjectInternal cls
) {
def.isSelf() and
/* Exclude the special parameter name `.0` which is used for unfolded comprehensions. */
def.getName() != ".0" and
exists(Function scope |
def.getScope() = scope and
context.isRuntime() and
context.appliesToScope(scope) and
scope.getScope() = cls.getScope() and
concrete_class(cls) and
/*
* We want to allow decorated functions, otherwise we lose a lot of useful information.
* However, we want to exclude any function whose arguments are permuted by the decorator.
* In general we can't do that, but we can special case the most common ones.
*/
neither_class_nor_static_method(scope)
)
}
cached
private predicate concrete_class(PythonClassObjectInternal cls) {
cls.getClass() != abcMetaClassObject()
or
exists(Class c |
c = cls.getScope() and
not exists(c.getMetaClass())
|
forall(Function f | f.getScope() = c |
not exists(Raise r, Name ex |
r.getScope() = f and
(r.getException() = ex or r.getException().(Call).getFunc() = ex) and
ex.getId() = ["NotImplementedError", "NotImplemented"]
)
)
)
}
private PythonClassObjectInternal abcMetaClassObject() {
/* Avoid using points-to and thus negative recursion */
exists(Class abcmeta | result.getScope() = abcmeta |
abcmeta.getName() = "ABCMeta" and
abcmeta.getScope().getName() = "abc"
)
}
private predicate neither_class_nor_static_method(Function f) {
not exists(f.getADecorator())
or
exists(ControlFlowNode deco | deco = f.getADecorator().getAFlowNode() |
exists(ObjectInternal o | PointsToInternal::pointsTo(deco, _, o, _) |
o != ObjectInternal::staticMethod() and
o != ObjectInternal::classMethod()
)
or
not deco instanceof NameNode
)
}
predicate missing_imported_module(ControlFlowNode imp, Context ctx, string name) {
ctx.isImport() and
imp.(ImportExprNode).getNode().getAnImportedModuleName() = name and
(
not exists(Module m | m.getName() = name) and
not exists(Builtin b | b.isModule() and b.getName() = name)
or
exists(Module m, SyntaxError se |
m.getName() = name and
se.getFile() = m.getFile()
)
)
or
exists(AbsentModuleObjectInternal mod |
PointsToInternal::pointsTo(imp.(ImportMemberNode).getModule(name), ctx, mod, _) and
common_module_name(mod.getName() + "." + name)
)
}
/**
* Helper for missing modules to determine if name `x.y` is a module `x.y` or
* an attribute `y` of module `x`. This list should be added to as required.
*/
predicate common_module_name(string name) { name = ["zope.interface", "six.moves"] }
/**
* A declaration of a class, either a built-in class or a source definition
* This acts as a helper for ClassObjectInternal allowing some lookup without
* recursion.
*/
class ClassDecl extends @py_object {
ClassDecl() {
this.(Builtin).isClass() and not this = Builtin::unknownType()
or
this.(ControlFlowNode).getNode() instanceof ClassExpr
}
/** Gets a textual representation of this element. */
string toString() { result = "ClassDecl" }
/** Gets the class scope for Python class declarations */
Class getClass() { result = this.(ControlFlowNode).getNode().(ClassExpr).getInnerScope() }
/** Holds if this class declares the attribute `name` */
predicate declaresAttribute(string name) {
exists(this.(Builtin).getMember(name))
or
exists(SsaVariable var |
name = var.getId() and var.getAUse() = this.getClass().getANormalExit()
)
}
/** Gets the name of this class */
string getName() {
result = this.(Builtin).getName()
or
result = this.getClass().getName()
}
/**
* Whether this is a class whose instances must be treated specially, rather than as generic instances.
*/
predicate isSpecial() {
exists(string name | this = Builtin::special(name) |
name =
[
"type", "super", "bool", "NoneType", "tuple", "property", "ClassMethod", "StaticMethod",
"MethodType", "ModuleType"
]
)
}
/** Holds if for class `C`, `C()` returns an instance of `C` */
predicate callReturnsInstance() {
exists(Class pycls | pycls = this.getClass() |
/* Django does this, so we need to account for it */
not exists(Function init, LocalVariable self |
/* `self.__class__ = ...` in the `__init__` method */
pycls.getInitMethod() = init and
self.isSelf() and
self.getScope() = init and
exists(AttrNode a | a.isStore() and a.getObject("__class__") = self.getAUse())
) and
not exists(Function new | new.getName() = "__new__" and new.getScope() = pycls)
)
or
this instanceof Builtin
}
/** Holds if this class is the abstract base class */
predicate isAbstractBaseClass(string name) {
exists(Module m | m.getName() = ["_abcoll", "_collections_abc"] |
this.getClass().getScope() = m and
this.getName() = name
)
}
}