mirror of
https://github.com/github/codeql.git
synced 2026-05-01 03:35:13 +02:00
Python: Autoformat pointsto.
This commit is contained in:
@@ -4,22 +4,24 @@
|
||||
* to the possible objects it points-to the inferred types of those objects and the 'origin'
|
||||
* of those objects. The 'origin' is the point in source code that the object can be traced
|
||||
* back to.
|
||||
*
|
||||
*
|
||||
* This file contains non-layered parts of the points-to analysis.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.essa.SsaDefinitions
|
||||
private import semmle.python.types.Builtins
|
||||
|
||||
module BasePointsTo {
|
||||
/** INTERNAL -- Use n.refersTo(value, _, origin) instead */
|
||||
pragma [noinline]
|
||||
pragma[noinline]
|
||||
predicate points_to(ControlFlowNode f, Object value, ControlFlowNode origin) {
|
||||
(
|
||||
(
|
||||
f.isLiteral() and value = f and not f.getNode() instanceof ImmutableLiteral
|
||||
or
|
||||
f.isFunction() and value = f
|
||||
) and origin = f
|
||||
) and
|
||||
origin = f
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,12 +37,13 @@ predicate varargs_points_to(ControlFlowNode f, ClassObject cls) {
|
||||
cls = theTupleType()
|
||||
}
|
||||
|
||||
/** Gets the class of the object for simple cases, namely constants, functions,
|
||||
/**
|
||||
* Gets the class of the object for simple cases, namely constants, functions,
|
||||
* comprehensions and built-in objects.
|
||||
*
|
||||
* This exists primarily for internal use. Use getAnInferredType() instead.
|
||||
*/
|
||||
pragma [noinline]
|
||||
pragma[noinline]
|
||||
ClassObject simple_types(Object obj) {
|
||||
result = comprehension(obj.getOrigin())
|
||||
or
|
||||
@@ -79,37 +82,42 @@ private int tuple_index_value(Object t, int i) {
|
||||
result = t.(TupleNode).getElement(i).getNode().(Num).getN().toInt()
|
||||
or
|
||||
exists(Object item |
|
||||
py_citems(t, i, item) and
|
||||
py_citems(t, i, item) and
|
||||
result = item.(NumericObject).intValue()
|
||||
)
|
||||
}
|
||||
|
||||
pragma [noinline]
|
||||
pragma[noinline]
|
||||
int version_tuple_value(Object t) {
|
||||
not exists(tuple_index_value(t, 1)) and result = tuple_index_value(t, 0)*10
|
||||
not exists(tuple_index_value(t, 1)) and result = tuple_index_value(t, 0) * 10
|
||||
or
|
||||
not exists(tuple_index_value(t, 2)) and result = tuple_index_value(t, 0)*10 + tuple_index_value(t, 1)
|
||||
not exists(tuple_index_value(t, 2)) and
|
||||
result = tuple_index_value(t, 0) * 10 + tuple_index_value(t, 1)
|
||||
or
|
||||
tuple_index_value(t, 2) = 0 and result = tuple_index_value(t, 0)*10 + tuple_index_value(t, 1)
|
||||
tuple_index_value(t, 2) = 0 and result = tuple_index_value(t, 0) * 10 + tuple_index_value(t, 1)
|
||||
or
|
||||
tuple_index_value(t, 2) > 0 and result = tuple_index_value(t, 0)*10 + tuple_index_value(t, 1) + 1
|
||||
tuple_index_value(t, 2) > 0 and
|
||||
result = tuple_index_value(t, 0) * 10 + tuple_index_value(t, 1) + 1
|
||||
}
|
||||
|
||||
/** Gets the version numbers that represent the extremes of the supported versions. */
|
||||
private int major_minor() {
|
||||
if major_version() = 3 then
|
||||
(result = 33 or result = 37) // 3.3 to 3.7
|
||||
else
|
||||
(result = 25 or result = 27) // 2.5 to 2.7
|
||||
if major_version() = 3
|
||||
then (
|
||||
result = 33 or result = 37
|
||||
) else (
|
||||
// 2.5 to 2.7 (the `then` branch above covers 3.3 to 3.7)
|
||||
result = 25 or result = 27
|
||||
) // 2.5 to 2.7
|
||||
}
|
||||
|
||||
/** Compares the given tuple object to both the maximum and minimum possible sys.version_info values */
|
||||
int version_tuple_compare(Object t) {
|
||||
version_tuple_value(t) < major_minor() and result = -1
|
||||
version_tuple_value(t) < major_minor() and result = -1
|
||||
or
|
||||
version_tuple_value(t) = major_minor() and result = 0
|
||||
version_tuple_value(t) = major_minor() and result = 0
|
||||
or
|
||||
version_tuple_value(t) > major_minor() and result = 1
|
||||
version_tuple_value(t) > major_minor() and result = 1
|
||||
}
|
||||
|
||||
/* Holds if `cls` is a new-style class if it were to have no explicit base classes */
|
||||
@@ -121,14 +129,15 @@ predicate baseless_is_new_style(ClassObject cls) {
|
||||
exists(cls.declaredMetaClass())
|
||||
}
|
||||
|
||||
/* The following predicates exist in order to provide
|
||||
/*
|
||||
* The following predicates exist in order to provide
|
||||
* more precise type information than the underlying
|
||||
* database relations. This helps to optimise the points-to
|
||||
* analysis.
|
||||
*/
|
||||
|
||||
/** Holds if this class (not on a super-class) declares name */
|
||||
pragma [noinline]
|
||||
pragma[noinline]
|
||||
predicate class_declares_attribute(ClassObject cls, string name) {
|
||||
exists(Class defn |
|
||||
defn = cls.getPyClass() and
|
||||
@@ -136,7 +145,7 @@ predicate class_declares_attribute(ClassObject cls, string name) {
|
||||
)
|
||||
or
|
||||
exists(Builtin o |
|
||||
o = cls.asBuiltin().getMember(name) and
|
||||
o = cls.asBuiltin().getMember(name) and
|
||||
not exists(Builtin sup |
|
||||
sup = cls.asBuiltin().getBaseClass() and
|
||||
o = sup.getMember(name)
|
||||
@@ -151,9 +160,9 @@ private predicate class_defines_name(Class cls, string name) {
|
||||
|
||||
/** Gets a return value CFG node, provided that it is safe to track across returns */
|
||||
ControlFlowNode safe_return_node(PyFunctionObject func) {
|
||||
result = func.getAReturnedNode()
|
||||
result = func.getAReturnedNode() and
|
||||
// Not a parameter
|
||||
and not exists(Parameter p, SsaVariable pvar |
|
||||
not exists(Parameter p, SsaVariable pvar |
|
||||
p.asName().getAFlowNode() = pvar.getDefinition() and
|
||||
result = pvar.getAUse()
|
||||
) and
|
||||
@@ -163,9 +172,11 @@ ControlFlowNode safe_return_node(PyFunctionObject func) {
|
||||
|
||||
/** Holds if it can be determined from the control flow graph alone that this function can never return */
|
||||
predicate function_can_never_return(FunctionObject func) {
|
||||
/* A Python function never returns if it has no normal exits that are not dominated by a
|
||||
/*
|
||||
* A Python function never returns if it has no normal exits that are not dominated by a
|
||||
* call to a function which itself never returns.
|
||||
*/
|
||||
|
||||
exists(Function f |
|
||||
f = func.getFunction() and
|
||||
not exists(f.getAnExitNode())
|
||||
@@ -174,27 +185,21 @@ predicate function_can_never_return(FunctionObject func) {
|
||||
func = ModuleObject::named("sys").attr("exit")
|
||||
}
|
||||
|
||||
|
||||
private newtype TIterationDefinition =
|
||||
private newtype TIterationDefinition =
|
||||
TIterationDefinition_(SsaSourceVariable var, ControlFlowNode def, ControlFlowNode sequence) {
|
||||
SsaSource::iteration_defined_variable(var, def, sequence)
|
||||
}
|
||||
|
||||
/** DEPRECATED. For backwards compatibility only.
|
||||
* A definition of a variable in a for loop `for v in ...:` */
|
||||
/**
|
||||
* DEPRECATED. For backwards compatibility only.
|
||||
* A definition of a variable in a for loop `for v in ...:`
|
||||
*/
|
||||
deprecated class IterationDefinition extends TIterationDefinition {
|
||||
string toString() { result = "IterationDefinition" }
|
||||
|
||||
string toString() {
|
||||
result = "IterationDefinition"
|
||||
}
|
||||
|
||||
ControlFlowNode getSequence() {
|
||||
this = TIterationDefinition_(_, _, result)
|
||||
}
|
||||
|
||||
ControlFlowNode getSequence() { this = TIterationDefinition_(_, _, result) }
|
||||
}
|
||||
|
||||
|
||||
/** Holds if `outer` contains `inner`, both are contained within a test, and `inner` is a plain use or an attribute lookup */
|
||||
pragma[noinline]
|
||||
predicate contains_interesting_expression_within_test(ControlFlowNode outer, ControlFlowNode inner) {
|
||||
@@ -202,7 +207,8 @@ predicate contains_interesting_expression_within_test(ControlFlowNode outer, Con
|
||||
exists(ControlFlowNode test |
|
||||
outer.getAChild*() = inner and
|
||||
test_contains(test, outer) and
|
||||
test_contains(test, inner) |
|
||||
test_contains(test, inner)
|
||||
|
|
||||
inner instanceof NameNode or
|
||||
inner instanceof AttrNode
|
||||
)
|
||||
@@ -216,10 +222,14 @@ predicate test_contains(ControlFlowNode expr, ControlFlowNode use) {
|
||||
}
|
||||
|
||||
/** Holds if `test` is a test (a branch), `use` is within that test and `def` is an edge from that test with `sense` */
|
||||
predicate refinement_test(ControlFlowNode test, ControlFlowNode use, boolean sense, PyEdgeRefinement def) {
|
||||
/* Because calls such as `len` may create a new variable, we need to go via the source variable
|
||||
predicate refinement_test(
|
||||
ControlFlowNode test, ControlFlowNode use, boolean sense, PyEdgeRefinement def
|
||||
) {
|
||||
/*
|
||||
* Because calls such as `len` may create a new variable, we need to go via the source variable
|
||||
* That is perfectly safe as we are only dealing with calls that do not mutate their arguments.
|
||||
*/
|
||||
|
||||
use = def.getInput().getSourceVariable().(Variable).getAUse() and
|
||||
test = def.getPredecessor().getLastNode() and
|
||||
test_contains(test, use) and
|
||||
@@ -227,11 +237,12 @@ predicate refinement_test(ControlFlowNode test, ControlFlowNode use, boolean sen
|
||||
}
|
||||
|
||||
/** Holds if `f` is an import of the form `from .[...] import name` and the enclosing scope is an __init__ module */
|
||||
pragma [noinline]
|
||||
pragma[noinline]
|
||||
predicate live_import_from_dot_in_init(ImportMemberNode f, EssaVariable var) {
|
||||
exists(string name |
|
||||
import_from_dot_in_init(f.getModule(name)) and
|
||||
var.getSourceVariable().getName() = name and var.getAUse() = f
|
||||
var.getSourceVariable().getName() = name and
|
||||
var.getAUse() = f
|
||||
)
|
||||
}
|
||||
|
||||
@@ -247,23 +258,23 @@ predicate import_from_dot_in_init(ImportExprNode f) {
|
||||
}
|
||||
|
||||
/** Gets the pseudo-object representing the value referred to by an undefined variable */
|
||||
Object undefinedVariable() {
|
||||
py_special_objects(result, "_semmle_undefined_value")
|
||||
}
|
||||
Object undefinedVariable() { py_special_objects(result, "_semmle_undefined_value") }
|
||||
|
||||
/** Gets the pseudo-object representing an unknown value */
|
||||
Object unknownValue() {
|
||||
result.asBuiltin() = Builtin::unknown()
|
||||
}
|
||||
Object unknownValue() { result.asBuiltin() = Builtin::unknown() }
|
||||
|
||||
BuiltinCallable theTypeNewMethod() {
|
||||
result.asBuiltin() = theTypeType().asBuiltin().getMember("__new__")
|
||||
}
|
||||
|
||||
/** Gets the `value, cls, origin` that `f` would refer to if it has not been assigned some other value */
|
||||
pragma [noinline]
|
||||
predicate potential_builtin_points_to(NameNode f, Object value, ClassObject cls, ControlFlowNode origin) {
|
||||
f.isGlobal() and f.isLoad() and origin = f and
|
||||
pragma[noinline]
|
||||
predicate potential_builtin_points_to(
|
||||
NameNode f, Object value, ClassObject cls, ControlFlowNode origin
|
||||
) {
|
||||
f.isGlobal() and
|
||||
f.isLoad() and
|
||||
origin = f and
|
||||
(
|
||||
builtin_name_points_to(f.getId(), value, cls)
|
||||
or
|
||||
@@ -271,36 +282,41 @@ predicate potential_builtin_points_to(NameNode f, Object value, ClassObject cls,
|
||||
)
|
||||
}
|
||||
|
||||
pragma [noinline]
|
||||
pragma[noinline]
|
||||
predicate builtin_name_points_to(string name, Object value, ClassObject cls) {
|
||||
value = Object::builtin(name) and cls.asBuiltin() = value.asBuiltin().getClass()
|
||||
}
|
||||
|
||||
module BaseFlow {
|
||||
|
||||
predicate reaches_exit(EssaVariable var) {
|
||||
var.getAUse() = var.getScope().getANormalExit()
|
||||
}
|
||||
predicate reaches_exit(EssaVariable var) { var.getAUse() = var.getScope().getANormalExit() }
|
||||
|
||||
/* Helper for this_scope_entry_value_transfer(...). Transfer of values from an earlier scope to a later one */
|
||||
cached predicate scope_entry_value_transfer_from_earlier(EssaVariable pred_var, Scope pred_scope, ScopeEntryDefinition succ_def, Scope succ_scope) {
|
||||
cached
|
||||
predicate scope_entry_value_transfer_from_earlier(
|
||||
EssaVariable pred_var, Scope pred_scope, ScopeEntryDefinition succ_def, Scope succ_scope
|
||||
) {
|
||||
exists(SsaSourceVariable var |
|
||||
reaches_exit(pred_var) and
|
||||
pred_var.getScope() = pred_scope and
|
||||
var = pred_var.getSourceVariable() and
|
||||
var = succ_def.getSourceVariable() and
|
||||
succ_def.getScope() = succ_scope
|
||||
|
|
||||
|
|
||||
pred_scope.precedes(succ_scope)
|
||||
or
|
||||
/* If an `__init__` method does not modify the global variable, then
|
||||
/*
|
||||
* If an `__init__` method does not modify the global variable, then
|
||||
* we can skip it and take the value directly from the module.
|
||||
*/
|
||||
|
||||
exists(Scope init |
|
||||
init.getName() = "__init__" and init.precedes(succ_scope) and pred_scope.precedes(init) and
|
||||
not var.(Variable).getAStore().getScope() = init and var instanceof GlobalVariable
|
||||
init.getName() = "__init__" and
|
||||
init.precedes(succ_scope) and
|
||||
pred_scope.precedes(init) and
|
||||
not var.(Variable).getAStore().getScope() = init and
|
||||
var instanceof GlobalVariable
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -312,15 +328,17 @@ predicate simple_points_to(ControlFlowNode f, Object value, ClassObject cls, Con
|
||||
or
|
||||
BasePointsTo::points_to(f, value, origin) and cls = simple_types(value)
|
||||
or
|
||||
value = f.getNode().(ImmutableLiteral).getLiteralObject() and cls = simple_types(value) and origin = f
|
||||
value = f.getNode().(ImmutableLiteral).getLiteralObject() and
|
||||
cls = simple_types(value) and
|
||||
origin = f
|
||||
}
|
||||
|
||||
/** Holds if `bit` is a binary expression node with a bitwise operator.
|
||||
/**
|
||||
* Holds if `bit` is a binary expression node with a bitwise operator.
|
||||
* Helper for `this_binary_expr_points_to`.
|
||||
*/
|
||||
predicate bitwise_expression_node(BinaryExprNode bit, ControlFlowNode left, ControlFlowNode right) {
|
||||
exists(Operator op |
|
||||
op = bit.getNode().getOp() |
|
||||
exists(Operator op | op = bit.getNode().getOp() |
|
||||
op instanceof BitAnd or
|
||||
op instanceof BitOr or
|
||||
op instanceof BitXor
|
||||
@@ -329,16 +347,14 @@ predicate bitwise_expression_node(BinaryExprNode bit, ControlFlowNode left, Cont
|
||||
right = bit.getRight()
|
||||
}
|
||||
|
||||
|
||||
private
|
||||
Module theCollectionsAbcModule() {
|
||||
private Module theCollectionsAbcModule() {
|
||||
result.getName() = "_abcoll"
|
||||
or
|
||||
result.getName() = "_collections_abc"
|
||||
}
|
||||
|
||||
ClassObject collectionsAbcClass(string name) {
|
||||
exists(Class cls |
|
||||
exists(Class cls |
|
||||
result.getPyClass() = cls and
|
||||
cls.getName() = name and
|
||||
cls.getScope() = theCollectionsAbcModule()
|
||||
|
||||
@@ -1,42 +1,47 @@
|
||||
/**
|
||||
* Context-sensitive call-graph.
|
||||
*
|
||||
*
|
||||
* NOTE: Since an "invocation" contains callsite information
|
||||
* and a path back to its ancestor calls, the "invocation" call-graph must be a tree.
|
||||
* This has two important consequences:
|
||||
* 1. The graph is incomplete; it has quite limited depth in order to keep the graph to a sensible size.
|
||||
* 2. The graph is precise. Since different invocations are distinct, there can be no "cross-talk" between
|
||||
* 2. The graph is precise. Since different invocations are distinct, there can be no "cross-talk" between
|
||||
* different calls to the same function.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.pointsto.PointsToContext
|
||||
|
||||
private newtype TTInvocation = TInvocation(FunctionObject f, Context c) {
|
||||
exists(Context outer, CallNode call |
|
||||
call = f.getACall(outer) and
|
||||
c.fromCall(call, outer)
|
||||
)
|
||||
or
|
||||
c.appliesToScope(f.getFunction())
|
||||
}
|
||||
private newtype TTInvocation =
|
||||
TInvocation(FunctionObject f, Context c) {
|
||||
exists(Context outer, CallNode call |
|
||||
call = f.getACall(outer) and
|
||||
c.fromCall(call, outer)
|
||||
)
|
||||
or
|
||||
c.appliesToScope(f.getFunction())
|
||||
}
|
||||
|
||||
/** This class represents a static approximation to the
|
||||
* dynamic call-graph. A `FunctionInvocation` represents
|
||||
/**
|
||||
* This class represents a static approximation to the
|
||||
* dynamic call-graph. A `FunctionInvocation` represents
|
||||
* all calls made to a function for a given context.
|
||||
*/
|
||||
class FunctionInvocation extends TTInvocation {
|
||||
|
||||
string toString() { result = "Invocation" }
|
||||
|
||||
FunctionObject getFunction() { this = TInvocation(result, _) }
|
||||
|
||||
Context getContext() { this = TInvocation(_, result) }
|
||||
|
||||
/** Gets the callee invocation for the given callsite.
|
||||
/**
|
||||
* Gets the callee invocation for the given callsite.
|
||||
* The callsite must be within the function of this invocation.
|
||||
*/
|
||||
FunctionInvocation getCallee(CallNode call) {
|
||||
exists(FunctionObject callee, Context callee_context, FunctionObject caller, Context caller_context |
|
||||
exists(
|
||||
FunctionObject callee, Context callee_context, FunctionObject caller, Context caller_context
|
||||
|
|
||||
this = TInvocation(caller, caller_context) and
|
||||
result = TInvocation(callee, callee_context) and
|
||||
call = callee.getACall(caller_context) and
|
||||
@@ -45,12 +50,11 @@ class FunctionInvocation extends TTInvocation {
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets a callee invocation.
|
||||
/**
|
||||
* Gets a callee invocation.
|
||||
* That is any invocation made from within this invocation.
|
||||
*/
|
||||
FunctionInvocation getACallee() {
|
||||
result = this.getCallee(_)
|
||||
}
|
||||
FunctionInvocation getACallee() { result = this.getCallee(_) }
|
||||
|
||||
/** Holds if this is an invocation `f` in the "runtime" context. */
|
||||
predicate runtime(FunctionObject f) {
|
||||
@@ -61,13 +65,8 @@ class FunctionInvocation extends TTInvocation {
|
||||
}
|
||||
|
||||
/** Gets the call from which this invocation was made. */
|
||||
CallNode getCall() {
|
||||
this.getContext().fromCall(result, _)
|
||||
}
|
||||
CallNode getCall() { this.getContext().fromCall(result, _) }
|
||||
|
||||
/** Gets the caller invocation of this invocation, if any. */
|
||||
FunctionInvocation getCaller() {
|
||||
this = result.getCallee(_)
|
||||
}
|
||||
|
||||
FunctionInvocation getCaller() { this = result.getCallee(_) }
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
/** Helper predicates for standard tests in Python commonly
|
||||
/**
|
||||
* Helper predicates for standard tests in Python commonly
|
||||
* used to filter objects by value or by type.
|
||||
*/
|
||||
|
||||
|
||||
import python
|
||||
|
||||
/** Holds if `c` is a call to `hasattr(obj, attr)`. */
|
||||
@@ -21,13 +21,15 @@ predicate is_callable(CallNode c, ControlFlowNode obj) {
|
||||
/** Holds if `fc` is a call to `isinstance(use, cls)`. */
|
||||
predicate isinstance(CallNode fc, ControlFlowNode cls, ControlFlowNode use) {
|
||||
fc.getFunction().(NameNode).getId() = "isinstance" and
|
||||
cls = fc.getArg(1) and fc.getArg(0) = use
|
||||
cls = fc.getArg(1) and
|
||||
fc.getArg(0) = use
|
||||
}
|
||||
|
||||
/** Holds if `fc` is a call to `issubclass(use, cls)`. */
|
||||
predicate issubclass(CallNode fc, ControlFlowNode cls, ControlFlowNode use) {
|
||||
fc.getFunction().(NameNode).getId() = "issubclass" and
|
||||
fc.getArg(0) = use and cls = fc.getArg(1)
|
||||
fc.getArg(0) = use and
|
||||
cls = fc.getArg(1)
|
||||
}
|
||||
|
||||
/** Holds if `c` is a test comparing `x` and `y`. `is` is true if the operator is `is` or `==`, it is false if the operator is `is not` or `!=`. */
|
||||
@@ -35,11 +37,15 @@ predicate equality_test(CompareNode c, ControlFlowNode x, boolean is, ControlFlo
|
||||
exists(Cmpop op |
|
||||
c.operands(x, op, y) or
|
||||
c.operands(y, op, x)
|
||||
|
|
||||
(is = true and op instanceof Is or
|
||||
is = false and op instanceof IsNot or
|
||||
is = true and op instanceof Eq or
|
||||
is = false and op instanceof NotEq
|
||||
|
|
||||
(
|
||||
is = true and op instanceof Is
|
||||
or
|
||||
is = false and op instanceof IsNot
|
||||
or
|
||||
is = true and op instanceof Eq
|
||||
or
|
||||
is = false and op instanceof NotEq
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
/* For backwards compatibility */
|
||||
|
||||
import PointsTo::PointsTo as P
|
||||
|
||||
/** DEPRECATED: Use `PointsTo` instead */
|
||||
deprecated module FinalPointsTo = P;
|
||||
deprecated module FinalPointsTo = P;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
/** Classes and predicates for computing the Method Resolution Order (MRO) of classes.
|
||||
/**
|
||||
* Classes and predicates for computing the Method Resolution Order (MRO) of classes.
|
||||
* Supports both old-style (diamond) inheritance and new-style (C3 linearization) inheritance.
|
||||
*/
|
||||
|
||||
@@ -17,19 +18,16 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
private import semmle.python.objects.TObject
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
private import semmle.python.pointsto.PointsTo
|
||||
private import semmle.python.pointsto.PointsToContext
|
||||
private import semmle.python.types.Builtins
|
||||
|
||||
|
||||
cached newtype TClassList = Empty()
|
||||
or
|
||||
Cons(ClassObjectInternal head, TClassList tail) {
|
||||
required_cons(head, tail)
|
||||
}
|
||||
cached
|
||||
newtype TClassList =
|
||||
Empty() or
|
||||
Cons(ClassObjectInternal head, TClassList tail) { required_cons(head, tail) }
|
||||
|
||||
/* Keep ClassList finite and as small as possible */
|
||||
private predicate required_cons(ClassObjectInternal head, ClassList tail) {
|
||||
@@ -38,7 +36,7 @@ private predicate required_cons(ClassObjectInternal head, ClassList tail) {
|
||||
tail = merge_of_linearization_of_bases(head)
|
||||
or
|
||||
exists(ClassObjectInternal cls, int n |
|
||||
head = Types::getBase(cls, n) and tail = bases(cls, n+1)
|
||||
head = Types::getBase(cls, n) and tail = bases(cls, n + 1)
|
||||
)
|
||||
or
|
||||
head = ObjectInternal::builtin("object") and tail = Empty()
|
||||
@@ -52,12 +50,12 @@ private predicate required_cons(ClassObjectInternal head, ClassList tail) {
|
||||
or
|
||||
exists(ClassList list, int n |
|
||||
n = list.firstIndex(head) and
|
||||
tail = list.deduplicate(n+1)
|
||||
tail = list.deduplicate(n + 1)
|
||||
)
|
||||
or
|
||||
exists(ClassListList list, int n |
|
||||
head = list.getHead().getItem(n) and
|
||||
tail = flatten_list(list, n+1)
|
||||
tail = flatten_list(list, n + 1)
|
||||
)
|
||||
or
|
||||
tail = list_old_style_base_mros(head).flatten()
|
||||
@@ -70,16 +68,12 @@ private ClassObjectInternal sole_base(ClassObjectInternal cls) {
|
||||
|
||||
/** A list of classes, used to represent the MRO of a class */
|
||||
class ClassList extends TClassList {
|
||||
|
||||
string toString() {
|
||||
result = "[" + this.contents() + "]"
|
||||
}
|
||||
string toString() { result = "[" + this.contents() + "]" }
|
||||
|
||||
string contents() {
|
||||
this = Empty() and result = ""
|
||||
or
|
||||
exists(ClassObjectInternal head |
|
||||
head = this.getHead() |
|
||||
exists(ClassObjectInternal head | head = this.getHead() |
|
||||
this.getTail() = Empty() and result = className(head)
|
||||
or
|
||||
this.getTail() != Empty() and result = className(head) + ", " + this.getTail().contents()
|
||||
@@ -98,25 +92,19 @@ class ClassList extends TClassList {
|
||||
result = this.getTail().length() + 1
|
||||
}
|
||||
|
||||
ClassObjectInternal getHead() {
|
||||
this = Cons(result, _)
|
||||
}
|
||||
ClassObjectInternal getHead() { this = Cons(result, _) }
|
||||
|
||||
ClassList getTail() {
|
||||
this = Cons(_, result)
|
||||
}
|
||||
ClassList getTail() { this = Cons(_, result) }
|
||||
|
||||
ClassObjectInternal getItem(int n) {
|
||||
n = 0 and result = this.getHead()
|
||||
n = 0 and result = this.getHead()
|
||||
or
|
||||
result = this.getTail().getItem(n-1)
|
||||
result = this.getTail().getItem(n - 1)
|
||||
}
|
||||
|
||||
ClassObjectInternal getAnItem() {
|
||||
result = this.getItem(_)
|
||||
}
|
||||
ClassObjectInternal getAnItem() { result = this.getItem(_) }
|
||||
|
||||
pragma [inline]
|
||||
pragma[inline]
|
||||
ClassList removeHead(ClassObjectInternal cls) {
|
||||
this.getHead() = cls and result = this.getTail()
|
||||
or
|
||||
@@ -159,18 +147,15 @@ class ClassList extends TClassList {
|
||||
}
|
||||
|
||||
ClassObjectInternal findDeclaringClass(string name) {
|
||||
exists(ClassDecl head |
|
||||
head = this.getHead().getClassDeclaration() |
|
||||
if head.declaresAttribute(name) then
|
||||
result = this.getHead()
|
||||
else
|
||||
result = this.getTail().findDeclaringClass(name)
|
||||
exists(ClassDecl head | head = this.getHead().getClassDeclaration() |
|
||||
if head.declaresAttribute(name)
|
||||
then result = this.getHead()
|
||||
else result = this.getTail().findDeclaringClass(name)
|
||||
)
|
||||
}
|
||||
|
||||
predicate lookup(string name, ObjectInternal value, CfgOrigin origin) {
|
||||
exists(ClassObjectInternal decl |
|
||||
decl = this.findDeclaringClass(name) |
|
||||
exists(ClassObjectInternal decl | decl = this.findDeclaringClass(name) |
|
||||
Types::declaredAttribute(decl, name, value, origin)
|
||||
)
|
||||
}
|
||||
@@ -182,83 +167,63 @@ class ClassList extends TClassList {
|
||||
}
|
||||
|
||||
ClassList startingAt(ClassObjectInternal cls) {
|
||||
exists(ClassObjectInternal head |
|
||||
head = this.getHead() |
|
||||
if head = cls then
|
||||
result = this
|
||||
else
|
||||
result = this.getTail().startingAt(cls)
|
||||
exists(ClassObjectInternal head | head = this.getHead() |
|
||||
if head = cls then result = this else result = this.getTail().startingAt(cls)
|
||||
)
|
||||
}
|
||||
|
||||
ClassList deduplicate() {
|
||||
result = this.deduplicate(0)
|
||||
}
|
||||
ClassList deduplicate() { result = this.deduplicate(0) }
|
||||
|
||||
/* Helpers for `deduplicate()` */
|
||||
|
||||
int firstIndex(ClassObjectInternal cls) {
|
||||
result = this.firstIndex(cls, 0)
|
||||
}
|
||||
int firstIndex(ClassObjectInternal cls) { result = this.firstIndex(cls, 0) }
|
||||
|
||||
/* Helper for firstIndex(cls), getting the first index of `cls` where result >= n */
|
||||
private int firstIndex(ClassObjectInternal cls, int n) {
|
||||
this.getItem(n) = cls and result = n
|
||||
or
|
||||
this.getItem(n) != cls and result = this.firstIndex(cls, n+1)
|
||||
this.getItem(n) != cls and result = this.firstIndex(cls, n + 1)
|
||||
}
|
||||
|
||||
/** Holds if the class at `n` is a duplicate of an earlier position. */
|
||||
private predicate duplicate(int n) {
|
||||
exists(ClassObjectInternal cls |
|
||||
cls = this.getItem(n) and this.firstIndex(cls) < n
|
||||
)
|
||||
exists(ClassObjectInternal cls | cls = this.getItem(n) and this.firstIndex(cls) < n)
|
||||
}
|
||||
|
||||
/** Gets a class list which is the de-duplicated form of the list containing elements of
|
||||
/**
|
||||
* Gets a class list which is the de-duplicated form of the list containing elements of
|
||||
* this list from `n` onwards.
|
||||
*/
|
||||
ClassList deduplicate(int n) {
|
||||
n = this.length() and result = Empty()
|
||||
or
|
||||
this.duplicate(n) and result = this.deduplicate(n+1)
|
||||
this.duplicate(n) and result = this.deduplicate(n + 1)
|
||||
or
|
||||
exists(ClassObjectInternal cls |
|
||||
n = this.firstIndex(cls) and
|
||||
result = Cons(cls, this.deduplicate(n+1))
|
||||
result = Cons(cls, this.deduplicate(n + 1))
|
||||
)
|
||||
}
|
||||
|
||||
predicate isEmpty() {
|
||||
this = Empty()
|
||||
}
|
||||
predicate isEmpty() { this = Empty() }
|
||||
|
||||
ClassList reverse() {
|
||||
reverse_step(this, Empty(), result)
|
||||
}
|
||||
ClassList reverse() { reverse_step(this, Empty(), result) }
|
||||
|
||||
/** Holds if this MRO contains a class whose instances we treat specially, rather than as a generic instance.
|
||||
/**
|
||||
* Holds if this MRO contains a class whose instances we treat specially, rather than as a generic instance.
|
||||
* For example, `type` or `int`.
|
||||
*/
|
||||
boolean containsSpecial() {
|
||||
this = Empty() and result = false
|
||||
or
|
||||
exists(ClassDecl decl |
|
||||
decl = this.getHead().getClassDeclaration() |
|
||||
if decl.isSpecial() then
|
||||
result = true
|
||||
else
|
||||
result = this.getTail().containsSpecial()
|
||||
exists(ClassDecl decl | decl = this.getHead().getClassDeclaration() |
|
||||
if decl.isSpecial() then result = true else result = this.getTail().containsSpecial()
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private newtype TClassListList =
|
||||
EmptyList() or
|
||||
ConsList(TClassList head, TClassListList tail) {
|
||||
required_list(head, tail)
|
||||
}
|
||||
ConsList(TClassList head, TClassListList tail) { required_list(head, tail) }
|
||||
|
||||
/* Keep ClassListList finite and as small as possible */
|
||||
private predicate required_list(ClassList head, ClassListList tail) {
|
||||
@@ -268,26 +233,22 @@ private predicate required_list(ClassList head, ClassListList tail) {
|
||||
or
|
||||
exists(ClassObjectInternal cls, int n |
|
||||
head = Mro::newStyleMro(Types::getBase(cls, n)) and
|
||||
tail = list_of_linearization_of_bases_plus_bases(cls, n+1)
|
||||
tail = list_of_linearization_of_bases_plus_bases(cls, n + 1)
|
||||
)
|
||||
or
|
||||
exists(ClassObjectInternal cls, int n |
|
||||
head = Mro::oldStyleMro(Types::getBase(cls, n)) and
|
||||
tail = list_old_style_base_mros(cls, n+1)
|
||||
tail = list_old_style_base_mros(cls, n + 1)
|
||||
)
|
||||
}
|
||||
|
||||
private class ClassListList extends TClassListList {
|
||||
|
||||
string toString() {
|
||||
result = "[" + this.contents() + "]"
|
||||
}
|
||||
string toString() { result = "[" + this.contents() + "]" }
|
||||
|
||||
string contents() {
|
||||
this = EmptyList() and result = ""
|
||||
or
|
||||
exists(ClassList head |
|
||||
head = this.getHead() |
|
||||
exists(ClassList head | head = this.getHead() |
|
||||
this.getTail() = EmptyList() and result = head.toString()
|
||||
or
|
||||
this.getTail() != EmptyList() and result = head.toString() + ", " + this.getTail().contents()
|
||||
@@ -300,18 +261,14 @@ private class ClassListList extends TClassListList {
|
||||
result = this.getTail().length() + 1
|
||||
}
|
||||
|
||||
ClassList getHead() {
|
||||
this = ConsList(result, _)
|
||||
}
|
||||
ClassList getHead() { this = ConsList(result, _) }
|
||||
|
||||
ClassListList getTail() {
|
||||
this = ConsList(_, result)
|
||||
}
|
||||
ClassListList getTail() { this = ConsList(_, result) }
|
||||
|
||||
ClassList getItem(int n) {
|
||||
n = 0 and result = this.getHead()
|
||||
or
|
||||
result = this.getTail().getItem(n-1)
|
||||
result = this.getTail().getItem(n - 1)
|
||||
}
|
||||
|
||||
private ClassObjectInternal getAHead() {
|
||||
@@ -320,7 +277,7 @@ private class ClassListList extends TClassListList {
|
||||
result = this.getTail().getAHead()
|
||||
}
|
||||
|
||||
pragma [nomagic]
|
||||
pragma[nomagic]
|
||||
ClassList merge() {
|
||||
exists(ClassList reversed |
|
||||
merge_step(reversed, EmptyList(), this) and
|
||||
@@ -331,13 +288,17 @@ private class ClassListList extends TClassListList {
|
||||
}
|
||||
|
||||
/* Join ordering helper */
|
||||
pragma [noinline]
|
||||
predicate removedClassParts(ClassObjectInternal cls, ClassList removed_head, ClassListList removed_tail, int n) {
|
||||
cls = this.bestMergeCandidate() and n = this.length()-1 and
|
||||
removed_head = this.getItem(n).removeHead(cls) and removed_tail = EmptyList()
|
||||
pragma[noinline]
|
||||
predicate removedClassParts(
|
||||
ClassObjectInternal cls, ClassList removed_head, ClassListList removed_tail, int n
|
||||
) {
|
||||
cls = this.bestMergeCandidate() and
|
||||
n = this.length() - 1 and
|
||||
removed_head = this.getItem(n).removeHead(cls) and
|
||||
removed_tail = EmptyList()
|
||||
or
|
||||
exists(ClassList prev_head, ClassListList prev_tail |
|
||||
this.removedClassParts(cls, prev_head, prev_tail, n+1) and
|
||||
this.removedClassParts(cls, prev_head, prev_tail, n + 1) and
|
||||
removed_head = this.getItem(n).removeHead(cls) and
|
||||
removed_tail = ConsList(prev_head, prev_tail)
|
||||
)
|
||||
@@ -356,12 +317,10 @@ private class ClassListList extends TClassListList {
|
||||
cls = this.getAHead() and n = this.length()
|
||||
or
|
||||
this.getItem(n).legalMergeHead(cls) and
|
||||
this.legalMergeCandidate(cls, n+1)
|
||||
this.legalMergeCandidate(cls, n + 1)
|
||||
}
|
||||
|
||||
predicate legalMergeCandidate(ClassObjectInternal cls) {
|
||||
this.legalMergeCandidate(cls, 0)
|
||||
}
|
||||
predicate legalMergeCandidate(ClassObjectInternal cls) { this.legalMergeCandidate(cls, 0) }
|
||||
|
||||
predicate illegalMergeCandidate(ClassObjectInternal cls) {
|
||||
cls = this.getAHead() and
|
||||
@@ -369,20 +328,17 @@ private class ClassListList extends TClassListList {
|
||||
}
|
||||
|
||||
ClassObjectInternal bestMergeCandidate(int n) {
|
||||
exists(ClassObjectInternal head |
|
||||
head = this.getItem(n).getHead()
|
||||
|
|
||||
exists(ClassObjectInternal head | head = this.getItem(n).getHead() |
|
||||
legalMergeCandidate(head) and result = head
|
||||
or
|
||||
illegalMergeCandidate(head) and result = this.bestMergeCandidate(n+1)
|
||||
illegalMergeCandidate(head) and result = this.bestMergeCandidate(n + 1)
|
||||
)
|
||||
}
|
||||
|
||||
ClassObjectInternal bestMergeCandidate() {
|
||||
result = this.bestMergeCandidate(0)
|
||||
}
|
||||
ClassObjectInternal bestMergeCandidate() { result = this.bestMergeCandidate(0) }
|
||||
|
||||
/** Gets a ClassList representing this list of lists flattened into a single list.
|
||||
/**
|
||||
* Gets a ClassList representing this list of lists flattened into a single list.
|
||||
* Used for old-style MRO computation.
|
||||
*/
|
||||
ClassList flatten() {
|
||||
@@ -390,17 +346,14 @@ private class ClassListList extends TClassListList {
|
||||
or
|
||||
result = flatten_list(this, 0)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private ClassList flatten_list(ClassListList list, int n) {
|
||||
need_flattening(list) and
|
||||
exists(ClassList head, ClassListList tail |
|
||||
list = ConsList(head, tail)
|
||||
|
|
||||
exists(ClassList head, ClassListList tail | list = ConsList(head, tail) |
|
||||
n = head.length() and result = tail.flatten()
|
||||
or
|
||||
result = Cons(head.getItem(n), flatten_list(list, n+1))
|
||||
result = Cons(head.getItem(n), flatten_list(list, n + 1))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -414,12 +367,10 @@ private predicate need_flattening(ClassListList list) {
|
||||
)
|
||||
}
|
||||
|
||||
private ClassList bases(ClassObjectInternal cls) {
|
||||
result = bases(cls, 0)
|
||||
}
|
||||
private ClassList bases(ClassObjectInternal cls) { result = bases(cls, 0) }
|
||||
|
||||
private ClassList bases(ClassObjectInternal cls, int n) {
|
||||
result = Cons(Types::getBase(cls, n), bases(cls, n+1))
|
||||
result = Cons(Types::getBase(cls, n), bases(cls, n + 1))
|
||||
or
|
||||
result = Empty() and n = Types::base_count(cls)
|
||||
}
|
||||
@@ -432,7 +383,7 @@ private ClassListList list_of_linearization_of_bases_plus_bases(ClassObjectInter
|
||||
result = ConsList(bases(cls), EmptyList()) and n = Types::base_count(cls) and n > 1
|
||||
or
|
||||
exists(ClassListList partial |
|
||||
partial = list_of_linearization_of_bases_plus_bases(cls, n+1) and
|
||||
partial = list_of_linearization_of_bases_plus_bases(cls, n + 1) and
|
||||
result = ConsList(Mro::newStyleMro(Types::getBase(cls, n)), partial)
|
||||
)
|
||||
}
|
||||
@@ -445,18 +396,23 @@ private ClassListList list_old_style_base_mros(ClassObjectInternal cls) {
|
||||
result = list_old_style_base_mros(cls, 0)
|
||||
}
|
||||
|
||||
pragma [nomagic]
|
||||
pragma[nomagic]
|
||||
private ClassListList list_old_style_base_mros(ClassObjectInternal cls, int n) {
|
||||
n = Types::base_count(cls) and result = EmptyList()
|
||||
or
|
||||
result = ConsList(Mro::oldStyleMro(Types::getBase(cls, n)), list_old_style_base_mros(cls, n+1))
|
||||
result = ConsList(Mro::oldStyleMro(Types::getBase(cls, n)), list_old_style_base_mros(cls, n + 1))
|
||||
}
|
||||
|
||||
/** Holds if the pair `reversed_mro`, `remaining_list` represents a step in the C3 merge operation
|
||||
/**
|
||||
* Holds if the pair `reversed_mro`, `remaining_list` represents a step in the C3 merge operation
|
||||
* of computing the C3 linearization of `original`.
|
||||
*/
|
||||
private predicate merge_step(ClassList reversed_mro, ClassListList remaining_list, ClassListList original) {
|
||||
remaining_list = list_of_linearization_of_bases_plus_bases(_) and reversed_mro = Empty() and remaining_list = original
|
||||
private predicate merge_step(
|
||||
ClassList reversed_mro, ClassListList remaining_list, ClassListList original
|
||||
) {
|
||||
remaining_list = list_of_linearization_of_bases_plus_bases(_) and
|
||||
reversed_mro = Empty() and
|
||||
remaining_list = original
|
||||
or
|
||||
/* Removes the best merge candidate from `remaining_list` and prepends it to `reversed_mro` */
|
||||
exists(ClassObjectInternal head, ClassList prev_reverse_mro, ClassListList prev_list |
|
||||
@@ -470,7 +426,6 @@ private predicate merge_step(ClassList reversed_mro, ClassListList remaining_lis
|
||||
}
|
||||
|
||||
/* Helpers for `ClassList.reverse()` */
|
||||
|
||||
private predicate needs_reversing(ClassList lst) {
|
||||
merge_step(lst, EmptyList(), _)
|
||||
or
|
||||
@@ -487,8 +442,8 @@ private predicate reverse_step(ClassList lst, ClassList remainder, ClassList rev
|
||||
}
|
||||
|
||||
module Mro {
|
||||
|
||||
cached ClassList newStyleMro(ClassObjectInternal cls) {
|
||||
cached
|
||||
ClassList newStyleMro(ClassObjectInternal cls) {
|
||||
cls = ObjectInternal::builtin("object") and result = Cons(cls, Empty())
|
||||
or
|
||||
result = Cons(cls, merge_of_linearization_of_bases(cls))
|
||||
@@ -496,9 +451,9 @@ module Mro {
|
||||
result = Cons(cls, newStyleMro(sole_base(cls)))
|
||||
}
|
||||
|
||||
cached ClassList oldStyleMro(ClassObjectInternal cls) {
|
||||
cached
|
||||
ClassList oldStyleMro(ClassObjectInternal cls) {
|
||||
Types::isOldStyle(cls) and
|
||||
result = Cons(cls, list_old_style_base_mros(cls).flatten()).(ClassList).deduplicate()
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,45 +1,45 @@
|
||||
|
||||
/*
|
||||
*
|
||||
* ## Points-to analysis for Python
|
||||
*
|
||||
*
|
||||
*
|
||||
*
|
||||
* The purpose of points-to analysis is to determine what values a variable might hold at runtime.
|
||||
* This allows us to write useful queries to check for the misuse of those values.
|
||||
* In the academic and technical literature, points-to analysis (AKA pointer analysis) attempts to determine which variables can refer to which heap allocated objects.
|
||||
* From the point of view of Python we can treat all Python objects as "heap allocated objects".
|
||||
*
|
||||
*
|
||||
*
|
||||
*
|
||||
* The output of the points-to analysis consists of a large set of relations which provide not only points-to information, but call-graph, pruned flow-graph and exception-raising information.
|
||||
*
|
||||
* These relations are computed by a large set of mutually recursive predicates which infer the flow of values through the program.
|
||||
*
|
||||
* These relations are computed by a large set of mutually recursive predicates which infer the flow of values through the program.
|
||||
* Our analysis is inter-procedural, but uses contexts to maintain the precision of an intra-procedural analysis.
|
||||
*
|
||||
*
|
||||
* ### Precision
|
||||
*
|
||||
*
|
||||
* In conventional points-to, the computed points-to set should be a super-set of the real points-to set (were it possible to determine such a thing).
|
||||
* However for our purposes we want the points-to set to be a sub-set of the real points-to set.
|
||||
* This is simply because conventional points-to is used to determine compiler optimisations, so the points-to set needs to be a conservative over-estimate of what is possible.
|
||||
* However for our purposes we want the points-to set to be a sub-set of the real points-to set.
|
||||
* This is simply because conventional points-to is used to determine compiler optimisations, so the points-to set needs to be a conservative over-estimate of what is possible.
|
||||
* We have the opposite concern; we want to eliminate false positives where possible.
|
||||
*
|
||||
* This should be borne in mind when reading the literature about points-to analysis. In conventional points-to, a precise analysis produces as small a points-to set as possible.
|
||||
*
|
||||
* This should be borne in mind when reading the literature about points-to analysis. In conventional points-to, a precise analysis produces as small a points-to set as possible.
|
||||
* Our analysis is precise (or very close to it). Instead of seeking to maximise precision, we seek to maximise *recall* and produce as large a points-to set as possible (whilst remaining precise).
|
||||
*
|
||||
*
|
||||
* When it comes to designing the inference, we always choose precision over recall.
|
||||
* We want to minimise false positives so it is important to avoid making incorrect inferences, even if it means losing a lot of potential information.
|
||||
* If a potential new points-to fact would increase the number of values we are able to infer, but decrease precision, then we omit it.
|
||||
*
|
||||
*
|
||||
* ###Objects
|
||||
*
|
||||
* In conventional points-to an 'object' is generally considered to be any static instantiation. E.g. in Java this is simply anything looking like `new X(..)`.
|
||||
* However, in Python as there is no `new` expression we cannot know what is a class merely from the syntax.
|
||||
*
|
||||
* In conventional points-to an 'object' is generally considered to be any static instantiation. E.g. in Java this is simply anything looking like `new X(..)`.
|
||||
* However, in Python as there is no `new` expression we cannot know what is a class merely from the syntax.
|
||||
* Consequently, we must start with only the simplest objects and extend to instance creation as we can infer classes.
|
||||
*
|
||||
*
|
||||
* To perform points-to analysis we start with the set of built-in objects, all literal constants, and class and function definitions.
|
||||
* From there we can propagate those values. Whenever we see a call `x()` we add a new object if `x` refers to some class.
|
||||
*
|
||||
*
|
||||
* In the `PointsTo::points_to` relation, the second argument, `Object value` is the "value" referred to by the ControlFlowNode (which will correspond to an rvalue in the source code).
|
||||
* The set of "values" used will change as the library continues to improve, but currently include the following:
|
||||
*
|
||||
*
|
||||
* * Classes (both in the source and builtin)
|
||||
* * Functions (both in the source and builtin)
|
||||
* * Literal constants defined in the source (string and numbers)
|
||||
@@ -49,56 +49,56 @@
|
||||
* * Bound methods, static- and class-methods, and properties.
|
||||
* * Instances of `super`.
|
||||
* * Missing modules, where no concrete module is found for an import.
|
||||
*
|
||||
*
|
||||
* A number of constructs that might create a new object, such as binary operations, are omitted if there is no useful information that can be attached to them and they would just increase the size of the database.
|
||||
*
|
||||
*
|
||||
* ###Contexts
|
||||
*
|
||||
* In order to better handle value tracking in functions, we introduce context to the points-to relation.
|
||||
*
|
||||
* In order to better handle value tracking in functions, we introduce context to the points-to relation.
|
||||
* There is one `default` context, equivalent to having no context, a `main` context for scripts and any number of call-site contexts.
|
||||
*
|
||||
* Adding context to a conventional points-to analysis can significantly improve its precision. Whereas, for our points-to analysis adding context significantly improves the recall of our analysis.
|
||||
* The consensus in the academic literature is that "object sensitivity" is superior to "call-site sensitivity".
|
||||
*
|
||||
* Adding context to a conventional points-to analysis can significantly improve its precision. Whereas, for our points-to analysis adding context significantly improves the recall of our analysis.
|
||||
* The consensus in the academic literature is that "object sensitivity" is superior to "call-site sensitivity".
|
||||
* However, since we are seeking to maximise not minimise our points-to set, it is entirely possible that the reverse is true for us.
|
||||
* We use "call-site sensitivity" at the moment, although the exact set of contexts used will change.
|
||||
*
|
||||
*
|
||||
* ### Points-to analysis over the ESSA dataflow graph
|
||||
*
|
||||
*
|
||||
* In order to perform points-to analysis on the dataflow graph, we
|
||||
* need to understand the many implicit "definitions" that occur within Python code.
|
||||
*
|
||||
*
|
||||
* These are:
|
||||
*
|
||||
*
|
||||
* 1. Implicit definition as "undefined" for any local or global variable at the start of its scope.
|
||||
* Many of these will be dead and will be eliminated during construction of the dataflow graph.
|
||||
* 2. Implicit definition of `__name__`, `__package__` and `__module__` at the start of the relevant scopes.
|
||||
* 3. Implicit definition of all submodules as global variables at the start of an `__init__` module
|
||||
*
|
||||
*
|
||||
* In addition, there are the "artificial", data-flow definitions:
|
||||
*
|
||||
*
|
||||
* 1. Phi functions
|
||||
* 2. Pi (guard, or filter) functions.
|
||||
* 3. "Refinements" of a variable. These are not definitions of the variable, but may modify the object referred to by the variable,
|
||||
* possibly changing some inferred facts about the object.
|
||||
* 4. Definition of any variable that escapes the scope, at entry, exit and at all call-sites.
|
||||
*
|
||||
*
|
||||
* As an example, consider:
|
||||
* ```python
|
||||
* if a:
|
||||
* float = "global"
|
||||
* #float can now be either the class 'float' or the string "global"
|
||||
*
|
||||
*
|
||||
* class C2:
|
||||
* if b:
|
||||
* float = "local"
|
||||
* float
|
||||
*
|
||||
*
|
||||
* float #Cannot be "local"
|
||||
* ```
|
||||
*
|
||||
*
|
||||
* Ignoring `__name__` and `__package__`, the data-flow graph looks something like this, noting that there are two variables named "float"
|
||||
* in the scope `C2`, the local and the global.
|
||||
*
|
||||
*
|
||||
* ```
|
||||
* a_0 = undefined
|
||||
* b_0 = undefined
|
||||
@@ -112,14 +112,14 @@
|
||||
* float_6 = phi(float_4, float_5) |
|
||||
* float_7 = float_3 (transfer values in global 'float', but not local, back to module scope).
|
||||
* ```
|
||||
*
|
||||
*
|
||||
* ### Implementation
|
||||
*
|
||||
*
|
||||
* <b>This section is for information purposes only. Any or all details may change without notice.</b>
|
||||
*
|
||||
*
|
||||
* QL, being based on Datalog, has fixed-point semantics which makes it impossible to make negative statements that are recursive.
|
||||
* To work around this we need to define many predicates over boolean variables. Suppose we have a predicate which determines whether a test can be true or false at runtime.
|
||||
* We might naively implement this as `predicate test_is_true(ControlFlowNode test, Context ctx)` but this would lead to negative recursion if we want to know when the test can be false.
|
||||
* Instead we implement it as `boolean test_result(ControlFlowNode test, Context ctx)` where the absence of a value indicates merely that we do not (yet) know what value the test may have.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,7 @@
|
||||
import python
|
||||
private import semmle.python.pointsto.PointsTo
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
|
||||
/*
|
||||
* A note on 'cost'. Cost doesn't represent the cost to compute,
|
||||
* but (a vague estimate of) the cost to compute per value gained.
|
||||
@@ -14,7 +15,7 @@ private int given_cost() {
|
||||
)
|
||||
}
|
||||
|
||||
pragma [noinline]
|
||||
pragma[noinline]
|
||||
private int max_context_cost() {
|
||||
not py_flags_versioned("context.cost", _, _) and result = 7
|
||||
or
|
||||
@@ -22,13 +23,13 @@ private int max_context_cost() {
|
||||
}
|
||||
|
||||
private int syntactic_call_count(Scope s) {
|
||||
exists(Function f |
|
||||
f = s and f.getName() != "__init__" |
|
||||
result = count(CallNode call |
|
||||
call.getFunction().(NameNode).getId() = f.getName()
|
||||
or
|
||||
call.getFunction().(AttrNode).getName() = f.getName()
|
||||
)
|
||||
exists(Function f | f = s and f.getName() != "__init__" |
|
||||
result =
|
||||
count(CallNode call |
|
||||
call.getFunction().(NameNode).getId() = f.getName()
|
||||
or
|
||||
call.getFunction().(AttrNode).getName() = f.getName()
|
||||
)
|
||||
)
|
||||
or
|
||||
s.getName() = "__init__" and result = 1
|
||||
@@ -37,11 +38,13 @@ private int syntactic_call_count(Scope s) {
|
||||
}
|
||||
|
||||
private int incoming_call_cost(Scope s) {
|
||||
/* Syntactic call count will often be a considerable overestimate
|
||||
/*
|
||||
* Syntactic call count will often be a considerable overestimate
|
||||
* of the actual number of calls, so we use the square root.
|
||||
* Cost = log(sqrt(call-count))
|
||||
*/
|
||||
result = ((syntactic_call_count(s)+1).log(2)*0.5).floor()
|
||||
|
||||
result = ((syntactic_call_count(s) + 1).log(2) * 0.5).floor()
|
||||
}
|
||||
|
||||
private int context_cost(TPointsToContext ctx) {
|
||||
@@ -55,15 +58,10 @@ private int context_cost(TPointsToContext ctx) {
|
||||
}
|
||||
|
||||
private int call_cost(CallNode call) {
|
||||
if call.getScope().inSource() then
|
||||
result = 2
|
||||
else
|
||||
result = 3
|
||||
if call.getScope().inSource() then result = 2 else result = 3
|
||||
}
|
||||
|
||||
private int outgoing_calls(Scope s) {
|
||||
result = strictcount(CallNode call | call.getScope() = s)
|
||||
}
|
||||
private int outgoing_calls(Scope s) { result = strictcount(CallNode call | call.getScope() = s) }
|
||||
|
||||
predicate super_method_call(CallNode call) {
|
||||
call.getFunction().(AttrNode).getObject().(CallNode).getFunction().(NameNode).getId() = "super"
|
||||
@@ -74,74 +72,65 @@ private int outgoing_call_cost(CallNode c) {
|
||||
result = outgoing_calls(c.getScope()).log(2).floor()
|
||||
}
|
||||
|
||||
/** Cost of contexts for a call, the more callers the
|
||||
/**
|
||||
* Cost of contexts for a call, the more callers the
|
||||
* callee of call has the more expensive it is to add contexts for it.
|
||||
* This seems to be an effective heuristic for preventing an explosion
|
||||
* in the number of contexts while retaining good results.
|
||||
*/
|
||||
private int splay_cost(CallNode c) {
|
||||
if super_method_call(c) then
|
||||
result = 0
|
||||
else
|
||||
result = outgoing_call_cost(c) + incoming_call_cost(c.getScope())
|
||||
if super_method_call(c)
|
||||
then result = 0
|
||||
else result = outgoing_call_cost(c) + incoming_call_cost(c.getScope())
|
||||
}
|
||||
|
||||
private predicate call_to_init_or_del(CallNode call) {
|
||||
exists(string mname |
|
||||
mname = "__init__" or mname = "__del__" |
|
||||
exists(string mname | mname = "__init__" or mname = "__del__" |
|
||||
mname = call.getFunction().(AttrNode).getName()
|
||||
)
|
||||
}
|
||||
|
||||
/** Total cost estimate */
|
||||
private int total_call_cost(CallNode call) {
|
||||
/* We want to always follow __init__ and __del__ calls as they tell us about object construction,
|
||||
/*
|
||||
* We want to always follow __init__ and __del__ calls as they tell us about object construction,
|
||||
* but we need to be aware of cycles, so they must have a non-zero cost.
|
||||
*/
|
||||
if call_to_init_or_del(call) then
|
||||
result = 1
|
||||
else
|
||||
result = call_cost(call) + splay_cost(call)
|
||||
|
||||
if call_to_init_or_del(call) then result = 1 else result = call_cost(call) + splay_cost(call)
|
||||
}
|
||||
|
||||
pragma [noinline]
|
||||
pragma[noinline]
|
||||
private int total_cost(CallNode call, PointsToContext ctx) {
|
||||
ctx.appliesTo(call) and
|
||||
result = total_call_cost(call) + context_cost(ctx)
|
||||
}
|
||||
|
||||
private cached newtype TPointsToContext =
|
||||
TMainContext()
|
||||
or
|
||||
TRuntimeContext()
|
||||
or
|
||||
TImportContext()
|
||||
or
|
||||
cached
|
||||
private newtype TPointsToContext =
|
||||
TMainContext() or
|
||||
TRuntimeContext() or
|
||||
TImportContext() or
|
||||
TCallContext(ControlFlowNode call, PointsToContext outerContext, int cost) {
|
||||
total_cost(call, outerContext) = cost and
|
||||
cost <= max_context_cost()
|
||||
}
|
||||
or
|
||||
} or
|
||||
TObjectContext(SelfInstanceInternal object)
|
||||
|
||||
module Context {
|
||||
|
||||
PointsToContext forObject(ObjectInternal object) {
|
||||
result = TObjectContext(object)
|
||||
}
|
||||
|
||||
PointsToContext forObject(ObjectInternal object) { result = TObjectContext(object) }
|
||||
}
|
||||
|
||||
|
||||
/** Points-to context. Context can be one of:
|
||||
/**
|
||||
* Points-to context. Context can be one of:
|
||||
* * "main": Used for scripts.
|
||||
* * "import": Use for non-script modules.
|
||||
* * "default": Use for functions and methods without caller context.
|
||||
* * All other contexts are call contexts and consist of a pair of call-site and caller context.
|
||||
*/
|
||||
class PointsToContext extends TPointsToContext {
|
||||
|
||||
cached string toString() {
|
||||
cached
|
||||
string toString() {
|
||||
this = TMainContext() and result = "main"
|
||||
or
|
||||
this = TRuntimeContext() and result = "runtime"
|
||||
@@ -166,10 +155,8 @@ class PointsToContext extends TPointsToContext {
|
||||
this = TCallContext(call, caller, _)
|
||||
}
|
||||
|
||||
/** Gets the caller context for this callee context. */
|
||||
PointsToContext getOuter() {
|
||||
this = TCallContext(_, result, _)
|
||||
}
|
||||
/** Gets the caller context for this callee context. */
|
||||
PointsToContext getOuter() { this = TCallContext(_, result, _) }
|
||||
|
||||
/** Holds if this context is relevant to the given scope. */
|
||||
predicate appliesToScope(Scope s) {
|
||||
@@ -182,9 +169,12 @@ class PointsToContext extends TPointsToContext {
|
||||
this = TRuntimeContext() and executes_in_runtime_context(s)
|
||||
or
|
||||
/* Called functions, regardless of their name */
|
||||
exists(PythonFunctionObjectInternal callable, ControlFlowNode call, TPointsToContext outerContext |
|
||||
exists(
|
||||
PythonFunctionObjectInternal callable, ControlFlowNode call, TPointsToContext outerContext
|
||||
|
|
||||
call = callable.getACall(outerContext) and
|
||||
this = TCallContext(call, outerContext, _) |
|
||||
this = TCallContext(call, outerContext, _)
|
||||
|
|
||||
s = callable.getScope()
|
||||
)
|
||||
or
|
||||
@@ -192,30 +182,20 @@ class PointsToContext extends TPointsToContext {
|
||||
}
|
||||
|
||||
/** Holds if this context can apply to the CFG node `n`. */
|
||||
pragma [inline]
|
||||
predicate appliesTo(ControlFlowNode n) {
|
||||
this.appliesToScope(n.getScope())
|
||||
}
|
||||
pragma[inline]
|
||||
predicate appliesTo(ControlFlowNode n) { this.appliesToScope(n.getScope()) }
|
||||
|
||||
/** Holds if this context is a call context. */
|
||||
predicate isCall() {
|
||||
this = TCallContext(_, _, _)
|
||||
}
|
||||
predicate isCall() { this = TCallContext(_, _, _) }
|
||||
|
||||
/** Holds if this is the "main" context. */
|
||||
predicate isMain() {
|
||||
this = TMainContext()
|
||||
}
|
||||
predicate isMain() { this = TMainContext() }
|
||||
|
||||
/** Holds if this is the "import" context. */
|
||||
predicate isImport() {
|
||||
this = TImportContext()
|
||||
}
|
||||
predicate isImport() { this = TImportContext() }
|
||||
|
||||
/** Holds if this is the "default" context. */
|
||||
predicate isRuntime() {
|
||||
this = TRuntimeContext()
|
||||
}
|
||||
predicate isRuntime() { this = TRuntimeContext() }
|
||||
|
||||
/** Holds if this context or one of its caller contexts is the default context. */
|
||||
predicate fromRuntime() {
|
||||
@@ -231,19 +211,13 @@ class PointsToContext extends TPointsToContext {
|
||||
result = this.getOuter().getDepth() + 1
|
||||
}
|
||||
|
||||
int getCost() {
|
||||
result = context_cost(this)
|
||||
}
|
||||
int getCost() { result = context_cost(this) }
|
||||
|
||||
CallNode getCall() {
|
||||
this = TCallContext(result, _, _)
|
||||
}
|
||||
CallNode getCall() { this = TCallContext(result, _, _) }
|
||||
|
||||
/** Holds if a call would be too expensive to create a new context for */
|
||||
pragma [nomagic]
|
||||
predicate untrackableCall(CallNode call) {
|
||||
total_cost(call, this) > max_context_cost()
|
||||
}
|
||||
pragma[nomagic]
|
||||
predicate untrackableCall(CallNode call) { total_cost(call, this) > max_context_cost() }
|
||||
|
||||
CallNode getRootCall() {
|
||||
this = TCallContext(result, TImportContext(), _)
|
||||
@@ -252,35 +226,30 @@ class PointsToContext extends TPointsToContext {
|
||||
}
|
||||
|
||||
/** Gets a version of Python that this context includes */
|
||||
pragma [inline]
|
||||
pragma[inline]
|
||||
Version getAVersion() {
|
||||
/* Currently contexts do not include any version information, but may do in the future */
|
||||
result = major_version()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private predicate in_source(Scope s) {
|
||||
exists(s.getEnclosingModule().getFile().getRelativePath())
|
||||
}
|
||||
private predicate in_source(Scope s) { exists(s.getEnclosingModule().getFile().getRelativePath()) }
|
||||
|
||||
/** Holds if this scope can be executed in the default context.
|
||||
/**
|
||||
* Holds if this scope can be executed in the default context.
|
||||
* All modules and classes executed at import time and
|
||||
* all "public" functions and methods, including those invoked by the VM.
|
||||
*/
|
||||
predicate executes_in_runtime_context(Function f) {
|
||||
/* "Public" scope, i.e. functions whose name does not start with an underscore, or special methods */
|
||||
(f.getName().charAt(0) != "_" or f.isSpecialMethod() or f.isInitMethod())
|
||||
and
|
||||
(f.getName().charAt(0) != "_" or f.isSpecialMethod() or f.isInitMethod()) and
|
||||
in_source(f)
|
||||
}
|
||||
|
||||
private predicate maybe_main(Module m) {
|
||||
exists(If i, Compare cmp, Name name, StrConst main |
|
||||
m.getAStmt() = i and i.getTest() = cmp |
|
||||
exists(If i, Compare cmp, Name name, StrConst main | m.getAStmt() = i and i.getTest() = cmp |
|
||||
cmp.compares(name, any(Eq eq), main) and
|
||||
name.getId() = "__name__" and
|
||||
main.getText() = "__main__"
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user