mirror of
https://github.com/github/codeql.git
synced 2026-02-15 06:23:42 +01:00
For now, these have just been made into `private` imports. After doing
this, I went through all of the (now not compiling) files and added in
private imports to the modules that they actually depended on.
I also added an explicit import of `LegacyPointsTo` (even though it may
be unnecessary) in cases where the points-to dependency was somewhat
surprising (and one we want to get rid of). This was primarily inside
the various SSA layers.
For modules inside `semmle.python.{types, objects, pointsto}` I did not
bother, as these are fairly clearly related to points-to.
264 lines
8.4 KiB
Plaintext
264 lines
8.4 KiB
Plaintext
import python
|
|
private import semmle.python.pointsto.PointsTo
|
|
private import semmle.python.objects.ObjectInternal
|
|
private import semmle.python.types.ImportTime
|
|
private import semmle.python.types.Version
|
|
|
|
/*
|
|
* A note on 'cost'. Cost doesn't represent the cost to compute,
|
|
* but (a vague estimate of) the cost to compute per value gained.
|
|
* This is constantly evolving, so see the various cost functions below for more details.
|
|
*/
|
|
|
|
/**
 * Gets the integer value of a `context.cost` flag found in `py_flags_versioned`.
 * Has no result if there is no such flag or its value does not convert to an integer.
 */
private int given_cost() {
  exists(string flagValue | py_flags_versioned("context.cost", flagValue, _) |
    result = flagValue.toInt()
  )
}
|
|
|
|
/**
 * Gets the maximum cost that a call context is allowed to have.
 *
 * This is the largest integer value supplied through the `context.cost` flag,
 * or 7 when no usable value has been supplied.
 */
pragma[noinline]
private int max_context_cost() {
  result = max(int cost | cost = given_cost() | cost)
  or
  // Fall back to the default whenever no *usable* cost is available: either the flag is
  // absent, or its value does not convert to an integer. Testing only for the presence of
  // the flag would leave this predicate without any result for a malformed value, which in
  // turn would silently prevent every call context from being created.
  not exists(given_cost()) and result = 7
}
|
|
|
|
/**
 * Gets a purely syntactic estimate of the number of calls to the scope `s`:
 *  - for a function not named `__init__`, the number of call nodes whose callee is a
 *    name with the function's name plus the number whose callee is an attribute with
 *    the function's name;
 *  - for a scope named `__init__`, 1;
 *  - for a scope that is not a function, 0.
 */
private int syntactic_call_count(Scope s) {
  not s instanceof Function and result = 0
  or
  s.getName() = "__init__" and result = 1
  or
  exists(string fname |
    fname = s.(Function).getName() and
    fname != "__init__"
  |
    result = count(function_call(fname)) + count(method_call(fname))
  )
}
|
|
|
|
/** Gets a call node whose callee is a name node with identifier `name`. */
pragma[nomagic]
private CallNode function_call(string name) {
  exists(NameNode callee | callee = result.getFunction() | callee.getId() = name)
}
|
|
|
|
/** Gets a call node whose callee is an attribute node with attribute name `name`. */
pragma[nomagic]
private CallNode method_call(string name) {
  exists(AttrNode callee | callee = result.getFunction() | callee.getName() = name)
}
|
|
|
|
/**
 * Gets the cost contribution of the calls coming into the scope `s`.
 *
 * The syntactic call count will often be a considerable overestimate of the actual
 * number of calls, so its square root is used:
 * cost = log2(sqrt(count + 1)) = log2(count + 1) * 0.5, rounded down.
 */
private int incoming_call_cost(Scope s) {
  exists(int calls | calls = syntactic_call_count(s) + 1 |
    result = (calls.log(2) * 0.5).floor()
  )
}
|
|
|
|
/**
 * Gets the cost of the context `ctx`: the cost stored in the context for a call context,
 * and 0 for the main, runtime and import contexts.
 */
private int context_cost(TPointsToContext ctx) {
  ctx = TCallContext(_, _, result)
  or
  (
    ctx = TMainContext()
    or
    ctx = TRuntimeContext()
    or
    ctx = TImportContext()
  ) and
  result = 0
}
|
|
|
|
/** Gets the base cost of `call`: 2 if `inSource()` holds for its scope, and 3 otherwise. */
private int call_cost(CallNode call) {
  call.getScope().inSource() and result = 2
  or
  not call.getScope().inSource() and result = 3
}
|
|
|
|
/** Gets the number of call nodes whose scope is `s`. Has no result if there are none. */
private int outgoing_calls(Scope s) {
  result = strictcount(CallNode c | s = c.getScope())
}
|
|
|
|
/**
 * Holds if the callee of `call` is an attribute of the result of calling the name `super`,
 * for example `super().method(...)`.
 */
predicate super_method_call(CallNode call) {
  exists(AttrNode method, CallNode superCall |
    method = call.getFunction() and
    superCall = method.getObject()
  |
    superCall.getFunction().(NameNode).getId() = "super"
  )
}
|
|
|
|
/**
 * Gets the cost contribution of the calls made from the scope containing `c`:
 * log2 of the number of call nodes in that scope, rounded down.
 */
private int outgoing_call_cost(CallNode c) {
  exists(int callsInScope | callsInScope = outgoing_calls(c.getScope()) |
    result = callsInScope.log(2).floor()
  )
}
|
|
|
|
/**
 * Gets the cost of adding contexts for the call `c`.
 *
 * A call on `super` costs nothing. For any other call the cost is the outgoing call cost
 * of `c` plus the incoming call cost of the scope containing `c`.
 * This seems to be an effective heuristic for preventing an explosion
 * in the number of contexts while retaining good results.
 */
private int splay_cost(CallNode c) {
  super_method_call(c) and result = 0
  or
  not super_method_call(c) and
  result = outgoing_call_cost(c) + incoming_call_cost(c.getScope())
}
|
|
|
|
/** Holds if the callee of `call` is an attribute named `__init__` or `__del__`. */
private predicate call_to_init_or_del(CallNode call) {
  exists(AttrNode callee | callee = call.getFunction() |
    callee.getName() = "__init__"
    or
    callee.getName() = "__del__"
  )
}
|
|
|
|
/**
 * Gets the total cost estimate for `call`: 1 for calls to `__init__` or `__del__`,
 * and the sum of the call cost and the splay cost for every other call.
 */
private int total_call_cost(CallNode call) {
  // `__init__` and `__del__` calls should always be followed, as they tell us about object
  // construction. Their cost must nevertheless be non-zero, because of possible cycles.
  call_to_init_or_del(call) and result = 1
  or
  not call_to_init_or_del(call) and
  result = call_cost(call) + splay_cost(call)
}
|
|
|
|
/** Gets the total call cost of `call`, provided that the context `ctx` applies to `call`. */
pragma[nomagic]
private int relevant_call_cost(PointsToContext ctx, CallNode call) {
  result = total_call_cost(call) and
  ctx.appliesTo(call)
}
|
|
|
|
/**
 * Gets the cost of a context entered through `call` from the context `ctx`:
 * the cost of the call itself (where `ctx` applies to it) plus the cost of `ctx`.
 */
pragma[noinline]
private int total_cost(CallNode call, PointsToContext ctx) {
  exists(int callPart, int contextPart |
    callPart = relevant_call_cost(ctx, call) and
    contextPart = context_cost(ctx)
  |
    result = callPart + contextPart
  )
}
|
|
|
|
/**
 * The algebraic type underlying `PointsToContext`.
 *
 * A call context records its call-site, the caller's context and its cost, and only
 * exists when that cost does not exceed the maximum context cost.
 */
cached
private newtype TPointsToContext =
  TMainContext() or
  TRuntimeContext() or
  TImportContext() or
  TCallContext(ControlFlowNode call, PointsToContext caller, int cost) {
    cost = total_cost(call, caller) and
    cost <= max_context_cost()
  } or
  TObjectContext(SelfInstanceInternal object)
|
|
|
|
/**
 * A points-to context. A context can be one of:
 *  * "main": Used for scripts.
 *  * "import": Used for non-script modules.
 *  * "runtime" (also called the default context): Used for functions and methods
 *    without caller context.
 *  * All other contexts are call contexts and consist of a pair of call-site and caller context.
 */
class PointsToContext extends TPointsToContext {
  /** Gets a textual representation of this element. */
  cached
  string toString() {
    exists(CallNode site, PointsToContext outer | this = TCallContext(site, outer, _) |
      result = site.getLocation() + " from " + outer.toString()
    )
    or
    this = TImportContext() and result = "import"
    or
    this = TRuntimeContext() and result = "runtime"
    or
    this = TMainContext() and result = "main"
  }

  /**
   * Holds if `call` is the call-site from which this context was entered and `caller` is
   * the caller's context, where `caller` applies to `call`.
   */
  predicate fromCall(CallNode call, PointsToContext caller) {
    this = TCallContext(call, caller, _) and
    caller.appliesTo(call)
  }

  /**
   * Holds if `call` is the call-site from which this context was entered, `caller` is the
   * caller's context, and `call` is a call to `callee` in the context `caller`.
   */
  predicate fromCall(CallNode call, PythonFunctionObjectInternal callee, PointsToContext caller) {
    this = TCallContext(call, caller, _) and
    call = callee.getACall(caller)
  }

  /** Gets the caller context for this callee context. */
  PointsToContext getOuter() { TCallContext(_, result, _) = this }

  /** Holds if this context is relevant to the given scope. */
  predicate appliesToScope(Scope s) {
    /* Scripts */
    maybe_main(s) and this = TMainContext()
    or
    /* Modules and classes evaluated at import */
    this = TImportContext() and s instanceof ImportTimeScope
    or
    /* Functions that can execute in the runtime context */
    executes_in_runtime_context(s) and this = TRuntimeContext()
    or
    /* Called functions, regardless of their name */
    exists(PythonFunctionObjectInternal callee, ControlFlowNode site, TPointsToContext caller |
      site = callee.getACall(caller) and
      this = TCallContext(site, caller, _)
    |
      s = callee.getScope()
    )
    or
    InterProceduralPointsTo::callsite_calls_function(_, _, s, this, _)
  }

  /** Holds if this context can apply to the CFG node `n`, that is, if it applies to the scope of `n`. */
  pragma[inline]
  predicate appliesTo(ControlFlowNode n) {
    exists(Scope scope |
      this.appliesToScope(pragma[only_bind_into](scope)) and
      pragma[only_bind_into](scope) = n.getScope()
    )
  }

  /** Holds if this context is a call context. */
  predicate isCall() { TCallContext(_, _, _) = this }

  /** Holds if this is the "main" context. */
  predicate isMain() { TMainContext() = this }

  /** Holds if this is the "import" context. */
  predicate isImport() { TImportContext() = this }

  /** Holds if this is the "runtime" (default) context. */
  predicate isRuntime() { TRuntimeContext() = this }

  /** Holds if this context or one of its caller contexts is the "runtime" (default) context. */
  predicate fromRuntime() {
    this.getOuter().fromRuntime()
    or
    this.isRuntime()
  }

  /** Gets the depth (number of calls) for this context. A context without a caller context has depth 0. */
  int getDepth() {
    result = this.getOuter().getDepth() + 1
    or
    not exists(this.getOuter()) and result = 0
  }

  /** Gets the cost of this context. */
  int getCost() { result = context_cost(this) }

  /** Gets the call-site from which this call context was entered. */
  CallNode getCall() { TCallContext(result, _, _) = this }

  /** Holds if a call would be too expensive to create a new context for */
  pragma[nomagic]
  predicate untrackableCall(CallNode call) { max_context_cost() < total_cost(call, this) }

  /**
   * Gets the call-site of this context, or of one of its caller contexts, that was entered
   * directly from the "import" context.
   */
  CallNode getRootCall() {
    result = this.getOuter().getRootCall()
    or
    this = TCallContext(result, TImportContext(), _)
  }

  /** Gets a version of Python that this context includes */
  pragma[inline]
  Version getAVersion() {
    /* Currently contexts do not include any version information, but may do in the future */
    result = major_version()
  }
}
|
|
|
|
/** Holds if the file of the module enclosing `s` has a relative path. */
private predicate in_source(Scope s) {
  exists(string path | path = s.getEnclosingModule().getFile().getRelativePath())
}
|
|
|
|
/**
 * Holds if the function `f` can be executed in the runtime (default) context.
 *
 * This covers functions in source that are "public", meaning that their name does not
 * start with an underscore, or for which `isSpecialMethod()` or `isInitMethod()` holds.
 */
predicate executes_in_runtime_context(Function f) {
  in_source(f) and
  (
    f.isInitMethod()
    or
    f.isSpecialMethod()
    or
    f.getName().charAt(0) != "_"
  )
}
|
|
|
|
/**
 * Holds if a statement of the module `m` is an `if` whose test compares the name
 * `__name__` with the string literal `__main__` using an `Eq` operator, as in
 * `if __name__ == "__main__":`.
 */
private predicate maybe_main(Module m) {
  exists(If guard, Compare test |
    guard = m.getAStmt() and
    test = guard.getTest()
  |
    exists(Name lhs, StringLiteral rhs |
      lhs.getId() = "__name__" and
      rhs.getText() = "__main__"
    |
      test.compares(lhs, any(Eq eq), rhs)
    )
  )
}
|