mirror of
https://github.com/github/codeql.git
synced 2026-02-23 18:33:42 +01:00
Uses the same trick as for `ExtractedArgumentNode`, wherein we postpone the global restriction on the charpred to instead be in the `argumentOf` predicate (which is global anyway). In addition to this, we also converted `CapturedVariablesArgumentNode` into a proper synthetic node, and added an explicit post-update node for it. These nodes just act as wrappers for the function part of call nodes. Thus, to make them work with the variable capture machinery, we simply map them to the closure node for the corresponding control-flow or post-update node.
1924 lines
67 KiB
Plaintext
1924 lines
67 KiB
Plaintext
/**
|
|
* INTERNAL: Do not use.
|
|
*
|
|
* TypeTracker based call-graph.
|
|
*
|
|
* The overall scheme for resolving calls, is to notice that Python has different kinds
|
|
* of callables, and resolve those with different strategies. Currently we handle these
|
|
* completely separately:
|
|
* 1. plain functions (and lambdas)
|
|
* 2. methods on classes
|
|
* 3. class instantiation
|
|
*
|
|
* So we have type-trackers for each of the 3 categories above, with some considerable
|
|
* effort to handle different kinds of methods on classes (staticmethod, classmethod,
|
|
* normal), and resolving methods correctly in regards to MRO.
|
|
*
|
|
*
|
|
* A goal of this library is to support modeling calls that are made by third-party
|
|
* libraries. For example `call_later(func, arg0, arg1, foo=val)`, and the fact that the
|
|
* library might inject its own arguments, for example a context that will always be
|
|
* passed as the actual first argument to the function. Currently the aim is to provide
|
|
* enough predicates for such `call_later` function to be modeled by providing
|
|
* additional data-flow steps for the arguments/parameters. This means we cannot have
|
|
* any special logic that requires an AST call to be made before we care to figure out
|
|
* what callable this call might end up targeting.
|
|
*
|
|
* Specifically this means that we cannot use type-backtrackers from the function of a
|
|
* `CallNode`, since there is no `CallNode` to backtrack from for `func` in the example
|
|
* above.
|
|
*
|
|
* Note: This hasn't been 100% realized yet, so we don't currently expose a predicate to
|
|
* ask what targets any data-flow node has. But it's still the plan to do this!
|
|
*/
|
|
|
|
private import python
|
|
private import DataFlowPublic
|
|
private import DataFlowPrivate
|
|
private import FlowSummaryImpl as FlowSummaryImpl
|
|
private import semmle.python.internal.CachedStages
|
|
private import semmle.python.dataflow.new.internal.TypeTrackingImpl::CallGraphConstruction as CallGraphConstruction
|
|
|
|
newtype TParameterPosition =
  /** The position of `self` in methods, and of `cls` in classmethods. */
  TSelfParameterPosition() or
  /**
   * The position used for tracking flow through captured variables. It is kept
   * separate from the ordinary self position so that "lambda self" can be
   * distinguished from "normal self", since lambdas may also access outer `self`
   * variables (through variable capture).
   */
  TLambdaSelfParameterPosition() or
  TPositionalParameterPosition(int index) {
    exists(Parameter p | index = p.getPosition())
    or
    // Synthetic parameters are made for a synthetic summary callable, based on what
    // argument positions they have flow for, so such parameter positions must be
    // available as well.
    FlowSummaryImpl::ParsePositions::isParsedPositionalArgumentPosition(_, index)
  } or
  TPositionalParameterLowerBoundPosition(int pos) {
    FlowSummaryImpl::ParsePositions::isParsedArgumentLowerBoundPosition(_, pos)
  } or
  TKeywordParameterPosition(string name) {
    exists(Parameter p | name = p.getName())
    or
    // see comment for TPositionalParameterPosition
    FlowSummaryImpl::ParsePositions::isParsedKeywordArgumentPosition(_, name)
  } or
  TStarArgsParameterPosition(int index) {
    // Since `.getPosition` does not work for `*args`, we need *args parameter positions
    // at index 1 larger than the largest positional parameter position (and 0 must be
    // included as well). This is a bit of an over-approximation.
    index = 0
    or
    exists(Parameter p | index = p.getPosition() + 1)
  } or
  TSynthStarArgsElementParameterPosition(int index) { exists(TStarArgsParameterPosition(index)) } or
  TDictSplatParameterPosition() or
  // To get flow from a **kwargs argument to a keyword parameter, we add a read-step
  // from a synthetic **kwargs parameter. This separate synthetic ParameterNode is
  // needed because content of the normal **kwargs parameter is cleared for the names
  // that correspond to normal keyword parameters. Since the same parameter position
  // cannot be re-used for multiple parameter nodes in the same callable, this
  // synthetic parameter position is introduced.
  TSynthDictSplatParameterPosition()
|
|
|
|
/** A position at which a callable can receive a parameter. */
class ParameterPosition extends TParameterPosition {
  /** Holds if this is the position of a `self`/`cls` parameter. */
  predicate isSelf() { this = TSelfParameterPosition() }

  /**
   * Holds if this is the position of a reference to a lambda itself.
   * Only used for tracking flow through captured variables.
   */
  predicate isLambdaSelf() { this = TLambdaSelfParameterPosition() }

  /** Holds if this is the position of the positional parameter at (0-based) `index`. */
  predicate isPositional(int index) { this = TPositionalParameterPosition(index) }

  /** Holds if this position stands for any positional parameter starting from position `pos`. */
  predicate isPositionalLowerBound(int pos) { this = TPositionalParameterLowerBoundPosition(pos) }

  /** Holds if this is the position of the keyword parameter named `name`. */
  predicate isKeyword(string name) { this = TKeywordParameterPosition(name) }

  /** Holds if this is the position of a `*args` parameter at (0-based) `index`. */
  predicate isStarArgs(int index) { this = TStarArgsParameterPosition(index) }

  /**
   * Holds if this is the position of a synthetic parameter at or after (0-based)
   * position `index`, from which a store step is made to the real `*args`
   * parameter.
   */
  predicate isSynthStarArgsElement(int index) {
    this = TSynthStarArgsElementParameterPosition(index)
  }

  /** Holds if this is the position of a `**kwargs` parameter. */
  predicate isDictSplat() { this = TDictSplatParameterPosition() }

  /**
   * Holds if this is the position of a **synthetic** `**kwargs` parameter
   * (see comment for `TSynthDictSplatParameterPosition`).
   */
  predicate isSynthDictSplat() { this = TSynthDictSplatParameterPosition() }

  /** Gets a textual representation of this element. */
  string toString() {
    // positions that carry no payload
    result = "self" and this.isSelf()
    or
    result = "lambda self" and this.isLambdaSelf()
    or
    result = "**" and this.isDictSplat()
    or
    result = "synthetic **" and this.isSynthDictSplat()
    or
    // positions identified by an index
    exists(int i | this.isPositional(i) | result = "position " + i)
    or
    exists(int lower | this.isPositionalLowerBound(lower) | result = "position " + lower + "..")
    or
    exists(int i | this.isStarArgs(i) | result = "*args at " + i)
    or
    exists(int i | this.isSynthStarArgsElement(i) |
      result = "synthetic *args element at (or after) " + i
    )
    or
    // positions identified by a name
    exists(string kw | this.isKeyword(kw) | result = "keyword " + kw)
  }
}
|
|
|
|
newtype TArgumentPosition =
  /** The position of `self` in methods, and of `cls` in classmethods. */
  TSelfArgumentPosition() or
  /**
   * The position used for tracking flow through captured variables. It is kept
   * separate from the ordinary self position so that "lambda self" can be
   * distinguished from "normal self", since lambdas may also access outer `self`
   * variables (through variable capture).
   */
  TLambdaSelfArgumentPosition() or
  TPositionalArgumentPosition(int index) {
    exists(CallNode c | exists(c.getArg(index)))
    or
    // Synthetic calls within a summarized callable could use a unique argument
    // position, so those must be available too (these are specified as parameters
    // in the flow-summary spec).
    FlowSummaryImpl::ParsePositions::isParsedPositionalParameterPosition(_, index)
    or
    // the generated function inside a comprehension has a positional argument at index 0
    index = 0 and
    exists(Comp c)
  } or
  TKeywordArgumentPosition(string name) {
    exists(CallNode c | exists(c.getArgByName(name)))
    or
    // see comment for TPositionalArgumentPosition
    FlowSummaryImpl::ParsePositions::isParsedKeywordParameterPosition(_, name)
  } or
  TStarArgsArgumentPosition(int index) {
    any(Call c).getPositionalArg(index) instanceof Starred
  } or
  TDictSplatArgumentPosition()
|
|
|
|
/** A position at which a call can pass an argument. */
class ArgumentPosition extends TArgumentPosition {
  /** Holds if this is the position of a `self`/`cls` argument. */
  predicate isSelf() { this = TSelfArgumentPosition() }

  /**
   * Holds if this is the position of a lambda `self` argument.
   * Only used for tracking flow through captured variables.
   */
  predicate isLambdaSelf() { this = TLambdaSelfArgumentPosition() }

  /** Holds if this is the position of the positional argument at (0-based) `index`. */
  predicate isPositional(int index) { this = TPositionalArgumentPosition(index) }

  /** Holds if this is the position of the keyword argument named `name`. */
  predicate isKeyword(string name) { this = TKeywordArgumentPosition(name) }

  /** Holds if this is the position of a `*args` argument at (0-based) `index`. */
  predicate isStarArgs(int index) { this = TStarArgsArgumentPosition(index) }

  /** Holds if this is the position of a `**kwargs` argument. */
  predicate isDictSplat() { this = TDictSplatArgumentPosition() }

  /** Gets a textual representation of this element. */
  string toString() {
    // positions that carry no payload
    result = "self" and this.isSelf()
    or
    result = "lambda self" and this.isLambdaSelf()
    or
    result = "**" and this.isDictSplat()
    or
    // positions identified by an index
    exists(int i | this.isPositional(i) | result = "position " + i)
    or
    exists(int i | this.isStarArgs(i) | result = "*args at " + i)
    or
    // positions identified by a name
    exists(string kw | this.isKeyword(kw) | result = "keyword " + kw)
  }
}
|
|
|
|
/** Holds if an argument at position `apos` is matched with a parameter at position `ppos`. */
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
  // positions without a payload match their direct counterpart
  apos.isSelf() and ppos.isSelf()
  or
  apos.isLambdaSelf() and ppos.isLambdaSelf()
  or
  // a `**kwargs` argument matches both the real and the synthetic `**kwargs` parameter
  apos.isDictSplat() and
  (
    ppos.isDictSplat()
    or
    ppos.isSynthDictSplat()
  )
  or
  // same index / same name
  exists(int i | apos.isPositional(i) | ppos.isPositional(i))
  or
  exists(string kw | apos.isKeyword(kw) | ppos.isKeyword(kw))
  or
  exists(int i | apos.isStarArgs(i) | ppos.isStarArgs(i))
  or
  // a positional argument at or after the start index of a lower-bound parameter
  // position or of a synthetic `*args` element parameter position
  exists(int startIndex, int argIndex |
    apos.isPositional(argIndex) and argIndex >= startIndex
  |
    ppos.isPositionalLowerBound(startIndex)
    or
    ppos.isSynthStarArgsElement(startIndex)
  )
}
|
|
|
|
// =============================================================================
|
|
// Helper predicates
|
|
// =============================================================================
|
|
/**
 * Holds if the function `func` is a staticmethod, that is, it has a decorator
 * that is a reference to the global name `staticmethod`.
 *
 * Unlike `isClassmethod`, no methods are recognized "by convention" here.
 */
predicate isStaticmethod(Function func) {
  exists(NameNode decoratorName |
    func.getADecorator() = decoratorName.getNode() and
    decoratorName.isGlobal() and
    decoratorName.getId() = "staticmethod"
  )
}
|
|
|
|
/**
 * Holds if the function `func` is a classmethod -- either because it has a
 * decorator that is a reference to the global name `classmethod`, or by convention
 * (for example, a `__new__` method on a class is treated as a classmethod even
 * without the decorator).
 */
predicate isClassmethod(Function func) {
  // explicit `@classmethod`
  exists(NameNode decoratorName |
    func.getADecorator() = decoratorName.getNode() and
    decoratorName.isGlobal() and
    decoratorName.getId() = "classmethod"
  )
  or
  // methods that are treated as classmethods without needing the decorator
  func = any(Class cls).getAMethod() and
  func.getName() in [
      "__new__", // https://docs.python.org/3.10/reference/datamodel.html#object.__new__
      "__init_subclass__", // https://docs.python.org/3.10/reference/datamodel.html#object.__init_subclass__
      "__class_getitem__", // https://docs.python.org/3.10/reference/datamodel.html#object.__class_getitem__
    ]
}
|
|
|
|
/** Holds if the function `func` has a decorator that is a reference to the global name `property`. */
predicate hasPropertyDecorator(Function func) {
  exists(NameNode decoratorName |
    func.getADecorator() = decoratorName.getNode() and
    decoratorName.isGlobal() and
    decoratorName.getId() = "property"
  )
}
|
|
|
|
/**
 * Holds if the function `func` has a `contextlib.contextmanager` decorator, written
 * either as a reference to the global name `contextmanager` or as the attribute
 * access `contextlib.contextmanager`.
 */
predicate hasContextmanagerDecorator(Function func) {
  exists(ControlFlowNode decorator | func.getADecorator() = decorator.getNode() |
    // `@contextmanager`
    exists(NameNode name | name = decorator |
      name.getId() = "contextmanager" and name.isGlobal()
    )
    or
    // `@contextlib.contextmanager`
    decorator.(AttrNode).getObject("contextmanager").(NameNode).getId() = "contextlib"
  )
}
|
|
|
|
// =============================================================================
|
|
// Callables
|
|
// =============================================================================
|
|
/**
 * A callable defined in library code, identified by a unique string.
 *
 * Concrete subclasses must say where the callable is called (`getACall`) and where
 * it is used as a call-back (`getACallback`).
 */
abstract class LibraryCallable extends string {
  bindingset[this]
  LibraryCallable() { any() }

  /** Gets a call to this library callable. */
  abstract CallCfgNode getACall();

  /**
   * Same as `getACall` but without referring to the call graph or API graph.
   * Has no results unless overridden.
   */
  CallCfgNode getACallSimple() { none() }

  /** Gets a data-flow node, where this library callable is used as a call-back. */
  abstract ArgumentNode getACallback();
}
|
|
|
|
newtype TDataFlowCallable =
  /**
   * The target for all calls: plain functions, lambdas, methods on classes,
   * class instantiations, and (in the future) special methods.
   */
  TFunction(Function func) {
    // Functions with an explicit definition
    exists(func.getDefinition())
    or
    // For generators/list-comprehensions a synthetic function is created.
    func = any(Comp c).getFunction()
  } or
  /** see QLDoc for `DataFlowModuleScope` for why we need this. */
  TModule(Module m) or
  TLibraryCallable(LibraryCallable callable)
|
|
|
|
/**
 * A callable, as seen by the data-flow library: a function, a module scope, or a
 * library callable.
 */
abstract class DataFlowCallable extends TDataFlowCallable {
  /** Gets a textual representation of this element. */
  abstract string toString();

  /** Gets the location of this dataflow callable. */
  abstract Location getLocation();

  /** Gets the scope of this callable. */
  abstract Scope getScope();

  /** Gets the qualified name of this callable, if any. */
  abstract string getQualifiedName();

  /** Gets the parameter at position `ppos`, if any. */
  abstract ParameterNode getParameter(ParameterPosition ppos);

  /** Gets the underlying library callable, if any. */
  LibraryCallable asLibraryCallable() { this = TLibraryCallable(result) }
}
|
|
|
|
/**
 * A callable function.
 *
 * Functions with a `property` decorator are currently excluded.
 */
abstract class DataFlowFunction extends DataFlowCallable, TFunction {
  Function func;

  DataFlowFunction() {
    // TODO: Handle @property decorators
    not hasPropertyDecorator(func) and
    this = TFunction(func)
  }

  override string toString() { result = func.toString() }

  override Location getLocation() { result = func.getLocation() }

  override Function getScope() { result = func }

  override string getQualifiedName() { result = func.getQualifiedName() }

  /** Gets the positional parameter offset, to take into account self/cls parameters. */
  int positionalOffset() { result = 0 }

  override ParameterNode getParameter(ParameterPosition ppos) {
    // Lower bound positions (such as `[1..]`) are deliberately not handled here:
    // they are handled by parameter matching and would create inconsistencies
    // here, as multiple parameters could match such a position.
    exists(int i | ppos.isPositional(i) |
      result.getParameter() = func.getArg(i + this.positionalOffset())
    )
    or
    exists(string kw | ppos.isKeyword(kw) | result.getParameter() = func.getArgByName(kw))
    or
    // `*args`
    exists(int starIndex |
      // a `*args` parameter comes after the last positional parameter. We need to take
      // self parameter into account, so for
      // `def func(foo, bar, *args)` it should be index 2 (pos-param-count == 2)
      // `class A: def func(self, foo, bar, *args)` it should be index 2 (pos-param-count - 1 == 3 - 1)
      starIndex = func.getPositionalParameterCount() - this.positionalOffset()
      or
      // no positional argument
      not exists(func.getArg(_)) and starIndex = 0
    |
      // the real `*args` parameter
      ppos.isStarArgs(starIndex) and
      result.getParameter() = func.getVararg()
      or
      // the synthetic `*args` element parameter
      ppos.isSynthStarArgsElement(starIndex) and
      result = TSynthStarArgsElementParameterNode(this)
    )
    or
    // `**kwargs`
    ppos.isDictSplat() and result.getParameter() = func.getKwarg()
    or
    // synthetic `**kwargs`
    ppos.isSynthDictSplat() and result = TSynthDictSplatParameterNode(this)
  }
}
|
|
|
|
/** A plain function, that is, a `DataFlowFunction` that is not a `DataFlowMethod`. */
class DataFlowPlainFunction extends DataFlowFunction {
  DataFlowPlainFunction() { not this instanceof DataFlowMethod }
}
|
|
|
|
/** A method, that is, a function that is a method of some class. */
class DataFlowMethod extends DataFlowFunction {
  Class cls;

  DataFlowMethod() { func = cls.getAMethod() }

  /** Gets the class this function is a method of. */
  Class getClass() { result = cls }

  /** Gets the offset `1`, since the first declared parameter is the self/cls parameter. */
  override int positionalOffset() { result = 1 }

  override ParameterNode getParameter(ParameterPosition ppos) {
    // every other position is resolved as for any function
    result = super.getParameter(ppos)
    or
    // the first declared parameter is the self parameter
    ppos.isSelf() and
    result.getParameter() = func.getArg(0)
  }
}
|
|
|
|
/** A classmethod, that is, a method for which `isClassmethod` holds. */
class DataFlowClassmethod extends DataFlowMethod {
  DataFlowClassmethod() { isClassmethod(func) }
}
|
|
|
|
/**
 * A staticmethod, that is, a method for which `isStaticmethod` holds.
 *
 * Parameters are resolved as for a plain function: there is no positional offset
 * and no self parameter.
 */
class DataFlowStaticmethod extends DataFlowMethod, DataFlowFunction {
  DataFlowStaticmethod() { isStaticmethod(func) }

  override int positionalOffset() { result = 0 }

  override ParameterNode getParameter(ParameterPosition ppos) {
    // bypass `DataFlowMethod.getParameter`, which adds the self parameter
    result = DataFlowFunction.super.getParameter(ppos)
  }
}
|
|
|
|
/**
 * A module. This is not actually a callable, but it is needed so that a
 * `ModuleVariableNode` has an enclosing callable.
 */
class DataFlowModuleScope extends DataFlowCallable, TModule {
  Module mod;

  DataFlowModuleScope() { this = TModule(mod) }

  override string toString() { result = mod.toString() }

  override Location getLocation() { result = mod.getLocation() }

  override Module getScope() { result = mod }

  override string getQualifiedName() { result = mod.getName() }

  /** Has no results: a module scope has no parameters. */
  override ParameterNode getParameter(ParameterPosition ppos) { none() }
}
|
|
|
|
/**
 * A `LibraryCallable` wrapped as a `DataFlowCallable`. It has no scope, no
 * location, and no parameters.
 */
class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
  LibraryCallable callable;

  LibraryCallableValue() { this = TLibraryCallable(callable) }

  override string toString() { result = "LibraryCallableValue: " + callable.toString() }

  override Location getLocation() { none() }

  override Scope getScope() { none() }

  override string getQualifiedName() { result = callable.toString() }

  override ParameterNode getParameter(ParameterPosition ppos) { none() }

  override LibraryCallable asLibraryCallable() { result = callable }

  /** Gets a data-flow node, where this library callable is used as a call-back. */
  ArgumentNode getACallback() { result = callable.getACallback() }
}
|
|
|
|
// =============================================================================
|
|
// Type trackers used to resolve calls.
|
|
// =============================================================================
|
|
/** Gets a call whose function is a reference to the global name `type`. */
private CallCfgNode getTypeCall() {
  exists(NameNode callee |
    result.getFunction().asCfgNode() = callee and
    callee.isGlobal() and
    callee.getId() = "type"
  )
}
|
|
|
|
/** Gets a call whose function is a reference to the global name `super`. */
private CallCfgNode getSuperCall() {
  // While it is possible to reference super and call it later, it's almost never done in
  // practice. From looking at top 1000 projects, there were a few uses around mocking (see
  // link below), but otherwise only 2 edgecases. Overall it seems ok to ignore this complexity.
  //
  // https://github.com/python/cpython/blob/18b1782192f85bd26db89f5bc850f8bee4247c1a/Lib/unittest/mock.py#L48-L50
  exists(NameNode callee |
    result.getFunction().asCfgNode() = callee and
    callee.isGlobal() and
    callee.getId() = "super"
  )
}
|
|
|
|
/**
 * Holds if the file `f` should be ignored when computing the call-graph.
 *
 * We currently see a performance problem when analyzing the `sympy` PyPI package,
 * which can be part of the database when dependencies are installed and extracted.
 * From what we can understand, SymPy is using Python in an exotic way, so the fact
 * that our analysis currently does not handle this project says nothing about our
 * ability to handle normal Python code. Furthermore, SymPy does not look to be
 * relevant in a security context, so we should not lose out on any security results
 * by doing this.
 */
private predicate ignoreForCallGraph(File f) {
  exists(string path | path = f.getAbsolutePath() | path.matches("%/site-packages/sympy/%"))
}
|
|
|
|
/** Input for the call-graph type tracker that tracks references to functions. */
private module TrackFunctionInput implements CallGraphConstruction::Simple::InputSig {
  class State = Function;

  /** Holds if tracking of the function `func` starts at the node `start`. */
  predicate start(Node start, Function func) {
    exists(Expr e | e = start.asExpr() |
      // the function definition itself
      e = func.getDefinition()
      or
      // when a function is decorated, it's the result of the (last) decorator call that
      // is used
      e = func.getDefinition().(FunctionExpr).getADecoratorCall()
    )
  }

  /** Holds if `n` is located in a file that is ignored for call-graph computation. */
  predicate filter(Node n) {
    exists(File f | f = n.getLocation().getFile() | ignoreForCallGraph(f))
  }
}
|
|
|
|
/**
 * Gets a node that refers to the function `func`.
 */
Node functionTracker(Function func) {
  exists(LocalSourceNode source |
    source = CallGraphConstruction::Simple::Make<TrackFunctionInput>::track(func)
  |
    source.flowsTo(result)
  )
}
|
|
|
|
/** Input for the call-graph type tracker that tracks references to classes. */
private module TrackClassInput implements CallGraphConstruction::Simple::InputSig {
  class State = Class;

  /** Holds if tracking of the class `cls` starts at the node `start`. */
  predicate start(Node start, Class cls) {
    exists(Expr e | e = start.asExpr() |
      // the class definition itself
      e = cls.getParent()
      or
      // when a class is decorated, it's the result of the (last) decorator call that
      // is used
      e = cls.getParent().getADecoratorCall()
    )
    or
    // `type(obj)`, where obj is an instance of this class
    exists(CallCfgNode typeCall | typeCall = getTypeCall() and start = typeCall |
      typeCall.getArg(0) = classInstanceTracker(cls)
    )
  }

  /**
   * Holds if `n` is located in a file that is ignored for call-graph computation,
   * or if `n` is a parameter node at the self position of some callable.
   */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
|
|
|
|
/**
 * Gets a node that refers to the class `cls`.
 */
Node classTracker(Class cls) {
  exists(LocalSourceNode source |
    source = CallGraphConstruction::Simple::Make<TrackClassInput>::track(cls)
  |
    source.flowsTo(result)
  )
}
|
|
|
|
/** Input for the call-graph type tracker that tracks references to class instances. */
private module TrackClassInstanceInput implements CallGraphConstruction::Simple::InputSig {
  class State = Class;

  /** Holds if tracking of an instance of the class `cls` starts at the node `start`. */
  predicate start(Node start, Class cls) {
    // an expression annotated with a reference to the class
    exists(Annotation annotation |
      start.asExpr() = annotation.getAnnotatedExpression() and
      annotation = classTracker(cls).asExpr()
    )
    or
    // a call that is resolved as a call to the class
    resolveClassCall(start.(CallCfgNode).asCfgNode(), cls)
    or
    // result of `super().__new__` as used in a `__new__` method implementation
    exists(Class classUsedInSuper |
      classUsedInSuper = getADirectSuperclass*(cls) and
      fromSuperNewCall(start.(CallCfgNode).asCfgNode(), classUsedInSuper, _, _)
    )
  }

  /**
   * Holds if `n` is located in a file that is ignored for call-graph computation,
   * or if `n` is a parameter node at the self position of some callable.
   */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
|
|
|
|
/**
 * Gets a node that refers to an instance of the class `cls`.
 */
Node classInstanceTracker(Class cls) {
  exists(LocalSourceNode source |
    source = CallGraphConstruction::Simple::Make<TrackClassInstanceInput>::track(cls)
  |
    source.flowsTo(result)
  )
}
|
|
|
|
/** Input for the call-graph type tracker that tracks the `self` parameter of methods. */
private module TrackSelfInput implements CallGraphConstruction::Simple::InputSig {
  class State = Class;

  /**
   * Holds if `start` is the first parameter of a method on `classWithMethod` that
   * is neither a staticmethod nor a classmethod.
   */
  predicate start(Node start, Class classWithMethod) {
    exists(Function method |
      method = classWithMethod.getAMethod() and
      not isClassmethod(method) and
      not isStaticmethod(method)
    |
      start.asExpr() = method.getArg(0)
    )
  }

  /**
   * Holds if `n` is located in a file that is ignored for call-graph computation,
   * or if `n` is a parameter node at the self position of some callable.
   */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
|
|
|
|
/**
 * Gets a node that refers to the `self` argument of a method on the class
 * `classWithMethod`. The method cannot be a `staticmethod` or `classmethod`.
 */
Node selfTracker(Class classWithMethod) {
  exists(LocalSourceNode source |
    source = CallGraphConstruction::Simple::Make<TrackSelfInput>::track(classWithMethod)
  |
    source.flowsTo(result)
  )
}
|
|
|
|
/**
 * Input for the call-graph type tracker that tracks references to the enclosing
 * class from within its methods.
 */
private module TrackClsArgumentInput implements CallGraphConstruction::Simple::InputSig {
  class State = Class;

  /**
   * Holds if `start` is the first parameter of a classmethod on `classWithMethod`,
   * or a `type(...)` call whose first argument is a reference to `self` of a method
   * on `classWithMethod`.
   */
  predicate start(Node start, Class classWithMethod) {
    // the `cls` parameter of a classmethod
    exists(Function method |
      isClassmethod(method) and
      method = classWithMethod.getAMethod()
    |
      start.asExpr() = method.getArg(0)
    )
    or
    // type(self)
    exists(CallCfgNode typeCall | typeCall = getTypeCall() and start = typeCall |
      typeCall.getArg(0) = selfTracker(classWithMethod)
    )
  }

  /**
   * Holds if `n` is located in a file that is ignored for call-graph computation,
   * or if `n` is a parameter node at the self position of some callable.
   */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
|
|
|
|
/**
 * Gets a node that refers to the enclosing class `classWithMethod` from within one
 * of its methods, either through the `cls` argument of a `classmethod` or through
 * `type(self)` in a normal method.
 */
Node clsArgumentTracker(Class classWithMethod) {
  exists(LocalSourceNode source |
    source = CallGraphConstruction::Simple::Make<TrackClsArgumentInput>::track(classWithMethod)
  |
    source.flowsTo(result)
  )
}
|
|
|
|
/** Input for the call-graph type tracker that tracks results of argument-less `super()` calls. */
private module TrackSuperCallNoArgumentInput implements CallGraphConstruction::Simple::InputSig {
  class State = Function;

  /**
   * Holds if `start` is a call to `super` without any positional argument, whose
   * scope is the function `func`, and `func` is not a staticmethod.
   */
  predicate start(Node start, Function func) {
    exists(CallCfgNode superCall |
      superCall = getSuperCall() and
      superCall.getScope() = func
    |
      start = superCall and
      not exists(superCall.getArg(_))
    ) and
    not isStaticmethod(func)
  }

  /**
   * Holds if `n` is located in a file that is ignored for call-graph computation,
   * or if `n` is a parameter node at the self position of some callable.
   */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
|
|
|
|
/**
 * Gets a node that refers to the result of calling `super` without any argument,
 * where the call happened in the function `func` (either a method or a classmethod).
 */
Node superCallNoArgumentTracker(Function func) {
  exists(LocalSourceNode source |
    source = CallGraphConstruction::Simple::Make<TrackSuperCallNoArgumentInput>::track(func)
  |
    source.flowsTo(result)
  )
}
|
|
|
|
/** Input for the call-graph type tracker that tracks results of two-argument `super(cls, obj)` calls. */
private module TrackSuperCallTwoArgumentInput implements CallGraphConstruction::Simple::InputSig {
  /**
   * Holds if `call` is a call to `super` whose first argument is a reference to the
   * class `cls` and whose second argument is `obj`.
   */
  additional predicate superCall(CallCfgNode call, Class cls, Node obj) {
    call = getSuperCall() and
    classTracker(cls) = call.getArg(0) and
    obj = call.getArg(1)
  }

  class State = CallCfgNode;

  /** Holds if `call` is a two-argument `super` call, and `start` is that same call. */
  predicate start(Node start, CallCfgNode call) {
    start = call and
    superCall(call, _, _)
  }

  /**
   * Holds if `n` is located in a file that is ignored for call-graph computation,
   * or if `n` is a parameter node at the self position of some callable.
   */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
|
|
|
|
/**
 * Gets a node that refers to the result of calling `super` with 2 arguments, where
 * the first is a reference to the class `cls`, and the second argument is `obj`.
 */
Node superCallTwoArgumentTracker(Class cls, Node obj) {
  exists(CallCfgNode superCall, LocalSourceNode source |
    TrackSuperCallTwoArgumentInput::superCall(superCall, cls, obj) and
    source = CallGraphConstruction::Simple::Make<TrackSuperCallTwoArgumentInput>::track(superCall)
  |
    source.flowsTo(result)
  )
}
|
|
|
|
// =============================================================================
|
|
// MRO
|
|
// =============================================================================
|
|
/**
 * Gets a direct superclass of the argument `cls`, if any.
 *
 * For `A` with the class definition `class A(B, C)` it will have results `B` and `C`.
 */
Class getADirectSuperclass(Class cls) {
  exists(Node baseRef | baseRef = classTracker(result) | baseRef.asExpr() = cls.getABase())
}
|
|
|
|
/**
 * Gets a direct subclass of the argument `cls`, if any.
 *
 * For `B` with the class definition `class A(B)` it will have result `A`.
 */
Class getADirectSubclass(Class cls) { getADirectSuperclass(result) = cls }
|
|
|
|
/**
 * Gets a class that, from an approximated MRO calculation, might be the next class used
 * for member-lookup when `super().attr` is used inside the class `cls`.
 *
 * In the example below, with `cls=B`, this predicate will have `A` and `C` as results.
 * ```py
 * class A: pass
 * class B(A): pass
 * class C(A): pass
 * class D(B, C): pass
 * ```
 *
 * NOTE: This approximation does not handle all cases correctly, and in the example
 * below, with `cls=A` will not have any results, although it should include `Y`.
 *
 * ```py
 * class A: pass
 * class B(A): pass
 * class X: pass
 * class Y(X): pass
 * class Ex(B, Y): pass
 * ```
 *
 * NOTE for debugging the results of this predicate: Since a class can be part of
 * multiple MROs, results from this predicate might only be valid in some, but not all,
 * inheritance chains: This is the case with the result `C` for `cls=B` in the first
 * example -- if `B` and `C` are defined in the same file, but `D` in a different file,
 * this might make the results from this predicate difficult to comprehend at first.
 *
 * For more info on the C3 MRO used in Python see:
 * - https://docs.python.org/3/glossary.html#term-method-resolution-order
 * - https://www.python.org/download/releases/2.3/mro/
 * - https://opendylan.org/_static/c3-linearization.pdf
 */
private Class getNextClassInMro(Class cls) {
  // There are three important properties for MRO computed with C3 in Python:
  //
  // 1) monotonicity: if C1 precedes C2 in the MRO of C, then C1 precedes C2 in the MRO
  //    of any subclass of C.
  // 2) local precedence ordering: if C1 precedes C2 in the list of superclasses for C,
  //    they will keep the same order in the MRO for C (and due to monotonicity, any
  //    subclass).
  // 3) consistency with the extended precedence graph: if A and B (that are part of the
  //    class hierarchy of C) do not have a subclass/superclass relationship on their
  //    own, the ordering of A and B in the MRO of C will be determined by the local
  //    precedence ordering in the classes that use both A and B, either directly or
  //    through a subclass. (see paper for more details)
  //
  // Note that not all class hierarchies are allowed with C3, see the Python 2.3 article
  // for examples.
  //
  // class A(B, ...):
  // `B` must be the next class after `A` in the MRO for A.
  classTracker(result).asExpr() = cls.getBase(0)
  or
  // class A(B, C, D):
  // - `C` could be the next class after `B` in MRO.
  // - `D` could be the next class after `C` in MRO.
  exists(Class sub, int i |
    classTracker(cls).asExpr() = sub.getBase(i) and
    classTracker(result).asExpr() = sub.getBase(i + 1)
  |
    result != cls
  )
}
|
|
|
|
/**
 * Gets a function named `name` that a lookup of `name` on the class `cls` may resolve
 * to, following our approximation of the MRO for `cls` (see `getNextClassInMro` for
 * more information).
 */
Function findFunctionAccordingToMro(Class cls, string name) {
  // `cls` defines a method called `name` itself.
  result.getName() = name and
  result = cls.getAMethod()
  or
  // `cls` has no method called `name`: continue the lookup in a class that may follow
  // `cls` in the MRO.
  not class_has_method(cls, name) and
  exists(Class next | next = getNextClassInMro(cls) |
    result = findFunctionAccordingToMro(next, name)
  )
}
|
|
|
|
/**
 * Holds if the class `cls` has a method named `name`.
 *
 * Join-order helper for `findFunctionAccordingToMro` and
 * `findFunctionAccordingToMroKnownStartingClass`.
 */
pragma[nomagic]
private predicate class_has_method(Class cls, string name) {
  exists(Function method | method = cls.getAMethod() | method.getName() = name)
}
|
|
|
|
/**
 * Gets a class that, according to an approximated MRO calculation, might come directly
 * after `cls` in the MRO for `startingClass`.
 *
 * Note: this mirrors `getNextClassInMro`, but since `startingClass` is known, only
 * `startingClass` and its (transitive) superclasses are considered, which can give
 * slightly more precise results.
 *
 * See QLDoc for `getNextClassInMro`.
 */
Class getNextClassInMroKnownStartingClass(Class cls, Class startingClass) {
  // The first base of `cls`, where `cls` is in the hierarchy of `startingClass`.
  cls = getADirectSuperclass*(startingClass) and
  classTracker(result).asExpr() = cls.getBase(0)
  or
  // `cls` and `result` are adjacent bases of a class `derived` in the hierarchy of
  // `startingClass`.
  exists(Class derived, int pos |
    derived = getADirectSuperclass*(startingClass) and
    classTracker(cls).asExpr() = derived.getBase(pos) and
    classTracker(result).asExpr() = derived.getBase(pos + 1) and
    result != cls
  )
}
|
|
|
|
/**
 * Gets a potential definition of the function `name`, as seen from the class `cls`,
 * according to our approximation of the MRO for the class `startingClass` (see
 * `getNextClassInMroKnownStartingClass` for more information).
 *
 * Note: this is almost the same as `findFunctionAccordingToMro`, except we know the
 * `startingClass`, which can give slightly more precise results.
 */
Function findFunctionAccordingToMroKnownStartingClass(Class cls, Class startingClass, string name) {
  // `cls` is `startingClass` or one of its (transitive) superclasses, and defines
  // `name` itself.
  cls = getADirectSuperclass*(startingClass) and
  result.getName() = name and
  result = cls.getAMethod()
  or
  // `cls` has no method called `name`: continue with a class that may follow `cls` in
  // the MRO for `startingClass`.
  not class_has_method(cls, name) and
  exists(Class next | next = getNextClassInMroKnownStartingClass(cls, startingClass) |
    result = findFunctionAccordingToMroKnownStartingClass(next, startingClass, name)
  )
}
|
|
|
|
/**
 * Gets a potential definition of the function `name` according to our approximation of
 * the MRO for the class `startingClass` (see `getNextClassInMroKnownStartingClass` for
 * more information).
 *
 * This is the three-argument version of this predicate with the lookup starting at
 * `startingClass` itself.
 *
 * Note: this is almost the same as `findFunctionAccordingToMro`, except we know the
 * `startingClass`, which can give slightly more precise results.
 */
pragma[inline]
Function findFunctionAccordingToMroKnownStartingClass(Class startingClass, string name) {
  findFunctionAccordingToMroKnownStartingClass(startingClass, startingClass, name) = result
}
|
|
|
|
// =============================================================================
|
|
// attribute trackers
|
|
// =============================================================================
|
|
/**
 * Input for the call-graph construction that tracks attribute reads; the tracked state
 * is the `AttrRead` itself.
 */
private module TrackAttrReadInput implements CallGraphConstruction::Simple::InputSig {
  class State = AttrRead;

  /**
   * Holds if tracking of `attr` starts at `start`. This is the attribute read itself,
   * restricted to reads whose object is a tracked reference to a class, a class
   * instance, a `self`/`cls` argument, or the result of a `super` call.
   */
  predicate start(Node start, AttrRead attr) {
    attr = start and
    pragma[only_bind_into](attr.getObject()) in [
        classTracker(_), classInstanceTracker(_), selfTracker(_), clsArgumentTracker(_),
        superCallNoArgumentTracker(_), superCallTwoArgumentTracker(_, _)
      ]
  }

  /**
   * Holds if `n` is filtered out: it is located in a file that is ignored for the call
   * graph, or it is a parameter node at the `self` position.
   */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
|
|
|
|
/**
 * Gets a reference to the attribute read `attr`, that is, a node reached by local flow
 * from a local source node that the `TrackAttrReadInput` tracking associates with
 * `attr`.
 */
Node attrReadTracker(AttrRead attr) {
  exists(LocalSourceNode src |
    src = CallGraphConstruction::Simple::Make<TrackAttrReadInput>::track(attr)
  |
    src.flowsTo(result)
  )
}
|
|
|
|
// =============================================================================
|
|
// call and argument resolution
|
|
// =============================================================================
|
|
/** The kinds of calls distinguished by the call-graph resolution. */
newtype TCallType =
  /** A call to a plain function, i.e. one that is not part of a class. */
  CallTypePlainFunction() or
  /**
   * A call to a "normal" method on a class instance.
   * Staticmethods and classmethods are not included.
   */
  CallTypeNormalMethod() or
  /** A call to a staticmethod. */
  CallTypeStaticMethod() or
  /** A call to a classmethod. */
  CallTypeClassMethod() or
  /**
   * A call to a method that is looked up on the class itself rather than on an
   * instance, such as
   *
   * ```py
   * class Foo:
   *     def method(self, arg):
   *         pass
   *
   * foo = Foo()
   * Foo.method(foo, 42)
   * ```
   */
  CallTypeMethodAsPlainFunction() or
  /** A call to a class. */
  CallTypeClass() or
  /** A call on a class instance, which goes to the `__call__` method of the class. */
  CallTypeClassInstanceCall()
|
|
|
|
/** A type of call. */
class CallType extends TCallType {
  /** Gets a textual representation of this call type: the name of its `TCallType` branch. */
  string toString() {
    this = CallTypePlainFunction() and result = "CallTypePlainFunction"
    or
    this = CallTypeNormalMethod() and result = "CallTypeNormalMethod"
    or
    this = CallTypeStaticMethod() and result = "CallTypeStaticMethod"
    or
    this = CallTypeClassMethod() and result = "CallTypeClassMethod"
    or
    this = CallTypeMethodAsPlainFunction() and result = "CallTypeMethodAsPlainFunction"
    or
    this = CallTypeClass() and result = "CallTypeClass"
    or
    this = CallTypeClassInstanceCall() and result = "CallTypeClassInstanceCall"
  }
}
|
|
|
|
// -------------------------------------
|
|
// method call resolution
|
|
// -------------------------------------
|
|
/** Predicates for resolving calls to methods. */
private module MethodCalls {
  /**
   * Holds if `call` is a call to the method `target`, reached through an attribute read
   * `attr` with the name `functionName` on `self`, where `self` is a reference to the
   * class `cls` or to an instance of the class `cls`.
   *
   * References derived from an implicit `self`/`cls` argument to a method are not
   * handled here -- for that, see `callWithinMethodImplicitSelfOrCls`.
   */
  pragma[nomagic]
  private predicate directCall(
    CallNode call, Function target, string functionName, Class cls, AttrRead attr, Node self
  ) {
    directCall_join(call, functionName, cls, attr, self) and
    findFunctionAccordingToMroKnownStartingClass(cls, functionName) = target
  }

  /**
   * Holds for the part of `directCall` that does not involve the MRO lookup of the
   * target. Extracted to give good join order.
   */
  pragma[nomagic]
  private predicate directCall_join(
    CallNode call, string functionName, Class cls, AttrRead attr, Node self
  ) {
    (self = classTracker(cls) or self = classInstanceTracker(cls)) and
    attr.accesses(self, functionName) and
    attrReadTracker(attr).asCfgNode() = call.getFunction()
  }

  /**
   * Holds if `call` is a call to the method `target`, reached through an attribute read
   * `attr` with the name `functionName` on `self`, where `self` is a reference to an
   * implicit `self`/`cls` argument to a method within the class `classWithMethod`.
   *
   * The target is looked up starting from `classWithMethod` or any of its (transitive)
   * subclasses.
   */
  pragma[nomagic]
  private predicate callWithinMethodImplicitSelfOrCls(
    CallNode call, Function target, string functionName, Class classWithMethod, AttrRead attr,
    Node self
  ) {
    callWithinMethodImplicitSelfOrCls_join(call, functionName, classWithMethod, attr, self) and
    exists(Class lookupStart | lookupStart = getADirectSubclass*(classWithMethod) |
      target = findFunctionAccordingToMro(lookupStart, functionName)
    )
  }

  /**
   * Holds for the part of `callWithinMethodImplicitSelfOrCls` that does not involve the
   * MRO lookup of the target. Extracted to give good join order.
   */
  pragma[nomagic]
  private predicate callWithinMethodImplicitSelfOrCls_join(
    CallNode call, string functionName, Class classWithMethod, AttrRead attr, Node self
  ) {
    (self = clsArgumentTracker(classWithMethod) or self = selfTracker(classWithMethod)) and
    attr.accesses(self, functionName) and
    attrReadTracker(attr).asCfgNode() = call.getFunction()
  }

  /**
   * Like `fromSuper`, but only for `__new__`, and without requirement for being able to
   * resolve the call to a known target (since the only super class might be the
   * builtin `object`, so we never have the implementation of `__new__` in the DB).
   *
   * `self` is additionally required to be a reference to a class or to a `cls`
   * argument.
   */
  predicate fromSuperNewCall(CallNode call, Class classUsedInSuper, AttrRead attr, Node self) {
    (self = classTracker(_) or self = clsArgumentTracker(_)) and
    fromSuper_join(call, "__new__", classUsedInSuper, attr, self)
  }

  /**
   * Holds if `call` is a call to a method `target`, derived from a use of `super`, either
   * as:
   *
   * (1) `super(SomeClass, obj)`, where the first argument is a reference to the class
   *     `classUsedInSuper`, and the second argument is `self`.
   *
   * (2) `super()`. This implicit version can only happen within a method in a class.
   *     The implicit first argument is the class the call happens within
   *     (`classUsedInSuper`). The implicit second argument is the `self`/`cls`
   *     parameter of the method this happens within.
   *
   * The method call is found by making an attribute read `attr` with the name
   * `functionName` on the return value from the `super` call. The lookup of `target`
   * starts at a class that may follow `classUsedInSuper` in the MRO.
   */
  pragma[nomagic]
  predicate fromSuper(
    CallNode call, Function target, string functionName, Class classUsedInSuper, AttrRead attr,
    Node self
  ) {
    fromSuper_join(call, functionName, classUsedInSuper, attr, self) and
    exists(Class next | next = getNextClassInMro(classUsedInSuper) |
      target = findFunctionAccordingToMro(next, functionName)
    )
  }

  /**
   * Holds for the part of `fromSuper` that does not involve the MRO lookup of the
   * target. Extracted to give good join order.
   */
  pragma[nomagic]
  private predicate fromSuper_join(
    CallNode call, string functionName, Class classUsedInSuper, AttrRead attr, Node self
  ) {
    attrReadTracker(attr).asCfgNode() = call.getFunction() and
    (
      // implicit form: `super()`
      exists(Function enclosingFunc |
        // Requiring enclosing scope of function to be a class is a little too
        // restrictive, since it is possible to use `super()` in a function defined inside
        // the method, where the first argument to the nested-function will be used as
        // implicit self argument. In practice I don't expect this to be a problem, and we
        // did not support this with points-to either.
        enclosingFunc.getEnclosingScope() = classUsedInSuper and
        self.(ParameterNode).getParameter() = enclosingFunc.getArg(0) and
        attr.accesses(superCallNoArgumentTracker(enclosingFunc), functionName)
      )
      or
      // explicit form: `super(SomeClass, obj)`
      attr.accesses(superCallTwoArgumentTracker(classUsedInSuper, self), functionName)
    )
  }

  /**
   * Holds if `call` is a method call to `target` with call-type `type`, as found by
   * `directCall`, `callWithinMethodImplicitSelfOrCls`, or `fromSuper`, where `self` is
   * the `self` node reported by that predicate.
   */
  predicate resolveMethodCall(CallNode call, Function target, CallType type, Node self) {
    (
      fromSuper(call, target, _, _, _, self)
      or
      callWithinMethodImplicitSelfOrCls(call, target, _, _, _, self)
      or
      directCall(call, target, _, _, _, self)
    ) and
    (
      // classmethod call
      isClassmethod(target) and
      type = CallTypeClassMethod()
      or
      // staticmethod call
      isStaticmethod(target) and
      type = CallTypeStaticMethod()
      or
      // method as plain function call: lookup made on a reference to a class
      not isClassmethod(target) and
      not isStaticmethod(target) and
      self = classTracker(_) and
      type = CallTypeMethodAsPlainFunction()
      or
      // normal method call: lookup made on an instance or a `self` reference
      not isClassmethod(target) and
      not isStaticmethod(target) and
      self in [classInstanceTracker(_), selfTracker(_)] and
      type = CallTypeNormalMethod()
    )
  }
}
|
|
|
|
import MethodCalls
|
|
|
|
// -------------------------------------
|
|
// class call resolution
|
|
// -------------------------------------
|
|
/**
 * Holds when `call` is a call to the class `cls`.
 *
 * NOTE: We have this predicate mostly to be able to compare with the old points-to
 * call-graph resolution. So it could be removed in the future.
 */
predicate resolveClassCall(CallNode call, Class cls) {
  // the called expression is a reference to `cls`
  classTracker(cls).asCfgNode() = call.getFunction()
  or
  // `cls()` inside a classmethod (which also contains `type(self)()` inside a method):
  // `cls` may be the class containing the method, or any (transitive) subclass of it.
  exists(Class classWithMethod |
    classWithMethod = getADirectSuperclass*(cls) and
    clsArgumentTracker(classWithMethod).asCfgNode() = call.getFunction()
  )
}
|
|
|
|
/**
 * Gets a function named `funcName` -- which is either `__init__` or `__new__` -- that
 * will be invoked when `cls` is constructed. The function is looked up using our MRO
 * calculation, starting at `cls`.
 */
Function invokedFunctionFromClassConstruction(Class cls, string funcName) {
  // as described in https://docs.python.org/3/reference/datamodel.html#object.__new__
  // __init__ will only be called when __new__ returns an instance of the class (which
  // is not a requirement). However, for simplicity, we assume that __init__ will always
  // be called.
  (funcName = "__init__" or funcName = "__new__") and
  findFunctionAccordingToMroKnownStartingClass(cls, funcName) = result
}
|
|
|
|
/**
 * Holds when `call` is a call on a class instance, that goes to the `__call__` method
 * `target` of the class. `self` is the data-flow node for the expression being called.
 *
 * See https://docs.python.org/3/reference/datamodel.html#object.__call__
 */
predicate resolveClassInstanceCall(CallNode call, Function target, Node self) {
  self.asCfgNode() = call.getFunction() and
  exists(Class cls |
    // the called expression is a reference to an instance of `cls`
    classInstanceTracker(cls).asCfgNode() = call.getFunction() and
    target = findFunctionAccordingToMroKnownStartingClass(cls, "__call__")
    or
    // the called expression is a reference to `self` within `cls`; the lookup may start
    // at `cls` or any of its (transitive) subclasses
    selfTracker(cls).asCfgNode() = call.getFunction() and
    target = findFunctionAccordingToMro(getADirectSubclass*(cls), "__call__")
  )
}
|
|
|
|
// -------------------------------------
|
|
// overall call resolution
|
|
// -------------------------------------
|
|
/**
 * Holds if `call` is a call to the `target`, with call-type `type`.
 *
 * This combines the resolution of plain function calls, method calls, class calls
 * (which target `__init__`/`__new__`), and calls on class instances (which target
 * `__call__`).
 */
cached
predicate resolveCall(CallNode call, Function target, CallType type) {
  Stages::DataFlow::ref() and
  (
    // plain function: a tracked reference to a function that is not a method of a class
    type = CallTypePlainFunction() and
    functionTracker(target).asCfgNode() = call.getFunction() and
    not target = any(Class cls).getAMethod()
    or
    // method call; the call type is determined by `resolveMethodCall`
    resolveMethodCall(call, target, type, _)
    or
    // class call
    type = CallTypeClass() and
    exists(Class calledClass | resolveClassCall(call, calledClass) |
      target = invokedFunctionFromClassConstruction(calledClass, _)
    )
    or
    // call on a class instance
    type = CallTypeClassInstanceCall() and
    resolveClassInstanceCall(call, target, _)
  )
}
|
|
|
|
// =============================================================================
|
|
// Argument resolution
|
|
// =============================================================================
|
|
/**
 * Holds if the argument of `call` at position `apos` is `arg`. This is just a helper
 * predicate that maps ArgumentPositions to the arguments of the underlying `CallNode`.
 */
cached
predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
  // positional argument
  exists(int i | apos.isPositional(i) | call.getArg(i) = arg.asCfgNode())
  or
  // keyword argument
  exists(string keyword | apos.isKeyword(keyword) | call.getArgByName(keyword) = arg.asCfgNode())
  or
  // the first `*args`
  exists(int i |
    apos.isStarArgs(i) and
    call.getStarArg() = arg.asCfgNode() and
    // since `CallNode.getArg` doesn't include `*args`, we need to drop to the AST level
    // to get the index. Notice that we only use the AST for getting the index, so we
    // don't need to check for dominance in regards to splitting.
    call.getNode().getPositionalArg(i).(Starred).getValue() = call.getStarArg().getNode()
  )
  or
  // `**kwargs`, either as written in the call or as the synthetic dict-splat node
  apos.isDictSplat() and
  (
    arg = TSynthDictSplatArgumentNode(call)
    or
    call.getKwargs() = arg.asCfgNode()
  )
}
|
|
|
|
/**
 * Gets the argument `arg` of `call` at position `apos`, if any. Requires that we can
 * resolve `call` to `target` with CallType `type`.
 *
 * It might seem like knowing the CallType is enough to resolve arguments. The reason
 * we also need the `target` is to avoid cross-talk. In the example below, assuming
 * that `Foo` and `Bar` define their own `meth` methods, we might end up passing _both_
 * `foo` and `bar` to both `Foo.meth` and `Bar.meth`, which is wrong. Since the
 * attribute access uses the same name, we need to also distinguish on the resolved
 * target, to know which of the two objects to pass as the self argument.
 *
 * ```py
 * foo = Foo()
 * bar = Bar()
 * if cond:
 *     func = foo.meth
 * else:
 *     func = bar.meth
 * func(42)
 * ```
 *
 * Note: If `Bar.meth` and `Foo.meth` resolve to the same function, we will end up
 * sending both `self` arguments to that function, which is by definition the right
 * thing to do.
 *
 * ### Bound methods
 *
 * For bound methods, such as `bm = x.m; bm()`, it's a little unclear whether we should
 * still use the object in the attribute lookup (`x.m`) as the self argument in the
 * call (`bm()`). We currently do this, but there might also be cases where we don't
 * want to do this.
 *
 * In the example below, we want to clear taint from the list before it reaches the
 * sink, but because we don't have a use of `l` in the `clear()` call, we currently
 * don't have any way to achieve our goal. (Note that this is a contrived example)
 *
 * ```py
 * l = list()
 * clear = l.clear
 * l.append(tainted)
 * clear()
 * sink(l)
 * ```
 *
 * To make the above even worse, bound-methods have a `__self__` property that refers to
 * the object of the bound-method, so we can re-write the code as:
 *
 * ```py
 * l = list()
 * clear = l.clear
 * clear.__self__.append(tainted)
 * clear()
 * sink(l)
 * ```
 *
 * One idea to solve this is to track the object in a synthetic data-flow node every
 * time the bound method is used, such that the `clear()` call would essentially be
 * translated into `l.clear()`, and we can still have use-use flow.
 */
cached
predicate getCallArg(CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) {
  Stages::DataFlow::ref() and
  resolveCall(call, target, type) and
  (
    // plain function call: arguments map directly
    type = CallTypePlainFunction() and
    normalCallArg(call, arg, apos)
    or
    // self argument for normal method calls -- see note above about bound methods
    type = CallTypeNormalMethod() and
    apos.isSelf() and
    resolveMethodCall(call, target, type, arg) and
    // the dataflow library has a requirement that arguments and calls are in same enclosing
    // callable. This requirement would be broken if we used `my_obj` as the self
    // argument in the `f()` call in the example below:
    // ```py
    // def call_func(f):
    //    f()
    //
    // call_func(my_obj.some_method)
    // ```
    exists(CfgNode callCfgNode | callCfgNode.getNode() = call |
      sameEnclosingCallable(callCfgNode, arg)
    )
    or
    // cls argument for classmethod calls -- see note above about bound methods
    type = CallTypeClassMethod() and
    apos.isSelf() and
    resolveMethodCall(call, target, type, arg) and
    arg in [classTracker(_), clsArgumentTracker(_)] and
    // dataflow lib has requirement that arguments and calls are in same enclosing callable.
    exists(CfgNode callCfgNode | callCfgNode.getNode() = call |
      sameEnclosingCallable(callCfgNode, arg)
    )
    or
    // normal arguments for method calls
    type in [CallTypeNormalMethod(), CallTypeStaticMethod(), CallTypeClassMethod()] and
    normalCallArg(call, arg, apos)
    or
    // method as plain function call.
    //
    // argument index 0 of call has position self (and MUST be given as positional
    // argument in call). This also means that call-arguments are shifted by 1, such
    // that argument index 1 of call has argument position 0
    type = CallTypeMethodAsPlainFunction() and
    (
      // the first positional argument is the self argument
      apos.isSelf() and
      call.getArg(0) = arg.asCfgNode()
      or
      // the remaining positional arguments are shifted down by one
      exists(ArgumentPosition unshifted, int i |
        unshifted.isPositional(i) and
        normalCallArg(call, arg, unshifted) and
        apos.isPositional(i - 1)
      )
      or
      // non-positional arguments are unaffected
      normalCallArg(call, arg, apos) and
      not apos.isPositional(_)
    )
    or
    // class call
    type = CallTypeClass() and
    (
      normalCallArg(call, arg, apos)
      or
      // only pass synthetic node for created object to __init__, and not __new__ since
      // __new__ is a classmethod.
      apos.isSelf() and
      arg = TSyntheticPreUpdateNode(call) and
      target = invokedFunctionFromClassConstruction(_, "__init__")
    )
    or
    // call on class instance, which goes to `__call__` method
    type = CallTypeClassInstanceCall() and
    (
      normalCallArg(call, arg, apos)
      or
      apos.isSelf() and
      resolveClassInstanceCall(call, target, arg)
    )
  )
}
|
|
|
|
/**
 * Holds if `node1` and `node2` have the same enclosing callable.
 *
 * This is a join-order helper for `getCallArg`, since otherwise we would do a
 * cartesian product of the enclosing callables.
 */
bindingset[node1, node2]
pragma[inline_late]
private predicate sameEnclosingCallable(Node node1, Node node2) {
  exists(DataFlowCallable enclosing |
    enclosing = node1.getEnclosingCallable() and
    enclosing = node2.getEnclosingCallable()
  )
}
|
|
|
|
// =============================================================================
|
|
// DataFlowCall
|
|
// =============================================================================
|
|
/** The different kinds of calls known to the data-flow library. */
newtype TDataFlowCall =
  /** A call node `call` that `resolveCall` resolves to `target` with call-type `type`. */
  TNormalCall(CallNode call, Function target, CallType type) { resolveCall(call, target, type) } or
  /** A call to the generated function inside a comprehension */
  TComprehensionCall(Comp c) or
  /** Any call node, viewed as a potential call to a summarized (library) callable. */
  TPotentialLibraryCall(CallNode call) or
  /** A synthesized call inside a summarized callable */
  TSummaryCall(
    FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver
  ) {
    FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
  }
|
|
|
|
/** A call that is taken into account by the global data flow computation. */
abstract class DataFlowCall extends TDataFlowCall {
  /** Gets a textual representation of this element. */
  abstract string toString();

  /** Gets the callable to which this call goes. */
  abstract DataFlowCallable getCallable();

  /** Gets the argument at position `apos`, if any. */
  abstract ArgumentNode getArgument(ArgumentPosition apos);

  /** Gets the control flow node representing this call, if any. */
  abstract ControlFlowNode getNode();

  /**
   * Gets the enclosing callable of this call, derived from the scope of the call via
   * `getCallableScope`.
   */
  DataFlowCallable getEnclosingCallable() {
    exists(Scope scope | scope = this.getScope() | result = getCallableScope(scope))
  }

  /** Gets the scope of this node, if any. */
  abstract Scope getScope();

  /** Gets the location of this dataflow call. */
  abstract Location getLocation();

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(Location loc | loc = this.getLocation() |
      loc.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
    )
  }
}
|
|
|
|
/**
 * A call found in the program source (as opposed to a synthesised call). By default,
 * its location is the location of its control flow node.
 */
abstract class ExtractedDataFlowCall extends DataFlowCall {
  override Location getLocation() {
    exists(ControlFlowNode node | node = this.getNode() | result = node.getLocation())
  }
}
|
|
|
|
/**
 * A resolved call in source code with an underlying `CallNode`.
 *
 * This is considered normal, compared with special calls such as `obj[0]` calling the
 * `__getitem__` method on the object. However, this also includes calls that go to the
 * `__call__` special method.
 */
class NormalCall extends ExtractedDataFlowCall, TNormalCall {
  // The components of the underlying `TNormalCall` value: the call node, the function
  // it was resolved to, and the kind of call.
  CallNode call;
  Function target;
  CallType type;

  NormalCall() { TNormalCall(call, target, type) = this }

  override string toString() {
    // note: if we used toString directly on the CallNode we would get
    //   `ControlFlowNode for func()`
    // but the `ControlFlowNode` part is just clutter, so we go directly to the AST node
    // instead.
    call.getNode().toString() = result
  }

  override ControlFlowNode getNode() { call = result }

  override Scope getScope() { call.getScope() = result }

  /** Gets the data-flow callable whose scope is the resolved `target` function. */
  override DataFlowCallable getCallable() { target = result.(DataFlowFunction).getScope() }

  /** Gets the argument at position `apos`, as computed by `getCallArg`. */
  override ArgumentNode getArgument(ArgumentPosition apos) {
    getCallArg(call, target, type, result, apos)
  }

  /** Gets the `CallType` of this call. */
  CallType getCallType() { type = result }
}
|
|
|
|
/** A call to the generated function inside a comprehension */
class ComprehensionCall extends ExtractedDataFlowCall, TComprehensionCall {
  // The comprehension, and the function generated for it.
  Comp c;
  Function target;

  ComprehensionCall() {
    TComprehensionCall(c) = this and
    c.getFunction() = target
  }

  /** Gets the comprehension this call belongs to. */
  Comp getComprehension() { c = result }

  override string toString() { result = "comprehension call" }

  override ControlFlowNode getNode() { c = result.getNode() }

  override Scope getScope() { c.getScope() = result }

  /** Gets the data-flow callable whose scope is the generated function. */
  override DataFlowCallable getCallable() { target = result.(DataFlowFunction).getScope() }

  /** Gets the iterable of the comprehension, which is the argument at positional index 0. */
  override ArgumentNode getArgument(ArgumentPosition apos) {
    apos.isPositional(0) and
    c.getIterable() = result.asExpr()
  }

  override Location getLocation() { c.getLocation() = result }
}
|
|
|
|
/**
 * A potential call to a summarized callable, a `LibraryCallable`.
 *
 * We currently exclude all resolved calls. This means that a call to, say, `map`, which
 * is a `ClassCall`, cannot currently be given a summary.
 * We hope to lift this restriction in the future and include all potential calls to summaries
 * in this class.
 */
class PotentialLibraryCall extends ExtractedDataFlowCall, TPotentialLibraryCall {
  // The underlying call node.
  CallNode call;

  PotentialLibraryCall() { TPotentialLibraryCall(call) = this }

  override string toString() {
    // note: if we used toString directly on the CallNode we would get
    //   `ControlFlowNode for func()`
    // but the `ControlFlowNode` part is just clutter, so we go directly to the AST node
    // instead.
    call.getNode().toString() = result
  }

  // Deliberately has no result: we cannot refer to the callable here, as that could in
  // turn refer to type tracking. This call will instead be tied to its callable via
  // `viableCallable` when the global data flow is assembled.
  override DataFlowCallable getCallable() { none() }

  override ArgumentNode getArgument(ArgumentPosition apos) {
    // potential self argument, from `foo.bar()` -- note that this could also just be a
    // module reference, but we really don't have a good way of knowing :|
    apos.isSelf() and
    call.getFunction().(AttrNode).getObject() = result.asCfgNode()
    or
    // arguments as written in the call
    normalCallArg(call, result, apos)
  }

  override ControlFlowNode getNode() { call = result }

  override Scope getScope() { call.getScope() = result }
}
|
|
|
|
/**
 * A synthesized call inside a callable with a flow summary.
 *
 * For example, in
 * ```python
 * map(lambda x: x + 1, [1, 2, 3])
 * ```
 *
 * there is a synthesized call to the lambda argument inside `map`.
 */
class SummaryCall extends DataFlowCall, TSummaryCall {
  // The summarized callable containing this call, and the summary node being called.
  private FlowSummaryImpl::Public::SummarizedCallable c;
  private FlowSummaryImpl::Private::SummaryNode receiver;

  SummaryCall() { TSummaryCall(c, receiver) = this }

  /** Gets the data flow node that this call targets. */
  FlowSummaryImpl::Private::SummaryNode getReceiver() { receiver = result }

  /** Gets the summarized callable that this call is synthesized inside. */
  override DataFlowCallable getEnclosingCallable() { c = result.asLibraryCallable() }

  // A synthesized call has no scope, statically known callable, argument lookup,
  // control flow node, or location; all of these have no result.
  override Scope getScope() { none() }

  override DataFlowCallable getCallable() { none() }

  override ArgumentNode getArgument(ArgumentPosition apos) { none() }

  override ControlFlowNode getNode() { none() }

  override string toString() { result = "[summary] call to " + receiver + " in " + c }

  override Location getLocation() { none() }
}
|
|
|
|
/**
 * The value of a parameter at function entry, viewed as a node in a data
 * flow graph.
 */
abstract class ParameterNodeImpl extends Node {
  /** Gets the `Parameter` this `ParameterNode` represents. */
  abstract Parameter getParameter();

  /**
   * Holds if this node is the parameter of callable `c` at the
   * position `ppos`.
   */
  predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
    c.getParameter(ppos) = this
  }
}
|
|
|
|
/**
 * A synthetic parameter representing the values of the variables captured
 * by the callable being called. This parameter represents a single object
 * where all the values are stored as attributes.
 * This is also known as the environment part of a closure.
 *
 * This is used for tracking flow through captured variables.
 */
class SynthCapturedVariablesParameterNode extends ParameterNodeImpl,
  TSynthCapturedVariablesParameterNode
{
  // The function that this synthetic parameter belongs to.
  private Function callable;

  SynthCapturedVariablesParameterNode() { TSynthCapturedVariablesParameterNode(callable) = this }

  /** Gets the function that this synthetic parameter belongs to. */
  final Function getCallable() { callable = result }

  // There is no `Parameter` behind a synthetic parameter node.
  override Parameter getParameter() { none() }

  /** Holds if `c` is the function of this node and `pos` is the lambda-self position. */
  override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
    pos.isLambdaSelf() and
    TFunction(callable) = c
  }

  override Scope getScope() { callable = result }

  override Location getLocation() { callable.getLocation() = result }

  override string toString() { result = "lambda self in " + callable }
}
|
|
|
|
/** A parameter for a library callable with a flow summary. */
class SummaryParameterNode extends ParameterNodeImpl, FlowSummaryNode {
  SummaryParameterNode() {
    FlowSummaryImpl::Private::summaryParameterNode(this.getSummaryNode(), _)
  }

  /** Gets the parameter position that the flow-summary library assigns to this node. */
  private ParameterPosition getPosition() {
    exists(FlowSummaryImpl::Private::SummaryNode sn | sn = this.getSummaryNode() |
      FlowSummaryImpl::Private::summaryParameterNode(sn, result)
    )
  }

  // There is no `Parameter` behind a summary parameter node.
  override Parameter getParameter() { none() }

  /** Holds if `c` is the summarized callable of this node and `ppos` is its position. */
  override predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
    ppos = this.getPosition() and
    c.asLibraryCallable() = this.getSummarizedCallable()
  }
}
|
|
|
|
/** A data-flow node used to model flow summaries. */
class FlowSummaryNode extends Node, TFlowSummaryNode {
  /** Gets the summary node wrapped by this data-flow node. */
  FlowSummaryImpl::Private::SummaryNode getSummaryNode() { TFlowSummaryNode(result) = this }

  /** Gets the summarized callable that the wrapped summary node belongs to. */
  FlowSummaryImpl::Public::SummarizedCallable getSummarizedCallable() {
    exists(FlowSummaryImpl::Private::SummaryNode sn | sn = this.getSummaryNode() |
      result = sn.getSummarizedCallable()
    )
  }

  override DataFlowCallable getEnclosingCallable() {
    this.getSummarizedCallable() = result.asLibraryCallable()
  }

  override string toString() { this.getSummaryNode().toString() = result }

  // Hack to return "empty location": an empty file path with all positions set to 0.
  deprecated override predicate hasLocationInfo(
    string file, int startline, int startcolumn, int endline, int endcolumn
  ) {
    file = "" and
    startline = 0 and
    endline = 0 and
    startcolumn = 0 and
    endcolumn = 0
  }
}
|
|
|
|
/**
 * A flow-summary node that `FlowSummaryImpl::Private::summaryReturnNode`
 * classifies as a return node.
 */
private class SummaryReturnNode extends FlowSummaryNode, ReturnNode {
  private ReturnKind returnKind;

  SummaryReturnNode() {
    FlowSummaryImpl::Private::summaryReturnNode(this.getSummaryNode(), returnKind)
  }

  /** Gets the return kind that the flow-summary library associates with this node. */
  override ReturnKind getKind() { returnKind = result }
}
|
|
|
|
/**
 * A flow-summary node that is an argument to a call made inside a flow summary,
 * as determined by `FlowSummaryImpl::Private::summaryArgumentNode`.
 */
private class SummaryArgumentNode extends FlowSummaryNode, ArgumentNode {
  private SummaryCall summaryCall;
  private ArgumentPosition argPos;

  SummaryArgumentNode() {
    FlowSummaryImpl::Private::summaryArgumentNode(summaryCall.getReceiver(),
      this.getSummaryNode(), argPos)
  }

  /** Holds if this node is the argument at position `pos` of the summary call `call`. */
  override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
    pos = argPos and
    call = summaryCall
  }
}
|
|
|
|
/**
 * A flow-summary node that acts as the post-update node of another
 * flow-summary node, as determined by
 * `FlowSummaryImpl::Private::summaryPostUpdateNode`.
 */
private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl {
  private FlowSummaryNode preUpdate;

  SummaryPostUpdateNode() {
    FlowSummaryImpl::Private::summaryPostUpdateNode(this.getSummaryNode(),
      preUpdate.getSummaryNode())
  }

  /** Gets the flow-summary node for which this is the post-update node. */
  override Node getPreUpdateNode() { preUpdate = result }
}
|
|
|
|
/**
 * A synthetic argument that stands for the values of the variables captured
 * by the callable being called. It represents one single object on which all
 * of those values are stored as attributes -- in other words, the environment
 * part of a closure.
 *
 * This is used for tracking flow through captured variables.
 */
class SynthCapturedVariablesArgumentNode extends Node, TSynthCapturedVariablesArgumentNode {
  ControlFlowNode callable;

  SynthCapturedVariablesArgumentNode() { TSynthCapturedVariablesArgumentNode(callable) = this }

  /** Gets the `CallNode` corresponding to this captured variables argument node. */
  CallNode getCallNode() { callable = result.getFunction() }

  /** Gets the `CfgNode` that corresponds to this synthetic node. */
  CfgNode getUnderlyingNode() { callable = result.asCfgNode() }

  /** Gets the scope of the control-flow node this synthetic node is built from. */
  override Scope getScope() { callable.getScope() = result }

  /** Gets the location of the control-flow node this synthetic node is built from. */
  override Location getLocation() { callable.getLocation() = result }

  override string toString() { "Capturing closure argument" = result }
}
|
|
|
|
/**
 * A captured variables argument node viewed as an argument node.
 *
 * This separate class is needed because `argumentOf` is a global predicate.
 */
class CapturedVariablesArgumentNodeAsArgumentNode extends ArgumentNode,
  SynthCapturedVariablesArgumentNode
{
  /**
   * Holds if `call` is a call for this node's `CallNode`, that `CallNode`
   * resolves to a function that is a capturing scope of some captured
   * variable, and `pos` is the lambda-self position.
   */
  override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
    pos.isLambdaSelf() and
    exists(CallNode cn, Function callee |
      cn = this.getCallNode() and
      call.getNode() = cn and
      resolveCall(cn, callee, _) and
      exists(VariableCapture::CapturedVariable v | v.getACapturingScope() = callee)
    )
  }
}
|
|
|
|
/**
 * A synthetic node representing the values of captured variables after the
 * output has been computed.
 */
class SynthCapturedVariablesArgumentPostUpdateNode extends PostUpdateNodeImpl,
  TSynthCapturedVariablesArgumentPostUpdateNode
{
  ControlFlowNode callable;

  SynthCapturedVariablesArgumentPostUpdateNode() {
    TSynthCapturedVariablesArgumentPostUpdateNode(callable) = this
  }

  /** Gets the `PostUpdateNode` (for a `CfgNode`) that corresponds to this synthetic node. */
  PostUpdateNode getUnderlyingNode() { callable = result.getPreUpdateNode().asCfgNode() }

  override string toString() { "[post] Capturing closure argument" = result }

  /** Gets the scope of the control-flow node this synthetic node is built from. */
  override Scope getScope() { callable.getScope() = result }

  /** Gets the location of the control-flow node this synthetic node is built from. */
  override Location getLocation() { callable.getLocation() = result }

  /** Gets the synthetic captured-variables argument node built from the same control-flow node. */
  override SynthCapturedVariablesArgumentNode getPreUpdateNode() {
    TSynthCapturedVariablesArgumentNode(callable) = result
  }
}
|
|
|
|
/** A synthetic node representing the values of variables captured by a comprehension. */
class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVariablesArgumentNode {
  Comp comp;

  SynthCompCapturedVariablesArgumentNode() { TSynthCompCapturedVariablesArgumentNode(comp) = this }

  override string toString() { "Capturing closure argument (comp)" = result }

  /** Gets the scope of the comprehension this node is built from. */
  override Scope getScope() { comp.getScope() = result }

  /** Gets the location of the comprehension this node is built from. */
  override Location getLocation() { comp.getLocation() = result }

  /** Gets the comprehension this node is built from. */
  Comp getComprehension() { comp = result }
}
|
|
|
|
/** A comprehension captured variables argument node viewed as an argument node. */
class SynthCompCapturedVariablesArgumentNodeAsArgumentNode extends SynthCompCapturedVariablesArgumentNode,
  ArgumentNode
{
  /**
   * Holds if `call` is the `ComprehensionCall` for this node's comprehension
   * and `pos` is the lambda-self position.
   */
  override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
    pos.isLambdaSelf() and
    comp = call.(ComprehensionCall).getComprehension()
  }
}
|
|
|
|
/**
 * A synthetic node representing the values of variables captured by a
 * comprehension after the output has been computed.
 */
class SynthCompCapturedVariablesArgumentPostUpdateNode extends PostUpdateNodeImpl,
  TSynthCompCapturedVariablesArgumentPostUpdateNode
{
  Comp comp;

  SynthCompCapturedVariablesArgumentPostUpdateNode() {
    TSynthCompCapturedVariablesArgumentPostUpdateNode(comp) = this
  }

  override string toString() { "[post] Capturing closure argument (comp)" = result }

  /** Gets the scope of the comprehension this node is built from. */
  override Scope getScope() { comp.getScope() = result }

  /** Gets the location of the comprehension this node is built from. */
  override Location getLocation() { comp.getLocation() = result }

  /** Gets the synthetic captured-variables argument node built from the same comprehension. */
  override Node getPreUpdateNode() { TSynthCompCapturedVariablesArgumentNode(comp) = result }
}
|
|
|
|
/** Gets a viable run-time target for the call `call`. */
DataFlowCallable viableCallable(DataFlowCall call) {
  // Extracted calls carry their own resolved callable.
  call instanceof ExtractedDataFlowCall and
  call.getCallable() = result
  or
  // A call to a library callable with a flow summary.
  // Here the callable cannot be resolved from the call, since that would make
  // data flow depend on type tracking. The call is resolved from the summary
  // instead.
  exists(LibraryCallable lib |
    call instanceof PotentialLibraryCall and
    lib.getACall().getNode() = call.getNode() and
    TLibraryCallable(lib) = result
  )
}
|
|
|
|
// =============================================================================
|
|
// Remaining required data-flow things
|
|
// =============================================================================
|
|
/** The algebraic datatype underlying `ReturnKind`; its only branch is `TNormalReturnKind`. */
private newtype TReturnKind = TNormalReturnKind()
|
|
|
|
/**
 * A return kind, describing how a value can be returned from a callable.
 * For Python, this is simply a method return.
 */
class ReturnKind extends TReturnKind {
  /** Gets a textual representation of this element. */
  string toString() { "return" = result }
}
|
|
|
|
/** A data flow node that represents a value returned by a callable. */
abstract class ReturnNode extends Node {
  /**
   * Gets the kind of this return node.
   *
   * The result is left unconstrained, so every `ReturnKind` is a result.
   */
  ReturnKind getKind() { exists(result) }
}
|
|
|
|
/**
 * A return node backed by a control-flow node: either a flow node of the
 * value of a `return` statement, or a flow node of a `yield` expression.
 */
class ExtractedReturnNode extends ReturnNode, CfgNode {
  // See `TaintTrackingImplementation::returnFlowStep`
  ExtractedReturnNode() {
    exists(Return ret | ret.getValue().getAFlowNode() = node)
    or
    exists(Yield yield | yield.getAFlowNode() = node)
  }

  /** Gets the kind of this return node; left unconstrained, as in `ReturnNode`. */
  override ReturnKind getKind() { any() }
}
|
|
|
|
/**
 * A data flow node for the value yielded by a callable that carries a
 * `contextlib.contextmanager` decorator. It is treated as a normal return,
 * which makes things just work when the callable is used in a `with`
 * statement. Technically, calling the function directly gives you a
 * `contextlib._GeneratorContextManager` instance, so this is a slight
 * workaround solution.
 *
 * See https://docs.python.org/3/library/contextlib.html#contextlib.contextmanager
 */
class YieldNodeInContextManagerFunction extends ReturnNode, CfgNode {
  YieldNodeInContextManagerFunction() {
    exists(Yield yield | yield.getValue().getAFlowNode() = node) and
    hasContextmanagerDecorator(node.getScope())
  }

  /** Gets the kind of this return node; left unconstrained, as in `ReturnNode`. */
  override ReturnKind getKind() { any() }
}
|
|
|
|
/**
 * A data-flow node that represents the output of a call.
 *
 * Concrete subclasses decide which call and which return kind they belong to.
 */
abstract class OutNode extends Node {
  /** Gets the underlying call for which this node is an output of kind `kind`. */
  abstract DataFlowCall getCall(ReturnKind kind);
}
|
|
|
|
/** Provides the concrete `OutNode` implementations. */
private module OutNodes {
  /**
   * A data-flow node that reads a value returned directly by a callable.
   */
  class ExprOutNode extends OutNode, ExprNode {
    private DataFlowCall extractedCall;

    ExprOutNode() { this.getNode() = extractedCall.(ExtractedDataFlowCall).getNode() }

    /** Gets the extracted call sharing this node's control-flow node, for any `kind`. */
    override DataFlowCall getCall(ReturnKind kind) {
      // `kind` is deliberately left unconstrained.
      exists(kind) and
      result = extractedCall
    }
  }

  /**
   * A flow-summary node that is the output of a call made inside a flow
   * summary, as determined by `FlowSummaryImpl::Private::summaryOutNode`.
   */
  private class SummaryOutNode extends FlowSummaryNode, OutNode {
    private SummaryCall summaryCall;
    private ReturnKind returnKind;

    SummaryOutNode() {
      FlowSummaryImpl::Private::summaryOutNode(summaryCall.getReceiver(), this.getSummaryNode(),
        returnKind)
    }

    /** Gets the summary call for which this node is the output of kind `kind`. */
    override DataFlowCall getCall(ReturnKind kind) {
      kind = returnKind and
      result = summaryCall
    }
  }
}
|
|
|
|
/**
 * Gets a node that can read the value returned from `call` with return kind `kind`.
 */
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { result.getCall(kind) = call }
|