Files
codeql/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
Taus 3f718123a6 Python: Make capturing closure arguments synthetic and non-global
Uses the same trick as for `ExtractedArgumentNode`, wherein we postpone
the global restriction on the charpred to instead be in the `argumentOf`
predicate (which is global anyway).

In addition to this, we also converted `CapturedVariablesArgumentNode`
into a proper synthetic node, and added an explicit post-update node for
it. These nodes just act as wrappers for the function part of call
nodes. Thus, to make them work with the variable capture machinery, we
simply map them to the closure node for the corresponding control-flow
or post-update node.
2026-01-30 12:50:25 +00:00

1924 lines
67 KiB
Plaintext

/**
* INTERNAL: Do not use.
*
* TypeTracker based call-graph.
*
* The overall scheme for resolving calls, is to notice that Python has different kinds
* of callables, and resolve those with different strategies. Currently we handle these
* completely separately:
* 1. plain functions (and lambdas)
* 2. methods on classes
* 3. class instantiation
*
* So we have type-trackers for each of the 3 categories above, with some considerable
* effort to handle different kinds of methods on classes (staticmethod, classmethod,
* normal), and resolving methods correctly in regards to MRO.
*
*
* A goal of this library is to support modeling calls that are made by third-party
* libraries, for example `call_later(func, arg0, arg1, foo=val)`, and the fact that the
* library might inject its own arguments, for example a context that will always be
* passed as the actual first argument to the function. Currently the aim is to provide
* enough predicates for such `call_later` function to be modeled by providing
* additional data-flow steps for the arguments/parameters. This means we cannot have
* any special logic that requires an AST call to be made before we care to figure out
* what callable this call might end up targeting.
*
* Specifically this means that we cannot use type-backtrackers from the function of a
* `CallNode`, since there is no `CallNode` to backtrack from for `func` in the example
* above.
*
* Note: This hasn't been 100% realized yet, so we don't currently expose a predicate to
* ask what targets any data-flow node has. But it's still the plan to do this!
*/
private import python
private import DataFlowPublic
private import DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl
private import semmle.python.internal.CachedStages
private import semmle.python.dataflow.new.internal.TypeTrackingImpl::CallGraphConstruction as CallGraphConstruction
newtype TParameterPosition =
  /** The position of `self` in methods, and of `cls` in classmethods. */
  TSelfParameterPosition() or
  /**
   * The position used when tracking flow through captured variables. It is kept
   * separate from the ordinary `self` position so that "lambda self" can be told
   * apart from "normal self": a lambda may itself access an outer `self` variable
   * (through variable capture).
   */
  TLambdaSelfParameterPosition() or
  TPositionalParameterPosition(int index) {
    exists(Parameter p | index = p.getPosition())
    or
    // Synthetic parameters are created for synthetic summary callables based on the
    // argument positions they have flow for, so those positions must exist as
    // parameter positions too.
    FlowSummaryImpl::ParsePositions::isParsedPositionalArgumentPosition(_, index)
  } or
  TPositionalParameterLowerBoundPosition(int pos) {
    FlowSummaryImpl::ParsePositions::isParsedArgumentLowerBoundPosition(_, pos)
  } or
  TKeywordParameterPosition(string name) {
    exists(Parameter p | name = p.getName())
    or
    // same reasoning as for `TPositionalParameterPosition`
    FlowSummaryImpl::ParsePositions::isParsedKeywordArgumentPosition(_, name)
  } or
  TStarArgsParameterPosition(int index) {
    // `.getPosition` does not work for `*args`, so we provide `*args` positions at
    // one past every positional parameter position, and additionally at 0.
    // This is a bit of an over-approximation.
    index = 0
    or
    exists(Parameter p | index = p.getPosition() + 1)
  } or
  TSynthStarArgsElementParameterPosition(int index) { exists(TStarArgsParameterPosition(index)) } or
  TDictSplatParameterPosition() or
  // To get flow from a `**kwargs` argument to a keyword parameter, a read-step is added
  // from a synthetic `**kwargs` parameter. A separate synthetic ParameterNode is needed
  // because content is cleared from the normal `**kwargs` parameter for the names that
  // correspond to normal keyword parameters. As one parameter position cannot be shared
  // by multiple parameter nodes of the same callable, this dedicated position exists.
  TSynthDictSplatParameterPosition()
/** A position that a parameter of a callable can occupy. */
class ParameterPosition extends TParameterPosition {
  /** Holds if this is the position of a `self`/`cls` parameter. */
  predicate isSelf() { this instanceof TSelfParameterPosition }

  /**
   * Holds if this is the position of a reference to a lambda itself.
   * Only used for tracking flow through captured variables.
   */
  predicate isLambdaSelf() { this instanceof TLambdaSelfParameterPosition }

  /** Holds if this is the position of the positional parameter at (0-based) `index`. */
  predicate isPositional(int index) { TPositionalParameterPosition(index) = this }

  /** Holds if this position stands for any positional parameter from position `pos` onwards. */
  predicate isPositionalLowerBound(int pos) { TPositionalParameterLowerBoundPosition(pos) = this }

  /** Holds if this is the position of the keyword parameter named `name`. */
  predicate isKeyword(string name) { TKeywordParameterPosition(name) = this }

  /** Holds if this is the position of a `*args` parameter at (0-based) `index`. */
  predicate isStarArgs(int index) { TStarArgsParameterPosition(index) = this }

  /**
   * Holds if this is the position of a synthetic parameter at or after (0-based)
   * position `index`, from which a store step is made to the real `*args` parameter.
   */
  predicate isSynthStarArgsElement(int index) {
    TSynthStarArgsElementParameterPosition(index) = this
  }

  /** Holds if this is the position of a `**kwargs` parameter. */
  predicate isDictSplat() { this instanceof TDictSplatParameterPosition }

  /**
   * Holds if this is the position of a **synthetic** `**kwargs` parameter
   * (see the comment on `TSynthDictSplatParameterPosition`).
   */
  predicate isSynthDictSplat() { this instanceof TSynthDictSplatParameterPosition }

  /** Gets a textual representation of this element. */
  string toString() {
    result = "self" and this.isSelf()
    or
    result = "lambda self" and this.isLambdaSelf()
    or
    exists(int index | this.isPositional(index) | result = "position " + index)
    or
    exists(int pos | this.isPositionalLowerBound(pos) | result = "position " + pos + "..")
    or
    exists(string name | this.isKeyword(name) | result = "keyword " + name)
    or
    exists(int index | this.isStarArgs(index) | result = "*args at " + index)
    or
    exists(int index | this.isSynthStarArgsElement(index) |
      result = "synthetic *args element at (or after) " + index
    )
    or
    result = "**" and this.isDictSplat()
    or
    result = "synthetic **" and this.isSynthDictSplat()
  }
}
newtype TArgumentPosition =
  /** The position of `self` in methods, and of `cls` in classmethods. */
  TSelfArgumentPosition() or
  /**
   * The position used when tracking flow through captured variables. It is kept
   * separate from the ordinary `self` position so that "lambda self" can be told
   * apart from "normal self": a lambda may itself access an outer `self` variable
   * (through variable capture).
   */
  TLambdaSelfArgumentPosition() or
  TPositionalArgumentPosition(int index) {
    exists(CallNode c | exists(c.getArg(index)))
    or
    // Synthetic calls inside a summarized callable may use an argument position that
    // no real call uses; such positions are specified as parameters in the
    // flow-summary spec, so they are made available here.
    FlowSummaryImpl::ParsePositions::isParsedPositionalParameterPosition(_, index)
    or
    // the generated function inside a comprehension has a positional argument at index 0
    index = 0 and
    exists(Comp c)
  } or
  TKeywordArgumentPosition(string name) {
    exists(CallNode c | exists(c.getArgByName(name)))
    or
    // same reasoning as for `TPositionalArgumentPosition`
    FlowSummaryImpl::ParsePositions::isParsedKeywordParameterPosition(_, name)
  } or
  TStarArgsArgumentPosition(int index) {
    any(Call c).getPositionalArg(index) instanceof Starred
  } or
  TDictSplatArgumentPosition()
/** A position that an argument of a call can occupy. */
class ArgumentPosition extends TArgumentPosition {
  /** Holds if this is the position of a `self`/`cls` argument. */
  predicate isSelf() { this instanceof TSelfArgumentPosition }

  /**
   * Holds if this is the position of a lambda `self` argument.
   * Only used for tracking flow through captured variables.
   */
  predicate isLambdaSelf() { this instanceof TLambdaSelfArgumentPosition }

  /** Holds if this is the position of the positional argument at (0-based) `index`. */
  predicate isPositional(int index) { TPositionalArgumentPosition(index) = this }

  /** Holds if this is the position of the keyword argument named `name`. */
  predicate isKeyword(string name) { TKeywordArgumentPosition(name) = this }

  /** Holds if this is the position of a `*args` argument at (0-based) `index`. */
  predicate isStarArgs(int index) { TStarArgsArgumentPosition(index) = this }

  /** Holds if this is the position of a `**kwargs` argument. */
  predicate isDictSplat() { this instanceof TDictSplatArgumentPosition }

  /** Gets a textual representation of this element. */
  string toString() {
    result = "self" and this.isSelf()
    or
    result = "lambda self" and this.isLambdaSelf()
    or
    exists(int pos | this.isPositional(pos) | result = "position " + pos)
    or
    exists(string name | this.isKeyword(name) | result = "keyword " + name)
    or
    exists(int index | this.isStarArgs(index) | result = "*args at " + index)
    or
    result = "**" and this.isDictSplat()
  }
}
/**
 * Holds if an argument at position `apos` is matched with a parameter at position `ppos`.
 *
 * Besides the like-for-like cases, a positional argument also matches a positional
 * lower-bound parameter position and a synthetic `*args` element parameter position
 * whose index is not greater than the argument index, and a `**kwargs` argument
 * matches both the real and the synthetic `**kwargs` parameter position.
 */
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
  apos.isSelf() and ppos.isSelf()
  or
  apos.isLambdaSelf() and ppos.isLambdaSelf()
  or
  exists(int index | ppos.isPositional(index) | apos.isPositional(index))
  or
  exists(int lowerBound, int argIndex |
    ppos.isPositionalLowerBound(lowerBound) and apos.isPositional(argIndex)
  |
    lowerBound <= argIndex
  )
  or
  exists(string name | ppos.isKeyword(name) | apos.isKeyword(name))
  or
  exists(int index | ppos.isStarArgs(index) | apos.isStarArgs(index))
  or
  exists(int paramIndex, int argIndex |
    ppos.isSynthStarArgsElement(paramIndex) and apos.isPositional(argIndex)
  |
    paramIndex <= argIndex
  )
  or
  apos.isDictSplat() and
  (
    ppos.isDictSplat()
    or
    ppos.isSynthDictSplat()
  )
}
// =============================================================================
// Helper predicates
// =============================================================================
/**
 * Holds if the function `func` is a staticmethod, that is, it has a decorator that
 * is a reference to the global name `staticmethod`.
 */
predicate isStaticmethod(Function func) {
  func.getADecorator() =
    any(NameNode id | id.getId() = "staticmethod" and id.isGlobal()).getNode()
}
/**
 * Holds if the function `func` is a classmethod -- either because it has a decorator
 * that is a reference to the global name `classmethod`, or by convention: a method on
 * a class named `__new__`, `__init_subclass__` or `__class_getitem__` is treated as
 * a classmethod even without the decorator.
 */
predicate isClassmethod(Function func) {
  func.getADecorator() =
    any(NameNode id | id.getId() = "classmethod" and id.isGlobal()).getNode()
  or
  func = any(Class cls).getAMethod() and
  func.getName() =
    [
      "__new__", // https://docs.python.org/3.10/reference/datamodel.html#object.__new__
      "__init_subclass__", // https://docs.python.org/3.10/reference/datamodel.html#object.__init_subclass__
      "__class_getitem__" // https://docs.python.org/3.10/reference/datamodel.html#object.__class_getitem__
    ]
}
/**
 * Holds if the function `func` has a decorator that is a reference to the global
 * name `property`.
 */
predicate hasPropertyDecorator(Function func) {
  func.getADecorator() = any(NameNode id | id.getId() = "property" and id.isGlobal()).getNode()
}
/**
 * Holds if the function `func` has a `contextlib.contextmanager` decorator, written
 * either as the global name `contextmanager` or as the attribute access
 * `contextlib.contextmanager`.
 */
predicate hasContextmanagerDecorator(Function func) {
  exists(ControlFlowNode decorator | func.getADecorator() = decorator.getNode() |
    // `@contextmanager`
    exists(NameNode name | name = decorator |
      name.getId() = "contextmanager" and name.isGlobal()
    )
    or
    // `@contextlib.contextmanager`
    exists(NameNode base | base = decorator.(AttrNode).getObject("contextmanager") |
      base.getId() = "contextlib"
    )
  )
}
// =============================================================================
// Callables
// =============================================================================
/**
* A callable defined in library code, identified by a unique string.
*
* The characteristic predicate of this abstract class accepts any string, which is
* why it carries a `bindingset[this]` annotation.
*/
abstract class LibraryCallable extends string {
bindingset[this]
LibraryCallable() { any() }
/** Gets a call to this library callable. */
abstract CallCfgNode getACall();
/**
* Same as `getACall` but without referring to the call graph or API graph.
*
* The default implementation has no results.
*/
CallCfgNode getACallSimple() { none() }
/** Gets a data-flow node, where this library callable is used as a call-back. */
abstract ArgumentNode getACallback();
}
newtype TDataFlowCallable =
  /**
   * The target used for all calls: plain functions, lambdas, methods on classes,
   * class instantiations, and (in the future) special methods.
   */
  TFunction(Function func) {
    // a function that has an explicit definition
    exists(func.getDefinition())
    or
    // the synthetic function created for a generator/list-comprehension
    func = any(Comp c).getFunction()
  } or
  /** See the QLDoc for `DataFlowModuleScope` for why this branch is needed. */
  TModule(Module m) or
  TLibraryCallable(LibraryCallable callable)
/** A callable, as seen by the data-flow library. */
abstract class DataFlowCallable extends TDataFlowCallable {
  /** Gets a textual representation of this element. */
  abstract string toString();

  /** Gets the location of this dataflow callable. */
  abstract Location getLocation();

  /** Gets the scope of this callable. */
  abstract Scope getScope();

  /** Gets the qualified name for this callable, if any. */
  abstract string getQualifiedName();

  /** Gets the parameter at position `ppos`, if any. */
  abstract ParameterNode getParameter(ParameterPosition ppos);

  /** Gets the underlying library callable, if any. */
  LibraryCallable asLibraryCallable() { TLibraryCallable(result) = this }
}
/**
* A callable function.
*
* Functions that have a `property` decorator are excluded by the characteristic
* predicate.
*/
abstract class DataFlowFunction extends DataFlowCallable, TFunction {
/** The Python function that this callable wraps. */
Function func;
DataFlowFunction() {
this = TFunction(func) and
// TODO: Handle @property decorators
not hasPropertyDecorator(func)
}
override string toString() { result = func.toString() }
override string getQualifiedName() { result = func.getQualifiedName() }
override Function getScope() { result = func }
override Location getLocation() { result = func.getLocation() }
/**
* Gets the positional parameter offset, to take into account self/cls parameters.
*
* This is 0 here; subclasses may override it.
*/
int positionalOffset() { result = 0 }
/**
* Gets the parameter node of this function at position `ppos`, if any.
*
* Positional positions are shifted by `positionalOffset()` before being looked up
* with `func.getArg`. Keyword positions are looked up by name. The `*args`
* positions (real and synthetic) only exist at the index right after the last
* positional parameter. The `**kwargs` positions map to the real `**kwargs`
* parameter and to the synthetic dict-splat parameter node respectively.
*/
override ParameterNode getParameter(ParameterPosition ppos) {
// Do not handle lower bound positions (such as `[1..]`) here
// they are handled by parameter matching and would create
// inconsistencies here as multiple parameters could match such a position.
exists(int index | ppos.isPositional(index) |
result.getParameter() = func.getArg(index + this.positionalOffset())
)
or
exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
or
// `*args`
exists(int index |
(
ppos.isStarArgs(index) and
result.getParameter() = func.getVararg()
or
ppos.isSynthStarArgsElement(index) and
result = TSynthStarArgsElementParameterNode(this)
)
|
// a `*args` parameter comes after the last positional parameter. We need to take
// self parameter into account, so for
// `def func(foo, bar, *args)` it should be index 2 (pos-param-count == 2)
// `class A: def func(self, foo, bar, *args)` it should be index 2 (pos-param-count - 1 == 3 - 1)
index = func.getPositionalParameterCount() - this.positionalOffset()
or
// no positional argument
not exists(func.getArg(_)) and index = 0
)
or
// `**kwargs`
ppos.isDictSplat() and result.getParameter() = func.getKwarg()
or
// synthetic `**kwargs` parameter (see `TSynthDictSplatParameterPosition`)
ppos.isSynthDictSplat() and result = TSynthDictSplatParameterNode(this)
}
}
/** A plain function, that is, a `DataFlowFunction` that is not a `DataFlowMethod`. */
class DataFlowPlainFunction extends DataFlowFunction {
  DataFlowPlainFunction() {
    // every function that is not a method on some class
    not this instanceof DataFlowMethod
  }
}
/** A method, that is, a function that is a method of some class. */
class DataFlowMethod extends DataFlowFunction {
  /** The class that `func` is a method of. */
  Class cls;

  DataFlowMethod() { func = cls.getAMethod() }

  /** Gets the class this function is a method of. */
  Class getClass() { result = cls }

  /** Gets 1, since the first declared parameter is the `self`/`cls` parameter. */
  override int positionalOffset() { result = 1 }

  override ParameterNode getParameter(ParameterPosition ppos) {
    // all non-self positions are handled by `DataFlowFunction`
    result = super.getParameter(ppos)
    or
    // the first declared parameter is the self parameter
    result.getParameter() = func.getArg(0) and
    ppos.isSelf()
  }
}
/** A method for which `isClassmethod` holds. */
class DataFlowClassmethod extends DataFlowMethod {
  DataFlowClassmethod() {
    // decorated with `@classmethod`, or a classmethod by convention
    isClassmethod(func)
  }
}
/**
 * A method for which `isStaticmethod` holds.
 *
 * A staticmethod has no `self`/`cls` parameter, so the positional offset is reset
 * to 0, and parameter lookup uses the `DataFlowFunction` implementation directly
 * rather than the `DataFlowMethod` one (which maps the first parameter to `self`).
 */
class DataFlowStaticmethod extends DataFlowMethod, DataFlowFunction {
  DataFlowStaticmethod() { isStaticmethod(func) }

  override ParameterNode getParameter(ParameterPosition ppos) {
    result = DataFlowFunction.super.getParameter(ppos)
  }

  override int positionalOffset() { result = 0 }
}
/**
 * A module. This is not actually a callable, but it is needed so that a
 * `ModuleVariableNode` has an enclosing callable.
 */
class DataFlowModuleScope extends DataFlowCallable, TModule {
  /** The module that this scope represents. */
  Module mod;

  DataFlowModuleScope() { TModule(mod) = this }

  override Module getScope() { result = mod }

  override string getQualifiedName() { result = mod.getName() }

  override string toString() { result = mod.toString() }

  override Location getLocation() { result = mod.getLocation() }

  /** A module has no parameters. */
  override ParameterNode getParameter(ParameterPosition ppos) { none() }
}
/**
 * A data-flow callable that wraps a `LibraryCallable`.
 *
 * It has no scope, no location, and no parameter nodes.
 */
class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
  /** The wrapped library callable. */
  LibraryCallable callable;

  LibraryCallableValue() { TLibraryCallable(callable) = this }

  override LibraryCallable asLibraryCallable() { result = callable }

  /** Gets a data-flow node, where this library callable is used as a call-back. */
  ArgumentNode getACallback() { result = callable.getACallback() }

  override string getQualifiedName() { result = callable.toString() }

  override string toString() { result = "LibraryCallableValue: " + callable.toString() }

  override Scope getScope() { none() }

  override Location getLocation() { none() }

  override ParameterNode getParameter(ParameterPosition ppos) { none() }
}
// =============================================================================
// Type trackers used to resolve calls.
// =============================================================================
/** Gets a call whose function is a reference to the global name `type`. */
private CallCfgNode getTypeCall() {
  result.getFunction().asCfgNode() = any(NameNode id | id.getId() = "type" and id.isGlobal())
}
/** Gets a call whose function is a reference to the global name `super`. */
private CallCfgNode getSuperCall() {
  // It is possible to take a reference to `super` and call it later, but this is almost
  // never done in practice. Looking at the top 1000 projects showed a few uses around
  // mocking (see link below), and otherwise only 2 edge cases. Overall it seems ok to
  // ignore this complexity.
  //
  // https://github.com/python/cpython/blob/18b1782192f85bd26db89f5bc850f8bee4247c1a/Lib/unittest/mock.py#L48-L50
  result.getFunction().asCfgNode() = any(NameNode id | id.getId() = "super" and id.isGlobal())
}
/**
 * Holds if the file `f` should be ignored when computing the call-graph.
 *
 * We currently see a performance problem when analyzing the `sympy` PyPI package,
 * which can be part of the database when dependencies are installed and extracted.
 * From what we can understand, SymPy is using Python in an exotic way, so the fact
 * that our analysis currently does not handle this project says nothing about our
 * ability to handle normal Python code. Furthermore, SymPy does not look to be
 * relevant in a security context, so we should not lose out on any security results
 * by doing this.
 */
private predicate ignoreForCallGraph(File f) {
  exists(string path | path = f.getAbsolutePath() | path.matches("%/site-packages/sympy/%"))
}
/** Input for the type tracker behind `functionTracker`; the tracked state is a `Function`. */
private module TrackFunctionInput implements CallGraphConstruction::Simple::InputSig {
  class State = Function;

  /** Holds if tracking of `func` starts at `start`. */
  predicate start(Node start, Function func) {
    exists(Expr definition | definition = func.getDefinition() |
      start.asExpr() = definition
      or
      // when a function is decorated, it's the result of the (last) decorator call
      // that is used
      start.asExpr() = definition.(FunctionExpr).getADecoratorCall()
    )
  }

  /** Holds for nodes located in a file that is ignored for call-graph construction. */
  predicate filter(Node n) { ignoreForCallGraph(n.getLocation().getFile()) }
}
/**
 * Gets a data-flow node that is a reference to the function `func`.
 */
Node functionTracker(Function func) {
  exists(LocalSourceNode origin |
    origin = CallGraphConstruction::Simple::Make<TrackFunctionInput>::track(func)
  |
    origin.flowsTo(result)
  )
}
/** Input for the type tracker behind `classTracker`; the tracked state is a `Class`. */
private module TrackClassInput implements CallGraphConstruction::Simple::InputSig {
  class State = Class;

  /** Holds if tracking of `cls` starts at `start`. */
  predicate start(Node start, Class cls) {
    // the class definition itself
    start.asExpr() = cls.getParent()
    or
    // when a class is decorated, it's the result of the (last) decorator call that
    // is used
    start.asExpr() = cls.getParent().getADecoratorCall()
    or
    // `type(obj)`, where obj is an instance of this class
    exists(CallCfgNode typeCall | typeCall = getTypeCall() and start = typeCall |
      typeCall.getArg(0) = classInstanceTracker(cls)
    )
  }

  /** Holds for nodes in ignored files, and for parameter nodes at the `self` position. */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
/**
 * Gets a data-flow node that is a reference to the class `cls`.
 */
Node classTracker(Class cls) {
  exists(LocalSourceNode origin |
    origin = CallGraphConstruction::Simple::Make<TrackClassInput>::track(cls)
  |
    origin.flowsTo(result)
  )
}
/**
 * Input for the type tracker behind `classInstanceTracker`; the tracked state is the
 * `Class` that the tracked value is an instance of.
 */
private module TrackClassInstanceInput implements CallGraphConstruction::Simple::InputSig {
  class State = Class;

  /** Holds if tracking of an instance of `cls` starts at `start`. */
  predicate start(Node start, Class cls) {
    // an expression that is annotated with a reference to the class
    exists(Annotation ann | start.asExpr() = ann.getAnnotatedExpression() |
      ann = classTracker(cls).asExpr()
    )
    or
    // a call for which `resolveClassCall` holds with `cls`
    exists(CallCfgNode call | call = start | resolveClassCall(call.asCfgNode(), cls))
    or
    // result of `super().__new__` as used in a `__new__` method implementation
    exists(CallCfgNode call | call = start |
      fromSuperNewCall(call.asCfgNode(), getADirectSuperclass*(cls), _, _)
    )
  }

  /** Holds for nodes in ignored files, and for parameter nodes at the `self` position. */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
/**
 * Gets a data-flow node that is a reference to an instance of the class `cls`.
 */
Node classInstanceTracker(Class cls) {
  exists(LocalSourceNode origin |
    origin = CallGraphConstruction::Simple::Make<TrackClassInstanceInput>::track(cls)
  |
    origin.flowsTo(result)
  )
}
/**
 * Input for the type tracker behind `selfTracker`; the tracked state is the `Class`
 * that has the method whose first parameter is tracked.
 */
private module TrackSelfInput implements CallGraphConstruction::Simple::InputSig {
  class State = Class;

  /**
   * Holds if `start` is the first parameter of a method on `classWithMethod` that is
   * neither a staticmethod nor a classmethod.
   */
  predicate start(Node start, Class classWithMethod) {
    exists(Function method |
      method = classWithMethod.getAMethod() and
      start.asExpr() = method.getArg(0)
    |
      not (
        isStaticmethod(method)
        or
        isClassmethod(method)
      )
    )
  }

  /** Holds for nodes in ignored files, and for parameter nodes at the `self` position. */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
/**
 * Gets a data-flow node that is a reference to the `self` argument of a method on
 * the class `classWithMethod`. The method is neither a `staticmethod` nor a
 * `classmethod`.
 */
Node selfTracker(Class classWithMethod) {
  exists(LocalSourceNode origin |
    origin = CallGraphConstruction::Simple::Make<TrackSelfInput>::track(classWithMethod)
  |
    origin.flowsTo(result)
  )
}
/**
 * Input for the type tracker behind `clsArgumentTracker`; the tracked state is the
 * `Class` that has the method from which the class reference is obtained.
 */
private module TrackClsArgumentInput implements CallGraphConstruction::Simple::InputSig {
  class State = Class;

  /** Holds if tracking of a reference to `classWithMethod` starts at `start`. */
  predicate start(Node start, Class classWithMethod) {
    // the first parameter of a classmethod
    exists(Function method |
      isClassmethod(method) and
      method = classWithMethod.getAMethod()
    |
      start.asExpr() = method.getArg(0)
    )
    or
    // type(self)
    exists(CallCfgNode typeCall | typeCall = getTypeCall() and start = typeCall |
      typeCall.getArg(0) = selfTracker(classWithMethod)
    )
  }

  /** Holds for nodes in ignored files, and for parameter nodes at the `self` position. */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
/**
 * Gets a data-flow node that is a reference to the enclosing class `classWithMethod`
 * from within one of its methods: either through the `cls` argument of a
 * `classmethod`, or through `type(self)` in a normal method.
 */
Node clsArgumentTracker(Class classWithMethod) {
  exists(LocalSourceNode origin |
    origin = CallGraphConstruction::Simple::Make<TrackClsArgumentInput>::track(classWithMethod)
  |
    origin.flowsTo(result)
  )
}
/**
 * Input for the type tracker behind `superCallNoArgumentTracker`; the tracked state is
 * the `Function` in whose scope the argument-less `super()` call occurs.
 */
private module TrackSuperCallNoArgumentInput implements CallGraphConstruction::Simple::InputSig {
  class State = Function;

  /**
   * Holds if `start` is a call to `super` without arguments, whose scope is `func`,
   * and `func` is not a staticmethod.
   */
  predicate start(Node start, Function func) {
    start =
      any(CallCfgNode call |
        call = getSuperCall() and
        call.getScope() = func and
        not exists(call.getArg(_))
      ) and
    not isStaticmethod(func)
  }

  /** Holds for nodes in ignored files, and for parameter nodes at the `self` position. */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
/**
 * Gets a data-flow node that is a reference to the result of calling `super` without
 * any argument, where the call happened in the function `func` (either a method or
 * a classmethod).
 */
Node superCallNoArgumentTracker(Function func) {
  exists(LocalSourceNode origin |
    origin = CallGraphConstruction::Simple::Make<TrackSuperCallNoArgumentInput>::track(func)
  |
    origin.flowsTo(result)
  )
}
/**
 * Input for the type tracker behind `superCallTwoArgumentTracker`; the tracked state
 * is the two-argument `super` call itself.
 */
private module TrackSuperCallTwoArgumentInput implements CallGraphConstruction::Simple::InputSig {
  /**
   * Holds if `call` is a call to `super` whose first argument is a reference to the
   * class `cls`, and whose second argument is `obj`.
   */
  additional predicate superCall(CallCfgNode call, Class cls, Node obj) {
    obj = call.getArg(1) and
    classTracker(cls) = call.getArg(0) and
    call = getSuperCall()
  }

  class State = CallCfgNode;

  /** Holds if `start` is the two-argument `super` call `call`. */
  predicate start(Node start, CallCfgNode call) {
    start = call and
    superCall(call, _, _)
  }

  /** Holds for nodes in ignored files, and for parameter nodes at the `self` position. */
  predicate filter(Node n) {
    exists(ParameterPosition selfPos | selfPos.isSelf() |
      n.(ParameterNodeImpl).isParameterOf(_, selfPos)
    )
    or
    ignoreForCallGraph(n.getLocation().getFile())
  }
}
/**
 * Gets a data-flow node that is a reference to the result of calling `super` with 2
 * arguments, where the first argument is a reference to the class `cls`, and the
 * second argument is `obj`.
 */
Node superCallTwoArgumentTracker(Class cls, Node obj) {
  exists(CallCfgNode call, LocalSourceNode origin |
    TrackSuperCallTwoArgumentInput::superCall(call, cls, obj) and
    origin = CallGraphConstruction::Simple::Make<TrackSuperCallTwoArgumentInput>::track(call)
  |
    origin.flowsTo(result)
  )
}
// =============================================================================
// MRO
// =============================================================================
/**
 * Gets a direct superclass of the argument `cls`, if any.
 *
 * For example, given the class definition `class A(B, C)`, the results for `A` are
 * `B` and `C`.
 */
Class getADirectSuperclass(Class cls) {
  exists(Expr base | base = cls.getABase() | base = classTracker(result).asExpr())
}
/**
 * Gets a direct subclass of the argument `cls`, if any.
 *
 * For example, given the class definition `class A(B)`, the result for `B` is `A`.
 */
Class getADirectSubclass(Class cls) { getADirectSuperclass(result) = cls }
/**
* Gets a class that, from an approximated MRO calculation, might be the next class used
* for member-lookup when `super().attr` is used inside the class `cls`.
*
* In the example below, with `cls=B`, this predicate will have `A` and `C` as results.
* ```py
* class A: pass
* class B(A): pass
* class C(A): pass
* class D(B, C): pass
* ```
*
* NOTE: This approximation does not handle all cases correctly, and in the example
* below, with `cls=A` will not have any results, although it should include `Y`.
*
* ```py
* class A: pass
* class B(A): pass
* class X: pass
* class Y(X): pass
* class Ex(B, Y): pass
* ```
*
* NOTE for debugging the results of this predicate: Since a class can be part of
* multiple MROs, results from this predicate might only be valid in some, but not all,
* inheritance chains: This is the case with the result `C` for `cls=B` in the first
* example -- if `B` and `C` are defined in the same file, but `D` in a different file,
* this might make the results from this predicate difficult to comprehend at first.
*
* For more info on the C3 MRO used in Python see:
* - https://docs.python.org/3/glossary.html#term-method-resolution-order
* - https://www.python.org/download/releases/2.3/mro/
* - https://opendylan.org/_static/c3-linearization.pdf
*/
private Class getNextClassInMro(Class cls) {
// class A(B, ...):
// `B` must be the next class after `A` in the MRO for A.
cls.getBase(0) = classTracker(result).asExpr()
or
// class A(B, C, D):
// - `C` could be the next class after `B` in MRO.
// - `D` could be the next class after `C` in MRO.
// (a class listed twice in a row is not reported as its own successor)
exists(Class sub, int i |
sub.getBase(i) = classTracker(cls).asExpr() and
sub.getBase(i + 1) = classTracker(result).asExpr() and
not result = cls
)
// There are three important properties for MRO computed with C3 in Python:
//
// 1) monotonicity: if C1 precedes C2 in the MRO of C, then C1 precedes C2 in the MRO
// of any subclass of C.
// 2) local precedence ordering: if C1 precedes C2 in the list of superclasses for C,
// they will keep the same order in the MRO for C (and due to monotonicity, any
// subclass).
// 3) consistency with the extended precedence graph: if A and B (that are part of the
// class hierarchy of C) do not have a subclass/superclass relationship on their
// own, the ordering of A and B in the MRO of C will be determined by the local
// precedence ordering in the classes that use both A and B, either directly or
// through a subclass. (see paper for more details)
//
// Note that not all class hierarchies are allowed with C3, see the Python 2.3 article
// for examples.
}
/**
* Gets a potential definition of the function `name` according to our approximation of
* MRO for the class `cls` (see `getNextClassInMro` for more information).
*
* A method named `name` that is defined directly on `cls` is a result. Only when `cls`
* has no method with that name does the lookup continue recursively in the classes
* given by `getNextClassInMro(cls)`.
*/
Function findFunctionAccordingToMro(Class cls, string name) {
result = cls.getAMethod() and
result.getName() = name
or
// `cls` does not define `name` itself, so continue with the next class(es) in the MRO
not class_has_method(cls, name) and
result = findFunctionAccordingToMro(getNextClassInMro(cls), name)
}
/**
* Holds if the class `cls` has a method named `name`.
*
* Join-order helper for `findFunctionAccordingToMro` and `findFunctionAccordingToMroKnownStartingClass`.
*/
pragma[nomagic]
private predicate class_has_method(Class cls, string name) { cls.getAMethod().getName() = name }
/**
* Gets a class that, from an approximated MRO calculation, might be the next class
* after `cls` in the MRO for `startingClass`.
*
* Note: this is almost the same as `getNextClassInMro`, except we know the
* `startingClass`, which can give slightly more precise results.
*
* See QLDoc for `getNextClassInMro`.
*/
Class getNextClassInMroKnownStartingClass(Class cls, Class startingClass) {
// the first base of `cls`, where `cls` is `startingClass` or one of its
// (transitive) superclasses
cls.getBase(0) = classTracker(result).asExpr() and
cls = getADirectSuperclass*(startingClass)
or
// the base listed right after `cls` in the bases of `sub`, where `sub` is
// `startingClass` or one of its (transitive) superclasses
exists(Class sub, int i | sub = getADirectSuperclass*(startingClass) |
sub.getBase(i) = classTracker(cls).asExpr() and
sub.getBase(i + 1) = classTracker(result).asExpr() and
not result = cls
)
}
/**
* Gets a potential definition of the function `name` of the class `cls` according to our approximation of
* MRO for the class `startingClass` (see `getNextClassInMroKnownStartingClass` for more information).
*
* Note: this is almost the same as `findFunctionAccordingToMro`, except we know the
* `startingClass`, which can give slightly more precise results.
*/
Function findFunctionAccordingToMroKnownStartingClass(Class cls, Class startingClass, string name) {
// `cls` defines `name` itself, and is `startingClass` or one of its (transitive) superclasses
result = cls.getAMethod() and
result.getName() = name and
cls = getADirectSuperclass*(startingClass)
or
// `cls` does not define `name` itself, so continue with the next class(es) in the
// MRO for `startingClass`
not class_has_method(cls, name) and
result =
findFunctionAccordingToMroKnownStartingClass(getNextClassInMroKnownStartingClass(cls,
startingClass), startingClass, name)
}
/**
* Gets a potential definition of the function `name` according to our approximation of
* MRO for the class `startingClass` (see `getNextClassInMroKnownStartingClass` for more information).
*
* Note: this is almost the same as `findFunctionAccordingToMro`, except we know the
* `startingClass`, which can give slightly more precise results.
*
* This is a convenience wrapper that starts the lookup at `startingClass` itself.
*/
pragma[inline]
Function findFunctionAccordingToMroKnownStartingClass(Class startingClass, string name) {
result = findFunctionAccordingToMroKnownStartingClass(startingClass, startingClass, name)
}
// =============================================================================
// attribute trackers
// =============================================================================
/**
* Input for the type tracker behind `attrReadTracker`; the tracked state is the
* `AttrRead` itself.
*/
private module TrackAttrReadInput implements CallGraphConstruction::Simple::InputSig {
class State = AttrRead;
/**
* Holds if `start` is the attribute read `attr`, and the object that is read from is
* a result of one of the class, class instance, self, cls or super-call trackers.
*/
predicate start(Node start, AttrRead attr) {
start = attr and
pragma[only_bind_into](attr.getObject()) in [
classTracker(_), classInstanceTracker(_), selfTracker(_), clsArgumentTracker(_),
superCallNoArgumentTracker(_), superCallTwoArgumentTracker(_, _)
]
}
/** Holds for nodes in ignored files, and for parameter nodes at the `self` position. */
predicate filter(Node n) {
ignoreForCallGraph(n.getLocation().getFile())
or
n.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
}
/**
 * Gets a reference to the attribute read `attr`, that is, a node that a local source
 * node found by the `TrackAttrReadInput` tracker for `attr` flows to.
 */
Node attrReadTracker(AttrRead attr) {
  CallGraphConstruction::Simple::Make<TrackAttrReadInput>::track(attr)
      .(LocalSourceNode)
      .flowsTo(result)
}
// =============================================================================
// call and argument resolution
// =============================================================================
/** The different kinds of calls that the call-graph resolution distinguishes. */
newtype TCallType =
  /** A call to a function that is not part of a class. */
  CallTypePlainFunction() or
  /**
   * A call to a "normal" method on a class instance.
   * Does not include staticmethods or classmethods.
   */
  CallTypeNormalMethod() or
  /** A call to a staticmethod. */
  CallTypeStaticMethod() or
  /** A call to a classmethod. */
  CallTypeClassMethod() or
  /**
   * A call to a method on a class, not going through an instance, such as
   *
   * ```py
   * class Foo:
   *     def method(self, arg):
   *         pass
   *
   * foo = Foo()
   * Foo.method(foo, 42)
   * ```
   */
  CallTypeMethodAsPlainFunction() or
  /** A call to a class. */
  CallTypeClass() or
  /** A call on a class instance, that goes to the `__call__` method of the class */
  CallTypeClassInstanceCall()
/** A type of call; there is one value for each branch of `TCallType`. */
class CallType extends TCallType {
  /** Gets the name of the `TCallType` branch that this call type belongs to. */
  string toString() {
    this = CallTypePlainFunction() and result = "CallTypePlainFunction"
    or
    this = CallTypeNormalMethod() and result = "CallTypeNormalMethod"
    or
    this = CallTypeStaticMethod() and result = "CallTypeStaticMethod"
    or
    this = CallTypeClassMethod() and result = "CallTypeClassMethod"
    or
    this = CallTypeMethodAsPlainFunction() and result = "CallTypeMethodAsPlainFunction"
    or
    this = CallTypeClass() and result = "CallTypeClass"
    or
    this = CallTypeClassInstanceCall() and result = "CallTypeClassInstanceCall"
  }
}
// -------------------------------------
// method call resolution
// -------------------------------------
/**
 * Predicates for resolving method calls: calls made through an attribute read on a
 * class, a class instance, an implicit `self`/`cls` argument, or the result of a
 * `super` call.
 */
private module MethodCalls {
  /**
   * Holds if `call` is a call to a method `target` on an instance or class, where the
   * instance or class is not derived from an implicit `self`/`cls` argument to a method
   * -- for that, see `callWithinMethodImplicitSelfOrCls`.
   *
   * It is found by making an attribute read `attr` with the name `functionName` on a
   * reference to the class `cls`, or to an instance of the class `cls`. The reference the
   * attribute-read is made on is `self`.
   */
  pragma[nomagic]
  private predicate directCall(
    CallNode call, Function target, string functionName, Class cls, AttrRead attr, Node self
  ) {
    // the class is known exactly, so the lookup can start at `cls` itself
    target = findFunctionAccordingToMroKnownStartingClass(cls, functionName) and
    directCall_join(call, functionName, cls, attr, self)
  }

  /** Extracted to give good join order */
  pragma[nomagic]
  private predicate directCall_join(
    CallNode call, string functionName, Class cls, AttrRead attr, Node self
  ) {
    call.getFunction() = attrReadTracker(attr).asCfgNode() and
    attr.accesses(self, functionName) and
    self in [classTracker(cls), classInstanceTracker(cls)]
  }

  /**
   * Holds if `call` is a call to a method `target` derived from an implicit `self`/`cls`
   * argument to a method within the class `classWithMethod`.
   *
   * It is found by making an attribute read `attr` with the name `functionName` on a
   * reference to an implicit `self`/`cls` argument. The reference the attribute-read is
   * made on is `self`.
   */
  pragma[nomagic]
  private predicate callWithinMethodImplicitSelfOrCls(
    CallNode call, Function target, string functionName, Class classWithMethod, AttrRead attr,
    Node self
  ) {
    // the lookup is started from `classWithMethod` and from each of its (transitive)
    // subclasses, since the actual class behind `self`/`cls` is not known here
    target = findFunctionAccordingToMro(getADirectSubclass*(classWithMethod), functionName) and
    callWithinMethodImplicitSelfOrCls_join(call, functionName, classWithMethod, attr, self)
  }

  /** Extracted to give good join order */
  pragma[nomagic]
  private predicate callWithinMethodImplicitSelfOrCls_join(
    CallNode call, string functionName, Class classWithMethod, AttrRead attr, Node self
  ) {
    call.getFunction() = attrReadTracker(attr).asCfgNode() and
    attr.accesses(self, functionName) and
    self in [clsArgumentTracker(classWithMethod), selfTracker(classWithMethod)]
  }

  /**
   * Like `fromSuper`, but only for `__new__`, and without requirement for being able to
   * resolve the call to a known target (since the only super class might be the
   * builtin `object`, so we never have the implementation of `__new__` in the DB).
   */
  predicate fromSuperNewCall(CallNode call, Class classUsedInSuper, AttrRead attr, Node self) {
    fromSuper_join(call, "__new__", classUsedInSuper, attr, self) and
    // only `super` calls whose second (possibly implicit) argument is a class or `cls`
    self in [classTracker(_), clsArgumentTracker(_)]
  }

  /**
   * Holds if `call` is a call to a method `target`, derived from a use of `super`, either
   * as:
   *
   * (1) `super(SomeClass, obj)`, where the first argument is a reference to the class
   * `classUsedInSuper`, and the second argument is `self`.
   *
   * (2) `super()`. This implicit version can only happen within a method in a class.
   * The implicit first argument is the class the call happens within `classUsedInSuper`.
   * The implicit second argument is the `self`/`cls` parameter of the method this happens
   * within.
   *
   * The method call is found by making an attribute read `attr` with the name
   * `functionName` on the return value from the `super` call.
   */
  pragma[nomagic]
  predicate fromSuper(
    CallNode call, Function target, string functionName, Class classUsedInSuper, AttrRead attr,
    Node self
  ) {
    // the lookup starts at the class that follows `classUsedInSuper` in the MRO
    target = findFunctionAccordingToMro(getNextClassInMro(classUsedInSuper), functionName) and
    fromSuper_join(call, functionName, classUsedInSuper, attr, self)
  }

  /** Extracted to give good join order */
  pragma[nomagic]
  private predicate fromSuper_join(
    CallNode call, string functionName, Class classUsedInSuper, AttrRead attr, Node self
  ) {
    call.getFunction() = attrReadTracker(attr).asCfgNode() and
    (
      // implicit form: `super()` inside the function `func`
      exists(Function func |
        attr.accesses(superCallNoArgumentTracker(func), functionName) and
        // Requiring enclosing scope of function to be a class is a little too
        // restrictive, since it is possible to use `super()` in a function defined inside
        // the method, where the first argument to the nested-function will be used as
        // implicit self argument. In practice I don't expect this to be a problem, and we
        // did not support this with points-to either.
        func.getEnclosingScope() = classUsedInSuper and
        self.(ParameterNode).getParameter() = func.getArg(0)
      )
      or
      // explicit form: `super(classUsedInSuper, self)`
      attr.accesses(superCallTwoArgumentTracker(classUsedInSuper, self), functionName)
    )
  }

  /**
   * Holds if `call` is a method call to `target` of call-type `type`, where the
   * attribute read for the method was made on `self` (see `directCall`,
   * `callWithinMethodImplicitSelfOrCls` and `fromSuper`).
   *
   * The call-type is derived from what `self` refers to, and from whether `target` is a
   * staticmethod or a classmethod.
   */
  predicate resolveMethodCall(CallNode call, Function target, CallType type, Node self) {
    (
      directCall(call, target, _, _, _, self)
      or
      callWithinMethodImplicitSelfOrCls(call, target, _, _, _, self)
      or
      fromSuper(call, target, _, _, _, self)
    ) and
    (
      // normal method call
      type instanceof CallTypeNormalMethod and
      (
        self = classInstanceTracker(_)
        or
        self = selfTracker(_)
      ) and
      not isStaticmethod(target) and
      not isClassmethod(target)
      or
      // method as plain function call
      type instanceof CallTypeMethodAsPlainFunction and
      self = classTracker(_) and
      not isStaticmethod(target) and
      not isClassmethod(target)
      or
      // staticmethod call
      type instanceof CallTypeStaticMethod and
      isStaticmethod(target)
      or
      // classmethod call
      type instanceof CallTypeClassMethod and
      isClassmethod(target)
    )
  }
}
import MethodCalls
// -------------------------------------
// class call resolution
// -------------------------------------
/**
 * Holds when `call` is a call to the class `cls`.
 *
 * NOTE: We have this predicate mostly to be able to compare with old points-to
 * call-graph resolution. So it could be removed in the future.
 */
predicate resolveClassCall(CallNode call, Class cls) {
  // the called expression is a reference to the class `cls`
  call.getFunction() = classTracker(cls).asCfgNode()
  or
  // `cls()` inside a classmethod (which also contains `type(self)()` inside a method)
  exists(Class classWithMethod |
    call.getFunction() = clsArgumentTracker(classWithMethod).asCfgNode() and
    // `cls` is `classWithMethod` itself or one of its (transitive) subclasses
    getADirectSuperclass*(cls) = classWithMethod
  )
}
/**
 * Gets a function, either `__init__` or `__new__` as specified by `funcName`, that will
 * be invoked when `cls` is constructed -- where the function lookup is based on our MRO
 * calculation.
 */
Function invokedFunctionFromClassConstruction(Class cls, string funcName) {
  // as described in https://docs.python.org/3/reference/datamodel.html#object.__new__
  // __init__ will only be called when __new__ returns an instance of the class (which
  // is not a requirement). However, for simplicity, we assume that __init__ will always
  // be called.
  funcName in ["__init__", "__new__"] and
  result = findFunctionAccordingToMroKnownStartingClass(cls, funcName)
}
/**
 * Holds when `call` is a call on a class instance, that goes to the `__call__` method
 * of the class. `target` is that `__call__` method, and `self` is the node for the
 * called expression, i.e. the instance being called.
 *
 * See https://docs.python.org/3/reference/datamodel.html#object.__call__
 */
predicate resolveClassInstanceCall(CallNode call, Function target, Node self) {
  exists(Class cls |
    // the called expression is a tracked instance of `cls`
    call.getFunction() = classInstanceTracker(cls).asCfgNode() and
    target = findFunctionAccordingToMroKnownStartingClass(cls, "__call__")
    or
    // the called expression is a tracked `self` of `cls`; the lookup is started from
    // `cls` and from each of its (transitive) subclasses
    call.getFunction() = selfTracker(cls).asCfgNode() and
    target = findFunctionAccordingToMro(getADirectSubclass*(cls), "__call__")
  |
    self.asCfgNode() = call.getFunction()
  )
}
// -------------------------------------
// overall call resolution
// -------------------------------------
/**
 * Holds if `call` is a call to the `target`, with call-type `type`.
 *
 * This combines plain function calls, method calls (`resolveMethodCall`), class
 * calls (`resolveClassCall`) and calls on class instances (`resolveClassInstanceCall`).
 */
cached
predicate resolveCall(CallNode call, Function target, CallType type) {
  Stages::DataFlow::ref() and
  (
    // plain function call: the called expression is a reference to `target`, and
    // `target` is not a method of any class
    type instanceof CallTypePlainFunction and
    call.getFunction() = functionTracker(target).asCfgNode() and
    not exists(Class cls | cls.getAMethod() = target)
    or
    resolveMethodCall(call, target, type, _)
    or
    // class call: the target is the `__init__`/`__new__` found for the called class
    type instanceof CallTypeClass and
    exists(Class cls |
      resolveClassCall(call, cls) and
      target = invokedFunctionFromClassConstruction(cls, _)
    )
    or
    type instanceof CallTypeClassInstanceCall and
    resolveClassInstanceCall(call, target, _)
  )
}
// =============================================================================
// Argument resolution
// =============================================================================
/**
 * Holds if the argument of `call` at position `apos` is `arg`. This is just a helper
 * predicate that maps ArgumentPositions to the arguments of the underlying `CallNode`.
 *
 * Covers positional arguments, keyword arguments, the `*args` argument and the
 * dictionary-splat position. It does not cover the self position.
 */
cached
predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
  // positional argument number `index`
  exists(int index |
    apos.isPositional(index) and
    arg.asCfgNode() = call.getArg(index)
  )
  or
  // keyword argument called `name`
  exists(string name |
    apos.isKeyword(name) and
    arg.asCfgNode() = call.getArgByName(name)
  )
  or
  // the first `*args`
  exists(int index |
    apos.isStarArgs(index) and
    arg.asCfgNode() = call.getStarArg() and
    // since `CallNode.getArg` doesn't include `*args`, we need to drop to the AST level
    // to get the index. Notice that we only use the AST for getting the index, so we
    // don't need to check for dominance in regards to splitting.
    call.getStarArg().getNode() = call.getNode().getPositionalArg(index).(Starred).getValue()
  )
  or
  // dictionary splat: either the node for `call.getKwargs()`, or the synthetic
  // dict-splat argument node for `call`
  apos.isDictSplat() and
  (
    arg.asCfgNode() = call.getKwargs()
    or
    arg = TSynthDictSplatArgumentNode(call)
  )
}
/**
 * Gets the argument `arg` of `call` at position `apos`, if any. Requires that we can
 * resolve `call` to `target` with CallType `type`.
 *
 * It might seem like it's enough to know the CallType to resolve arguments. The reason
 * we also need the `target`, is to avoid cross-talk. In the example below, assuming
 * that `Foo` and `Bar` define their own `meth` methods, we might end up passing _both_
 * `foo` and `bar` to both `Foo.meth` and `Bar.meth`, which is wrong. Since the
 * attribute access uses the same name, we need to also distinguish on the resolved
 * target, to know which of the two objects to pass as the self argument.
 *
 *
 * ```py
 * foo = Foo()
 * bar = Bar()
 * if cond:
 *     func = foo.meth
 * else:
 *     func = bar.meth
 * func(42)
 * ```
 *
 * Note: If `Bar.meth` and `Foo.meth` resolves to the same function, we will end up
 * sending both `self` arguments to that function, which is by definition the right thing to do.
 *
 * ### Bound methods
 *
 * For bound methods, such as `bm = x.m; bm()`, it's a little unclear whether we should
 * still use the object in the attribute lookup (`x.m`) as the self argument in the
 * call (`bm()`). We currently do this, but there might also be cases where we don't
 * want to do this.
 *
 * In the example below, we want to clear taint from the list before it reaches the
 * sink, but because we don't have a use of `l` in the `clear()` call, we currently
 * don't have any way to achieve our goal. (Note that this is a contrived example)
 *
 * ```py
 * l = list()
 * clear = l.clear
 * l.append(tainted)
 * clear()
 * sink(l)
 * ```
 *
 * To make the above even worse, bound-methods have a `__self__` property that refers to
 * the object of the bound-method, so we can re-write the code as:
 *
 * ```py
 * l = list()
 * clear = l.clear
 * clear.__self__.append(tainted)
 * clear()
 * sink(l)
 * ```
 *
 * One idea to solve this is to track the object in a synthetic data-flow node every
 * time the bound method is used, such that the `clear()` call would essentially be
 * translated into `l.clear()`, and we can still have use-use flow.
 */
cached
predicate getCallArg(CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) {
  Stages::DataFlow::ref() and
  resolveCall(call, target, type) and
  (
    // plain function call: arguments are taken directly from the call
    type instanceof CallTypePlainFunction and
    normalCallArg(call, arg, apos)
    or
    // self argument for normal method calls -- see note above about bound methods
    type instanceof CallTypeNormalMethod and
    apos.isSelf() and
    resolveMethodCall(call, target, type, arg) and
    // the dataflow library has a requirement that arguments and calls are in same enclosing
    // callable. This requirement would be broken if we used `my_obj` as the self
    // argument in the `f()` call in the example below:
    // ```py
    // def call_func(f):
    //     f()
    //
    // call_func(my_obj.some_method)
    // ```
    exists(CfgNode cfgNode | cfgNode.getNode() = call | sameEnclosingCallable(cfgNode, arg))
    or
    // cls argument for classmethod calls -- see note above about bound methods
    type instanceof CallTypeClassMethod and
    apos.isSelf() and
    resolveMethodCall(call, target, type, arg) and
    (arg = classTracker(_) or arg = clsArgumentTracker(_)) and
    // dataflow lib has requirement that arguments and calls are in same enclosing callable.
    exists(CfgNode cfgNode | cfgNode.getNode() = call | sameEnclosingCallable(cfgNode, arg))
    or
    // normal arguments for method calls
    (
      type instanceof CallTypeNormalMethod or
      type instanceof CallTypeStaticMethod or
      type instanceof CallTypeClassMethod
    ) and
    normalCallArg(call, arg, apos)
    or
    // method as plain function call.
    //
    // argument index 0 of call has position self (and MUST be given as positional
    // argument in call). This also means that call-arguments are shifted by 1, such
    // that argument index 1 of call has argument position 0
    type instanceof CallTypeMethodAsPlainFunction and
    (
      apos.isSelf() and arg.asCfgNode() = call.getArg(0)
      or
      // non-positional arguments are not affected by the shift
      not apos.isPositional(_) and normalCallArg(call, arg, apos)
      or
      // positional argument `index` of the call ends up at position `index - 1`
      exists(ArgumentPosition normalPos, int index |
        apos.isPositional(index - 1) and
        normalPos.isPositional(index) and
        normalCallArg(call, arg, normalPos)
      )
    )
    or
    // class call
    type instanceof CallTypeClass and
    (
      // only pass synthetic node for created object to __init__, and not __new__ since
      // __new__ is a classmethod.
      target = invokedFunctionFromClassConstruction(_, "__init__") and
      apos.isSelf() and
      arg = TSyntheticPreUpdateNode(call)
      or
      normalCallArg(call, arg, apos)
    )
    or
    // call on class instance, which goes to `__call__` method
    type instanceof CallTypeClassInstanceCall and
    (
      // the instance being called is passed as the self argument
      apos.isSelf() and
      resolveClassInstanceCall(call, target, arg)
      or
      normalCallArg(call, arg, apos)
    )
  )
}
/**
 * Holds if `node1` and `node2` have the same enclosing callable.
 *
 * join-order helper for getCallArg, since otherwise we would do cartesian product of
 * the enclosing callables. Both nodes must be bound by the caller (see the
 * `bindingset` annotation).
 */
bindingset[node1, node2]
pragma[inline_late]
private predicate sameEnclosingCallable(Node node1, Node node2) {
  node1.getEnclosingCallable() = node2.getEnclosingCallable()
}
// =============================================================================
// DataFlowCall
// =============================================================================
/** The calls known to the data-flow library; see the corresponding `DataFlowCall` subclasses. */
newtype TDataFlowCall =
  /** A call in source code that `resolveCall` resolves to `target` with call-type `type`. */
  TNormalCall(CallNode call, Function target, CallType type) { resolveCall(call, target, type) } or
  /** A call to the generated function inside a comprehension */
  TComprehensionCall(Comp c) or
  /** Any call node, viewed as a potential call to a library callable with a flow summary. */
  TPotentialLibraryCall(CallNode call) or
  /** A synthesized call inside a summarized callable */
  TSummaryCall(
    FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver
  ) {
    FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
  }
/** A call that is taken into account by the global data flow computation. */
abstract class DataFlowCall extends TDataFlowCall {
  /** Gets a textual representation of this element. */
  abstract string toString();

  /** Gets the callable to which this call goes. */
  abstract DataFlowCallable getCallable();

  /** Gets the argument at position `apos`, if any. */
  abstract ArgumentNode getArgument(ArgumentPosition apos);

  /** Gets the control flow node representing this call, if any. */
  abstract ControlFlowNode getNode();

  /** Gets the enclosing callable of this call, derived from the scope of the call. */
  DataFlowCallable getEnclosingCallable() { result = getCallableScope(this.getScope()) }

  /** Gets the scope of this call, if any. */
  abstract Scope getScope();

  /** Gets the location of this dataflow call. */
  abstract Location getLocation();

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
  }
}
/**
 * A call found in the program source (as opposed to a synthesised call).
 *
 * By default, the location is that of the underlying control flow node.
 */
abstract class ExtractedDataFlowCall extends DataFlowCall {
  override Location getLocation() { result = this.getNode().getLocation() }
}
/**
 * A resolved call in source code with an underlying `CallNode`.
 *
 * This is considered normal, compared with special calls such as `obj[0]` calling the
 * `__getitem__` method on the object. However, this also includes calls that go to the
 * `__call__` special method.
 *
 * There is one `NormalCall` for each combination of call node, resolved target and
 * call-type.
 */
class NormalCall extends ExtractedDataFlowCall, TNormalCall {
  CallNode call;
  Function target;
  CallType type;

  NormalCall() { this = TNormalCall(call, target, type) }

  override string toString() {
    // note: if we used toString directly on the CallNode we would get
    //   `ControlFlowNode for func()`
    // but the `ControlFlowNode` part is just clutter, so we go directly to the AST node
    // instead.
    result = call.getNode().toString()
  }

  override ControlFlowNode getNode() { result = call }

  override Scope getScope() { result = call.getScope() }

  /** Gets the callable whose scope is the resolved `target` function. */
  override DataFlowCallable getCallable() { result.(DataFlowFunction).getScope() = target }

  /** Gets the argument at position `apos`, as computed by `getCallArg`. */
  override ArgumentNode getArgument(ArgumentPosition apos) {
    getCallArg(call, target, type, result, apos)
  }

  /** Gets the `CallType` of this call. */
  CallType getCallType() { result = type }
}
/** A call to the generated function inside a comprehension */
class ComprehensionCall extends ExtractedDataFlowCall, TComprehensionCall {
  Comp c;
  Function target;

  ComprehensionCall() {
    this = TComprehensionCall(c) and
    target = c.getFunction()
  }

  /** Gets the comprehension that this call belongs to. */
  Comp getComprehension() { result = c }

  override string toString() { result = "comprehension call" }

  override ControlFlowNode getNode() { result.getNode() = c }

  override Scope getScope() { result = c.getScope() }

  /** Gets the callable whose scope is the function generated for the comprehension. */
  override DataFlowCallable getCallable() { result.(DataFlowFunction).getScope() = target }

  /** Gets the iterable of the comprehension, which is the argument at positional index 0. */
  override ArgumentNode getArgument(ArgumentPosition apos) {
    result.asExpr() = c.getIterable() and
    apos.isPositional(0)
  }

  override Location getLocation() { result = c.getLocation() }
}
/**
 * A potential call to a summarized callable, a `LibraryCallable`.
 *
 * We currently exclude all resolved calls. This means that a call to, say, `map`, which
 * is a `ClassCall`, cannot currently be given a summary.
 * We hope to lift this restriction in the future and include all potential calls to summaries
 * in this class.
 */
class PotentialLibraryCall extends ExtractedDataFlowCall, TPotentialLibraryCall {
  CallNode call;

  PotentialLibraryCall() { this = TPotentialLibraryCall(call) }

  override string toString() {
    // note: if we used toString directly on the CallNode we would get
    //   `ControlFlowNode for func()`
    // but the `ControlFlowNode` part is just clutter, so we go directly to the AST node
    // instead.
    result = call.getNode().toString()
  }

  // We cannot resolve the target `LibraryCallable` here,
  // as that could in turn refer to type tracking.
  // This call will be tied to a `LibraryCallable` via
  // `viableCallable` when the global data flow is assembled.
  override DataFlowCallable getCallable() { none() }

  override ArgumentNode getArgument(ArgumentPosition apos) {
    normalCallArg(call, result, apos)
    or
    // potential self argument, from `foo.bar()` -- note that this could also just be a
    // module reference, but we really don't have a good way of knowing :|
    apos.isSelf() and
    result.asCfgNode() = call.getFunction().(AttrNode).getObject()
  }

  override ControlFlowNode getNode() { result = call }

  override Scope getScope() { result = call.getScope() }
}
/**
 * A synthesized call inside a callable with a flow summary.
 *
 * For example, in
 * ```python
 * map(lambda x: x + 1, [1, 2, 3])
 * ```
 *
 * there is a synthesized call to the lambda argument inside `map`.
 *
 * Such a call has no scope, no control flow node, no location, and neither a callable
 * nor arguments of its own (the corresponding predicates have no results).
 */
class SummaryCall extends DataFlowCall, TSummaryCall {
  private FlowSummaryImpl::Public::SummarizedCallable c;
  private FlowSummaryImpl::Private::SummaryNode receiver;

  SummaryCall() { this = TSummaryCall(c, receiver) }

  /** Gets the data flow node that this call targets. */
  FlowSummaryImpl::Private::SummaryNode getReceiver() { result = receiver }

  /** Gets the enclosing callable, which is the summarized callable containing this call. */
  override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }

  override Scope getScope() { none() }

  override DataFlowCallable getCallable() { none() }

  override ArgumentNode getArgument(ArgumentPosition apos) { none() }

  override ControlFlowNode getNode() { none() }

  override string toString() { result = "[summary] call to " + receiver + " in " + c }

  override Location getLocation() { none() }
}
/**
 * The value of a parameter at function entry, viewed as a node in a data
 * flow graph.
 */
abstract class ParameterNodeImpl extends Node {
  /** Gets the `Parameter` this `ParameterNode` represents. */
  abstract Parameter getParameter();

  /**
   * Holds if this node is the parameter of callable `c` at the
   * position `ppos`.
   *
   * The default implementation asks the callable `c` for its parameter at `ppos`.
   */
  predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
    this = c.getParameter(ppos)
  }
}
/**
 * A synthetic parameter representing the values of the variables captured
 * by the callable being called. This parameter represents a single object
 * where all the values are stored as attributes.
 * This is also known as the environment part of a closure.
 *
 * This is used for tracking flow through captured variables.
 */
class SynthCapturedVariablesParameterNode extends ParameterNodeImpl,
  TSynthCapturedVariablesParameterNode
{
  private Function callable;

  SynthCapturedVariablesParameterNode() { this = TSynthCapturedVariablesParameterNode(callable) }

  /** Gets the function that this synthetic parameter belongs to. */
  final Function getCallable() { result = callable }

  // a synthetic parameter does not correspond to any `Parameter` in the source
  override Parameter getParameter() { none() }

  /** Holds if `c` is the function of this node and `pos` is the lambda-self position. */
  override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
    c = TFunction(callable) and
    pos.isLambdaSelf()
  }

  override Scope getScope() { result = callable }

  override Location getLocation() { result = callable.getLocation() }

  override string toString() { result = "lambda self in " + callable }
}
/** A parameter for a library callable with a flow summary. */
class SummaryParameterNode extends ParameterNodeImpl, FlowSummaryNode {
  SummaryParameterNode() {
    FlowSummaryImpl::Private::summaryParameterNode(this.getSummaryNode(), _)
  }

  /** Gets the parameter position that the flow-summary library assigns to this node. */
  private ParameterPosition getPosition() {
    FlowSummaryImpl::Private::summaryParameterNode(this.getSummaryNode(), result)
  }

  // a summary parameter does not correspond to any `Parameter` in the source
  override Parameter getParameter() { none() }

  /** Holds if `c` is the summarized callable of this node and `ppos` is its position. */
  override predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
    this.getSummarizedCallable() = c.asLibraryCallable() and ppos = this.getPosition()
  }
}
/** A data-flow node used to model flow summaries. */
class FlowSummaryNode extends Node, TFlowSummaryNode {
  /** Gets the underlying summary node from the flow-summary library. */
  FlowSummaryImpl::Private::SummaryNode getSummaryNode() { this = TFlowSummaryNode(result) }

  /** Gets the summarized callable that the underlying summary node belongs to. */
  FlowSummaryImpl::Public::SummarizedCallable getSummarizedCallable() {
    result = this.getSummaryNode().getSummarizedCallable()
  }

  /** Gets the enclosing callable, which is the summarized callable viewed as a library callable. */
  override DataFlowCallable getEnclosingCallable() {
    result.asLibraryCallable() = this.getSummarizedCallable()
  }

  override string toString() { result = this.getSummaryNode().toString() }

  // Hack to return "empty location"
  deprecated override predicate hasLocationInfo(
    string file, int startline, int startcolumn, int endline, int endcolumn
  ) {
    file = "" and
    startline = 0 and
    startcolumn = 0 and
    endline = 0 and
    endcolumn = 0
  }
}
/** A flow-summary node that the flow-summary library marks as a return node of kind `rk`. */
private class SummaryReturnNode extends FlowSummaryNode, ReturnNode {
  private ReturnKind rk;

  SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this.getSummaryNode(), rk) }

  override ReturnKind getKind() { result = rk }
}
/**
 * A flow-summary node that is the argument at position `pos_` of the synthesized
 * call `call_` inside a summarized callable.
 */
private class SummaryArgumentNode extends FlowSummaryNode, ArgumentNode {
  private SummaryCall call_;
  private ArgumentPosition pos_;

  SummaryArgumentNode() {
    FlowSummaryImpl::Private::summaryArgumentNode(call_.getReceiver(), this.getSummaryNode(), pos_)
  }

  override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
    call = call_ and pos = pos_
  }
}
/** A flow-summary node that is the post-update node of the flow-summary node `pre`. */
private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl {
  private FlowSummaryNode pre;

  SummaryPostUpdateNode() {
    FlowSummaryImpl::Private::summaryPostUpdateNode(this.getSummaryNode(), pre.getSummaryNode())
  }

  override Node getPreUpdateNode() { result = pre }
}
/**
 * A synthetic argument representing the values of the variables captured
 * by the callable being called. This argument represents a single object
 * where all the values are stored as attributes.
 * This is also known as the environment part of a closure.
 *
 * This is used for tracking flow through captured variables.
 *
 * The node wraps the control flow node `callable`; its scope and location are those
 * of `callable`.
 */
class SynthCapturedVariablesArgumentNode extends Node, TSynthCapturedVariablesArgumentNode {
  ControlFlowNode callable;

  SynthCapturedVariablesArgumentNode() { this = TSynthCapturedVariablesArgumentNode(callable) }

  /** Gets the `CallNode` corresponding to this captured variables argument node. */
  CallNode getCallNode() { result.getFunction() = callable }

  /** Gets the `CfgNode` that corresponds to this synthetic node. */
  CfgNode getUnderlyingNode() { result.asCfgNode() = callable }

  override Scope getScope() { result = callable.getScope() }

  override Location getLocation() { result = callable.getLocation() }

  override string toString() { result = "Capturing closure argument" }
}
/** A captured variables argument node viewed as an argument node. Needed because `argumentOf` is a global predicate. */
class CapturedVariablesArgumentNodeAsArgumentNode extends ArgumentNode,
  SynthCapturedVariablesArgumentNode
{
  /**
   * Holds if this node is the lambda-self argument of `call`. This requires that the
   * call node of this node is the node of `call`, and that the call node resolves to
   * a function that is a capturing scope of some captured variable.
   */
  override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
    exists(CallNode callNode | callNode = this.getCallNode() |
      callNode = call.getNode() and
      exists(Function target | resolveCall(callNode, target, _) |
        target = any(VariableCapture::CapturedVariable v).getACapturingScope()
      ) and
      pos.isLambdaSelf()
    )
  }
}
/** A synthetic node representing the values of captured variables after the output has been computed. */
class SynthCapturedVariablesArgumentPostUpdateNode extends PostUpdateNodeImpl,
  TSynthCapturedVariablesArgumentPostUpdateNode
{
  ControlFlowNode callable;

  SynthCapturedVariablesArgumentPostUpdateNode() {
    this = TSynthCapturedVariablesArgumentPostUpdateNode(callable)
  }

  /** Gets the `PostUpdateNode` (for a `CfgNode`) that corresponds to this synthetic node. */
  PostUpdateNode getUnderlyingNode() { result.getPreUpdateNode().asCfgNode() = callable }

  override string toString() { result = "[post] Capturing closure argument" }

  override Scope getScope() { result = callable.getScope() }

  override Location getLocation() { result = callable.getLocation() }

  /** Gets the synthetic captured variables argument node for the same `callable`. */
  override SynthCapturedVariablesArgumentNode getPreUpdateNode() {
    result = TSynthCapturedVariablesArgumentNode(callable)
  }
}
/**
 * A synthetic node representing the values of variables captured by a comprehension.
 *
 * Its scope and location are those of the comprehension.
 */
class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVariablesArgumentNode {
  Comp comp;

  SynthCompCapturedVariablesArgumentNode() { this = TSynthCompCapturedVariablesArgumentNode(comp) }

  override string toString() { result = "Capturing closure argument (comp)" }

  override Scope getScope() { result = comp.getScope() }

  override Location getLocation() { result = comp.getLocation() }

  /** Gets the comprehension that this node belongs to. */
  Comp getComprehension() { result = comp }
}
/**
 * A comprehension captured variables argument node viewed as an argument node: it is
 * the lambda-self argument of the `ComprehensionCall` for the same comprehension.
 */
class SynthCompCapturedVariablesArgumentNodeAsArgumentNode extends SynthCompCapturedVariablesArgumentNode,
  ArgumentNode
{
  override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
    call.(ComprehensionCall).getComprehension() = comp and
    pos.isLambdaSelf()
  }
}
/** A synthetic node representing the values of variables captured by a comprehension after the output has been computed. */
class SynthCompCapturedVariablesArgumentPostUpdateNode extends PostUpdateNodeImpl,
  TSynthCompCapturedVariablesArgumentPostUpdateNode
{
  Comp comp;

  SynthCompCapturedVariablesArgumentPostUpdateNode() {
    this = TSynthCompCapturedVariablesArgumentPostUpdateNode(comp)
  }

  override string toString() { result = "[post] Capturing closure argument (comp)" }

  override Scope getScope() { result = comp.getScope() }

  override Location getLocation() { result = comp.getLocation() }

  /** Gets the synthetic captured variables argument node for the same comprehension. */
  override Node getPreUpdateNode() { result = TSynthCompCapturedVariablesArgumentNode(comp) }
}
/**
 * Gets a viable run-time target for the call `call`.
 *
 * For calls found in the source this is the callable reported by the call itself. For
 * a `PotentialLibraryCall` it is additionally any library callable that lists the
 * call's node among its calls.
 */
DataFlowCallable viableCallable(DataFlowCall call) {
  call instanceof ExtractedDataFlowCall and
  result = call.getCallable()
  or
  // A call to a library callable with a flow summary
  // In this situation we can not resolve the callable from the call,
  // as that would make data flow depend on type tracking.
  // Instead we resolve the call from the summary.
  exists(LibraryCallable callable |
    result = TLibraryCallable(callable) and
    call.getNode() = callable.getACall().getNode() and
    call instanceof PotentialLibraryCall
  )
}
// =============================================================================
// Remaining required data-flow things
// =============================================================================
/** The return kinds; there is only the single branch `TNormalReturnKind`. */
private newtype TReturnKind = TNormalReturnKind()
/**
 * A return kind. A return kind describes how a value can be returned
 * from a callable. For Python, this is simply a method return.
 */
class ReturnKind extends TReturnKind {
  /** Gets a textual representation of this element. */
  string toString() { result = "return" }
}
/** A data flow node that represents a value returned by a callable. */
abstract class ReturnNode extends Node {
  /** Gets the kind of this return node. By default this is any `ReturnKind`. */
  ReturnKind getKind() { any() }
}
/**
 * A data flow node that represents a value returned by a callable, that is, the
 * value of a `return` statement found in the source.
 *
 * `yield` expressions are deliberately not included here. The only yielded values
 * that are modeled as returns are those in functions decorated with
 * `contextlib.contextmanager`, see `YieldNodeInContextManagerFunction`.
 */
class ExtractedReturnNode extends ReturnNode, CfgNode {
  // See `TaintTrackingImplementation::returnFlowStep`
  ExtractedReturnNode() { node = any(Return ret).getValue().getAFlowNode() }

  /** Gets the kind of this return node, which is any `ReturnKind`. */
  override ReturnKind getKind() { any() }
}
/**
 * A data flow node that represents the value yielded by a callable with a
 * `contextlib.contextmanager` decorator. We treat this as a normal return, which makes
 * things just work when used in a `with` statement -- technically calling the function
 * directly will give you a `contextlib._GeneratorContextManager` instance, so it's a
 * slight workaround solution.
 *
 * See https://docs.python.org/3/library/contextlib.html#contextlib.contextmanager
 */
class YieldNodeInContextManagerFunction extends ReturnNode, CfgNode {
  YieldNodeInContextManagerFunction() {
    // the scope of the node must have a contextmanager decorator, and the node
    // must be the yielded value (not the `yield` expression itself)
    hasContextmanagerDecorator(node.getScope()) and
    node = any(Yield yield).getValue().getAFlowNode()
  }

  override ReturnKind getKind() { any() }
}
/** A data-flow node that represents the output of a call. */
abstract class OutNode extends Node {
  /** Gets the underlying call, where this node is a corresponding output of kind `kind`. */
  abstract DataFlowCall getCall(ReturnKind kind);
}
/** The concrete kinds of `OutNode`: one for extracted calls, one for summary calls. */
private module OutNodes {
  /**
   * An expression node whose control flow node is the node of an extracted call, and
   * which therefore reads the value returned directly by that call.
   */
  class ExprOutNode extends OutNode, ExprNode {
    private DataFlowCall extractedCall;

    ExprOutNode() { this.getNode() = extractedCall.(ExtractedDataFlowCall).getNode() }

    /** Gets the call this node is an output of; this holds for every return kind. */
    override DataFlowCall getCall(ReturnKind kind) {
      exists(kind) and
      result = extractedCall
    }
  }

  /**
   * A flow-summary node that the flow-summary library marks as an output, of a
   * specific return kind, of a synthesized summary call.
   */
  private class SummaryOutNode extends FlowSummaryNode, OutNode {
    private SummaryCall summaryCall;
    private ReturnKind returnKind;

    SummaryOutNode() {
      FlowSummaryImpl::Private::summaryOutNode(summaryCall.getReceiver(), this.getSummaryNode(),
        returnKind)
    }

    /** Gets the summary call this node is an output of, for the recorded return kind only. */
    override DataFlowCall getCall(ReturnKind kind) {
      kind = returnKind and
      result = summaryCall
    }
  }
}
/**
 * Gets an `OutNode` that is an output of `call` for the return kind `kind`, i.e. a
 * node that can read the value returned from `call` with that kind.
 */
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { result.getCall(kind) = call }