Files
codeql/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
Rasmus Wriedt Larsen bdda0f574b Python: Use new parameter position for synthetic **kwargs instead
We wanted to ensure that a callable did not have multiple parameters
with same parameter position. Originally we fixed this with
02b3a1b515 (like Ruby). This commit
reverts that and solves it by introducing a new parameter position
instead.
2023-03-09 15:00:20 +01:00

1617 lines
58 KiB
Plaintext

/**
* INTERNAL: Do not use.
*
* TypeTracker based call-graph.
*
* The overall scheme for resolving calls, is to notice that Python has different kinds
* of callables, and resolve those with different strategies. Currently we handle these
* completely separately:
* 1. plain functions (and lambdas)
* 2. methods on classes
* 3. class instantiation
*
* So we have type-trackers for each of the 3 categories above, with some considerable
* effort to handle different kinds of methods on classes (staticmethod, classmethod,
* normal), and resolving methods correctly in regards to MRO.
*
*
* A goal of this library is to support modeling calls that are made by third-party
* libraries. For example `call_later(func, arg0, arg1, foo=val)`, and the fact that the
* library might inject its own arguments, for example a context that will always be
* passed as the actual first argument to the function. Currently the aim is to provide
* enough predicates for such `call_later` function to be modeled by providing
* additional data-flow steps for the arguments/parameters. This means we cannot have
* any special logic that requires an AST call to be made before we care to figure out
* what callable this call might end up targeting.
*
* Specifically this means that we cannot use type-backtrackers from the function of a
* `CallNode`, since there is no `CallNode` to backtrack from for `func` in the example
* above.
*
* Note: This hasn't been 100% realized yet, so we don't currently expose a predicate to
* ask what targets any data-flow node has. But it's still the plan to do this!
*/
private import python
private import DataFlowPublic
private import DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl
private import FlowSummaryImplSpecific as FlowSummaryImplSpecific
private import semmle.python.internal.CachedStages
/**
 * The underlying algebraic type for `ParameterPosition`, with one branch for each kind
 * of position a parameter can occupy.
 */
newtype TParameterPosition =
  /** Used for `self` in methods, and `cls` in classmethods. */
  TSelfParameterPosition() or
  /** A positional parameter at `index`. */
  TPositionalParameterPosition(int index) {
    exists(Parameter p | index = p.getPosition())
    or
    // Synthetic parameters are made for a synthetic summary callable, based on what
    // Argument positions they have flow for, so those positions must be available as
    // parameter positions as well.
    FlowSummaryImplSpecific::ParsePositions::isParsedPositionalArgumentPosition(_, index)
  } or
  /** A keyword parameter named `name`. */
  TKeywordParameterPosition(string name) {
    exists(Parameter p | name = p.getName())
    or
    // same reasoning as for `TPositionalParameterPosition`
    FlowSummaryImplSpecific::ParsePositions::isParsedKeywordArgumentPosition(_, name)
  } or
  /** A `*args` parameter at `index`. */
  TStarArgsParameterPosition(int index) {
    // `.getPosition` does not work for `*args`, so we need `*args` parameter positions
    // at index 1 larger than every positional parameter position, and at 0 as well.
    // This is a bit of an over-approximation.
    index = 0
    or
    exists(Parameter p | index = p.getPosition() + 1)
  } or
  /** A synthetic `*args` element parameter; exists for the same indices as `*args`. */
  TSynthStarArgsElementParameterPosition(int index) {
    exists(TStarArgsParameterPosition(index))
  } or
  /** A `**kwargs` parameter. */
  TDictSplatParameterPosition() or
  // To get flow from a **kwargs argument to a keyword parameter, we add a read-step
  // from a synthetic **kwargs parameter. This separate synthetic ParameterNode is
  // needed because we clear content of the normal **kwargs parameter for the names
  // that correspond to normal keyword parameters. Since the same parameter position
  // cannot be re-used for multiple parameter nodes in the same callable, the synthetic
  // node gets its own parameter position.
  TSynthDictSplatParameterPosition()
/** A position that a parameter of a callable can occupy. */
class ParameterPosition extends TParameterPosition {
  /** Holds if this is the position of a `self`/`cls` parameter. */
  predicate isSelf() { this = TSelfParameterPosition() }

  /** Holds if this is the position of the positional parameter at (0-based) `index`. */
  predicate isPositional(int index) { this = TPositionalParameterPosition(index) }

  /** Holds if this is the position of the keyword parameter named `name`. */
  predicate isKeyword(string name) { this = TKeywordParameterPosition(name) }

  /** Holds if this is the position of a `*args` parameter at (0-based) `index`. */
  predicate isStarArgs(int index) { this = TStarArgsParameterPosition(index) }

  /**
   * Holds if this is the position of a synthetic parameter at or after (0-based)
   * position `index`, from which a store step is made to the real `*args` parameter.
   */
  predicate isSynthStarArgsElement(int index) {
    this = TSynthStarArgsElementParameterPosition(index)
  }

  /** Holds if this is the position of a `**kwargs` parameter. */
  predicate isDictSplat() { this = TDictSplatParameterPosition() }

  /**
   * Holds if this is the position of a **synthetic** `**kwargs` parameter
   * (see comment for `TSynthDictSplatParameterPosition`).
   */
  predicate isSynthDictSplat() { this = TSynthDictSplatParameterPosition() }

  /** Gets a textual representation of this parameter position. */
  string toString() {
    this = TSelfParameterPosition() and result = "self"
    or
    exists(int i | this = TPositionalParameterPosition(i) | result = "position " + i)
    or
    exists(string kw | this = TKeywordParameterPosition(kw) | result = "keyword " + kw)
    or
    exists(int i | this = TStarArgsParameterPosition(i) | result = "*args at " + i)
    or
    exists(int i | this = TSynthStarArgsElementParameterPosition(i) |
      result = "synthetic *args element at (or after) " + i
    )
    or
    this = TDictSplatParameterPosition() and result = "**"
    or
    this = TSynthDictSplatParameterPosition() and result = "synthetic **"
  }
}
/**
 * The underlying algebraic type for `ArgumentPosition`, with one branch for each kind
 * of position an argument can occupy.
 */
newtype TArgumentPosition =
  /** Used for `self` in methods, and `cls` in classmethods. */
  TSelfArgumentPosition() or
  /** A positional argument at `index`. */
  TPositionalArgumentPosition(int index) {
    exists(CallNode c | exists(c.getArg(index)))
    or
    // Synthetic calls within a summarized callable could use a unique argument
    // position, so we need to make these available (these are specified as
    // parameters in the flow-summary spec).
    FlowSummaryImplSpecific::ParsePositions::isParsedPositionalParameterPosition(_, index)
  } or
  /** A keyword argument named `name`. */
  TKeywordArgumentPosition(string name) {
    exists(CallNode c | exists(c.getArgByName(name)))
    or
    // same reasoning as for `TPositionalArgumentPosition`
    FlowSummaryImplSpecific::ParsePositions::isParsedKeywordParameterPosition(_, name)
  } or
  /** A `*args` argument at `index`; only indices where some call has a starred argument. */
  TStarArgsArgumentPosition(int index) {
    any(Call c).getPositionalArg(index) instanceof Starred
  } or
  /** A `**kwargs` argument. */
  TDictSplatArgumentPosition()
/** A position that an argument of a call can occupy. */
class ArgumentPosition extends TArgumentPosition {
  /** Holds if this is the position of a `self`/`cls` argument. */
  predicate isSelf() { this = TSelfArgumentPosition() }

  /** Holds if this is the position of the positional argument at (0-based) `index`. */
  predicate isPositional(int index) { this = TPositionalArgumentPosition(index) }

  /** Holds if this is the position of the keyword argument named `name`. */
  predicate isKeyword(string name) { this = TKeywordArgumentPosition(name) }

  /** Holds if this is the position of a `*args` argument at (0-based) `index`. */
  predicate isStarArgs(int index) { this = TStarArgsArgumentPosition(index) }

  /** Holds if this is the position of a `**kwargs` argument. */
  predicate isDictSplat() { this = TDictSplatArgumentPosition() }

  /** Gets a textual representation of this argument position. */
  string toString() {
    this = TSelfArgumentPosition() and result = "self"
    or
    exists(int i | this = TPositionalArgumentPosition(i) | result = "position " + i)
    or
    exists(string kw | this = TKeywordArgumentPosition(kw) | result = "keyword " + kw)
    or
    exists(int i | this = TStarArgsArgumentPosition(i) | result = "*args at " + i)
    or
    this = TDictSplatArgumentPosition() and result = "**"
  }
}
/**
 * Holds if an argument at position `apos` is passed to the parameter at position
 * `ppos`.
 */
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
  // `self`/`cls` argument to `self`/`cls` parameter
  apos.isSelf() and ppos.isSelf()
  or
  // same positional index
  exists(int i | apos.isPositional(i) | ppos.isPositional(i))
  or
  // same keyword name
  exists(string kw | apos.isKeyword(kw) | ppos.isKeyword(kw))
  or
  // `*args` argument to `*args` parameter at the same index
  exists(int i | apos.isStarArgs(i) | ppos.isStarArgs(i))
  or
  // any positional argument at or after the index of the synthetic `*args` element
  // parameter
  exists(int paramIdx, int argIdx |
    ppos.isSynthStarArgsElement(paramIdx) and
    apos.isPositional(argIdx) and
    paramIdx <= argIdx
  )
  or
  // a `**kwargs` argument goes to both the real and the synthetic `**kwargs` parameter
  apos.isDictSplat() and
  (
    ppos.isDictSplat()
    or
    ppos.isSynthDictSplat()
  )
}
// =============================================================================
// Helper predicates
// =============================================================================
/**
 * Holds if the function `func` is a staticmethod, that is, if it has a decorator
 * that is a reference to the global name `staticmethod`.
 *
 * Unlike `isClassmethod`, no methods are recognized by naming convention here.
 */
predicate isStaticmethod(Function func) {
  func.getADecorator() =
    any(NameNode id | id.getId() = "staticmethod" and id.isGlobal()).getNode()
}
/**
 * Holds if the function `func` is a classmethod. This is the case when it has a
 * decorator that is a reference to the global name `classmethod`, or when it is a
 * method on a class with one of the special names that are treated as classmethods
 * by convention even without the decorator (such as `__new__`).
 */
predicate isClassmethod(Function func) {
  // explicit `@classmethod` decorator
  func.getADecorator() =
    any(NameNode id | id.getId() = "classmethod" and id.isGlobal()).getNode()
  or
  // classmethod by convention
  func = any(Class cls).getAMethod() and
  func.getName() in [
      "__new__", // https://docs.python.org/3.10/reference/datamodel.html#object.__new__
      "__init_subclass__", // https://docs.python.org/3.10/reference/datamodel.html#object.__init_subclass__
      "__class_getitem__", // https://docs.python.org/3.10/reference/datamodel.html#object.__class_getitem__
    ]
}
/**
 * Holds if the function `func` has a decorator that is a reference to the global
 * name `property`.
 */
predicate hasPropertyDecorator(Function func) {
  func.getADecorator() = any(NameNode id | id.getId() = "property" and id.isGlobal()).getNode()
}
// =============================================================================
// Callables
// =============================================================================
/**
* A callable defined in library code, identified by a unique string.
*
* This class is abstract: subclasses contribute the concrete library callables and
* must implement `getACall` and `getACallback`.
*/
abstract class LibraryCallable extends string {
// The characteristic predicate does not restrict `this` by itself, so `this` is
// declared as bound by the context (i.e. by the subclasses).
bindingset[this]
LibraryCallable() { any() }
/** Gets a call to this library callable. */
abstract CallCfgNode getACall();
/** Gets a data-flow node, where this library callable is used as a call-back. */
abstract ArgumentNode getACallback();
}
/**
* The underlying algebraic type for `DataFlowCallable`: functions, modules, and
* library callables.
*/
newtype TDataFlowCallable =
/**
* Is used as the target for all calls: plain functions, lambdas, methods on classes,
* class instantiations, and (in the future) special methods.
*/
TFunction(Function func) {
// For generators/list-comprehensions we create a synthetic function. In the
// points-to call-graph these were not considered callable, and instead we added
// data-flow steps (read/write) for these. As an easy solution for now, we do the
// same to keep things easy to reason about (and therefore exclude things that do
// not have a definition)
exists(func.getDefinition())
} or
/** see QLDoc for `DataFlowModuleScope` for why we need this. */
TModule(Module m) or
/** A callable defined in library code (see `LibraryCallable`). */
TLibraryCallable(LibraryCallable callable)
/**
* A callable, as seen by the data-flow library.
*
* This is the common interface of functions (`TFunction`), module scopes (`TModule`),
* and library callables (`TLibraryCallable`).
*/
abstract class DataFlowCallable extends TDataFlowCallable {
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets qualified name for this callable, if any. */
abstract string getQualifiedName();
/** Gets the scope of this callable */
abstract Scope getScope();
/** Gets the parameter at position `ppos`, if any. */
abstract ParameterNode getParameter(ParameterPosition ppos);
/** Gets the underlying library callable, if any. */
LibraryCallable asLibraryCallable() { this = TLibraryCallable(result) }
/** Gets the location of this dataflow callable. */
abstract Location getLocation();
}
/**
* A callable function: a `Function` for which a `TFunction` exists, and that does not
* have a `property` decorator.
*/
abstract class DataFlowFunction extends DataFlowCallable, TFunction {
// the underlying AST function
Function func;
DataFlowFunction() {
this = TFunction(func) and
// TODO: Handle @property decorators
not hasPropertyDecorator(func)
}
override string toString() { result = func.toString() }
override string getQualifiedName() { result = func.getQualifiedName() }
override Function getScope() { result = func }
override Location getLocation() { result = func.getLocation() }
/**
* Gets the positional parameter offset, to take into account self/cls parameters.
*
* This is the number of leading declared parameters that are not counted as
* positional parameters; it is 0 here, and subclasses override it.
*/
int positionalOffset() { result = 0 }
/**
* Gets the parameter node of this function at position `ppos`, if any.
*
* Handles positional, keyword, `*args` (real and synthetic element), and `**kwargs`
* (real and synthetic) positions. The `self` position is not handled here.
*/
override ParameterNode getParameter(ParameterPosition ppos) {
// positional: shift the index by the offset to find the declared parameter
exists(int index | ppos.isPositional(index) |
result.getParameter() = func.getArg(index + this.positionalOffset())
)
or
// keyword: look up the declared parameter by name
exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
or
// `*args`
exists(int index |
(
ppos.isStarArgs(index) and
result.getParameter() = func.getVararg()
or
ppos.isSynthStarArgsElement(index) and
result = TSynthStarArgsElementParameterNode(this)
)
|
// a `*args` parameter comes after the last positional parameter. We need to take
// self parameter into account, so for
// `def func(foo, bar, *args)` it should be index 2 (pos-param-count == 2)
// `class A: def func(self, foo, bar, *args)` it should be index 2 (pos-param-count - 1 == 3 - 1)
index = func.getPositionalParameterCount() - this.positionalOffset()
or
// no positional argument
not exists(func.getArg(_)) and index = 0
)
or
// `**kwargs`
ppos.isDictSplat() and result.getParameter() = func.getKwarg()
or
// synthetic `**kwargs` (see comment for `TSynthDictSplatParameterPosition`)
ppos.isSynthDictSplat() and result = TSynthDictSplatParameterNode(this)
}
}
/** A plain (non-method) function, that is, a `DataFlowFunction` that is not a `DataFlowMethod`. */
class DataFlowPlainFunction extends DataFlowFunction {
DataFlowPlainFunction() { not this instanceof DataFlowMethod }
}
/** A method, that is, a function that is a member of a class. */
class DataFlowMethod extends DataFlowFunction {
// the class that `func` is a method of
Class cls;
DataFlowMethod() { cls.getAMethod() = func }
/** Gets the class this function is a method of. */
Class getClass() { result = cls }
// the first declared parameter is `self`/`cls`, so positional parameters are shifted by 1
override int positionalOffset() { result = 1 }
/**
* Gets the parameter node at position `ppos`: the first declared parameter for the
* `self` position, and otherwise whatever `DataFlowFunction.getParameter` gives.
*/
override ParameterNode getParameter(ParameterPosition ppos) {
ppos.isSelf() and result.getParameter() = func.getArg(0)
or
result = super.getParameter(ppos)
}
}
/** A classmethod, that is, a method for which `isClassmethod` holds. */
class DataFlowClassmethod extends DataFlowMethod {
DataFlowClassmethod() { isClassmethod(func) }
}
/**
* A staticmethod, that is, a method for which `isStaticmethod` holds.
*
* `DataFlowFunction` is listed as a direct supertype (in addition to `DataFlowMethod`)
* so that `getParameter` can delegate to `DataFlowFunction.super`, bypassing the
* `self` handling in `DataFlowMethod`.
*/
class DataFlowStaticmethod extends DataFlowMethod, DataFlowFunction {
DataFlowStaticmethod() { isStaticmethod(func) }
// a staticmethod has no implicit first parameter, so there is no offset
override int positionalOffset() { result = 0 }
// use the plain function logic, so that no parameter is at the `self` position
override ParameterNode getParameter(ParameterPosition ppos) {
result = DataFlowFunction.super.getParameter(ppos)
}
}
/**
* A module. This is not actually a callable, but we need this so that a
* `ModuleVariableNode` has an enclosing callable.
*
* A module scope never has any parameters.
*/
class DataFlowModuleScope extends DataFlowCallable, TModule {
// the underlying module
Module mod;
DataFlowModuleScope() { this = TModule(mod) }
override string toString() { result = mod.toString() }
override string getQualifiedName() { result = mod.getName() }
override Module getScope() { result = mod }
override Location getLocation() { result = mod.getLocation() }
override ParameterNode getParameter(ParameterPosition ppos) { none() }
}
/**
* A `DataFlowCallable` that wraps a `LibraryCallable`.
*
* Such a callable has no scope, no location, and no parameters of its own.
*/
class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
// the wrapped library callable
LibraryCallable callable;
LibraryCallableValue() { this = TLibraryCallable(callable) }
override string toString() { result = "LibraryCallableValue: " + callable.toString() }
override string getQualifiedName() { result = callable.toString() }
/** Gets a data-flow node, where this library callable is used as a call-back. */
ArgumentNode getACallback() { result = callable.getACallback() }
override Scope getScope() { none() }
override ParameterNode getParameter(ParameterPosition ppos) { none() }
override LibraryCallable asLibraryCallable() { result = callable }
override Location getLocation() { none() }
}
// =============================================================================
// Type trackers used to resolve calls.
// =============================================================================
/** Gets a call whose function is a reference to the global name `type`. */
private CallCfgNode getTypeCall() {
  result.getFunction().asCfgNode() = any(NameNode id | id.getId() = "type" and id.isGlobal())
}
/**
 * Gets a call whose function is a reference to the global name `super`.
 *
 * While it is possible to reference `super` and call it later, it's almost never done
 * in practice. From looking at top 1000 projects, there were a few uses around mocking
 * (see link below), but otherwise only 2 edgecases. Overall it seems ok to ignore this
 * complexity.
 *
 * https://github.com/python/cpython/blob/18b1782192f85bd26db89f5bc850f8bee4247c1a/Lib/unittest/mock.py#L48-L50
 */
private CallCfgNode getSuperCall() {
  result.getFunction().asCfgNode() = any(NameNode id | id.getId() = "super" and id.isGlobal())
}
/**
* Holds if the file `f` should be ignored when computing the call-graph. This is the
* case when its absolute path contains `/site-packages/sympy/`.
*
* We currently see a performance problem when analyzing the `sympy` PyPI package,
* which can be part of the database when dependencies are installed and extracted.
* From what we can understand, SymPy is using Python in an exotic way, so the fact that
* our analysis currently does not handle this project has nothing to say about our
* ability to handle normal Python code. Furthermore, SymPy does not look to be relevant
* in a security context, so we should not lose out on any security results by doing
* this.
*/
private predicate ignoreForCallGraph(File f) {
f.getAbsolutePath().matches("%/site-packages/sympy/%")
}
/**
* Gets a reference to the function `func`, tracked with type-tracker `t`.
*
* Tracking starts at the definition of `func`, or at a decorator call on that
* definition. Nodes in files matched by `ignoreForCallGraph` are never results.
*/
private TypeTrackingNode functionTracker(TypeTracker t, Function func) {
// base case: the sources
not ignoreForCallGraph(result.getLocation().getFile()) and
t.start() and
(
result.asExpr() = func.getDefinition()
or
// when a function is decorated, it's the result of the (last) decorator call that
// is used
result.asExpr() = func.getDefinition().(FunctionExpr).getADecoratorCall()
)
or
// recursive case: take one more type-tracking step
not ignoreForCallGraph(result.getLocation().getFile()) and
exists(TypeTracker t2 | result = functionTracker(t2, func).track(t2, t))
}
/**
 * Gets a data-flow node that refers to the function `func`, as found by type-tracking
 * from the definition of `func` (or a decorator call on it).
 */
Node functionTracker(Function func) {
  exists(TypeTrackingNode tracked | tracked = functionTracker(TypeTracker::end(), func) |
    tracked.flowsTo(result)
  )
}
/**
* Gets a reference to the class `cls`, tracked with type-tracker `t`.
*
* Tracking starts at `cls.getParent()`, at a decorator call on it, or at a call
* `type(obj)` where `obj` is an instance of `cls`. Nodes in files matched by
* `ignoreForCallGraph` are never results.
*/
private TypeTrackingNode classTracker(TypeTracker t, Class cls) {
// base case: the sources
not ignoreForCallGraph(result.getLocation().getFile()) and
t.start() and
(
result.asExpr() = cls.getParent()
or
// when a class is decorated, it's the result of the (last) decorator call that
// is used
result.asExpr() = cls.getParent().getADecoratorCall()
or
// `type(obj)`, where obj is an instance of this class
result = getTypeCall() and
result.(CallCfgNode).getArg(0) = classInstanceTracker(cls)
)
or
// recursive case: take one more type-tracking step
not ignoreForCallGraph(result.getLocation().getFile()) and
exists(TypeTracker t2 | result = classTracker(t2, cls).track(t2, t)) and
// never track into a parameter at the `self` position
// NOTE(review): presumably because those are covered by `selfTracker` and
// `clsArgumentTracker` -- confirm
not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
 * Gets a data-flow node that refers to the class `cls`, as found by type-tracking
 * (see the private `classTracker` predicate for where tracking starts).
 */
Node classTracker(Class cls) {
  exists(TypeTrackingNode tracked | tracked = classTracker(TypeTracker::end(), cls) |
    tracked.flowsTo(result)
  )
}
/**
* Gets a reference to an instance of the class `cls`, tracked with type-tracker `t`.
*
* Tracking starts at a call that `resolveClassCall` resolves to `cls`, or at a
* `super().__new__` call (as identified by `fromSuperNewCall`) where the class used in
* `super` is `cls` or one of its (transitive) superclasses. Nodes in files matched by
* `ignoreForCallGraph` are never results.
*/
private TypeTrackingNode classInstanceTracker(TypeTracker t, Class cls) {
// base case: a call to the class
not ignoreForCallGraph(result.getLocation().getFile()) and
t.start() and
resolveClassCall(result.(CallCfgNode).asCfgNode(), cls)
or
// result of `super().__new__` as used in a `__new__` method implementation
not ignoreForCallGraph(result.getLocation().getFile()) and
t.start() and
exists(Class classUsedInSuper |
fromSuperNewCall(result.(CallCfgNode).asCfgNode(), classUsedInSuper, _, _) and
classUsedInSuper = getADirectSuperclass*(cls)
)
or
// recursive case: take one more type-tracking step
not ignoreForCallGraph(result.getLocation().getFile()) and
exists(TypeTracker t2 | result = classInstanceTracker(t2, cls).track(t2, t)) and
// never track into a parameter at the `self` position
not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
 * Gets a data-flow node that refers to an instance of the class `cls`, as found by
 * type-tracking (see the private `classInstanceTracker` predicate for where tracking
 * starts).
 */
Node classInstanceTracker(Class cls) {
  exists(TypeTrackingNode tracked | tracked = classInstanceTracker(TypeTracker::end(), cls) |
    tracked.flowsTo(result)
  )
}
/**
* Gets a reference to the `self` argument of a method on class `classWithMethod`,
* tracked with type-tracker `t`.
* The method cannot be a `staticmethod` or `classmethod`.
*
* Tracking starts at the first parameter of such a method. Nodes in files matched by
* `ignoreForCallGraph` are never results.
*/
private TypeTrackingNode selfTracker(TypeTracker t, Class classWithMethod) {
// base case: the first parameter of a normal method
not ignoreForCallGraph(result.getLocation().getFile()) and
t.start() and
exists(Function func |
func = classWithMethod.getAMethod() and
not isStaticmethod(func) and
not isClassmethod(func)
|
result.asExpr() = func.getArg(0)
)
or
// recursive case: take one more type-tracking step
not ignoreForCallGraph(result.getLocation().getFile()) and
exists(TypeTracker t2 | result = selfTracker(t2, classWithMethod).track(t2, t)) and
// never track into a (different) parameter at the `self` position
not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
 * Gets a data-flow node that refers to the `self` argument of a method on class
 * `classWithMethod`, as found by type-tracking.
 * The method cannot be a `staticmethod` or `classmethod`.
 */
Node selfTracker(Class classWithMethod) {
  exists(TypeTrackingNode tracked |
    tracked = selfTracker(TypeTracker::end(), classWithMethod)
  |
    tracked.flowsTo(result)
  )
}
/**
* Gets a reference to the enclosing class `classWithMethod` from within one of its
* methods, either through the `cls` argument from a `classmethod` or from `type(self)`
* from a normal method. The reference is tracked with type-tracker `t`.
*
* Nodes in files matched by `ignoreForCallGraph` are never results.
*/
private TypeTrackingNode clsArgumentTracker(TypeTracker t, Class classWithMethod) {
// base case: the sources
not ignoreForCallGraph(result.getLocation().getFile()) and
t.start() and
(
// the first parameter of a classmethod
exists(Function func |
func = classWithMethod.getAMethod() and
isClassmethod(func)
|
result.asExpr() = func.getArg(0)
)
or
// type(self)
result = getTypeCall() and
result.(CallCfgNode).getArg(0) = selfTracker(classWithMethod)
)
or
// recursive case: take one more type-tracking step
not ignoreForCallGraph(result.getLocation().getFile()) and
exists(TypeTracker t2 | result = clsArgumentTracker(t2, classWithMethod).track(t2, t)) and
// never track into a (different) parameter at the `self` position
not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
 * Gets a data-flow node that refers to the enclosing class `classWithMethod` from
 * within one of its methods, either through the `cls` argument from a `classmethod`
 * or from `type(self)` from a normal method.
 */
Node clsArgumentTracker(Class classWithMethod) {
  exists(TypeTrackingNode tracked |
    tracked = clsArgumentTracker(TypeTracker::end(), classWithMethod)
  |
    tracked.flowsTo(result)
  )
}
/**
* Gets a reference to the result of calling `super` without any argument, where the
* call happened in the method `func` (either a method or a classmethod). The reference
* is tracked with type-tracker `t`.
*
* Nodes in files matched by `ignoreForCallGraph` are never results.
*/
private TypeTrackingNode superCallNoArgumentTracker(TypeTracker t, Function func) {
// base case: a `super()` call whose scope is `func`, where `func` is not a staticmethod
not ignoreForCallGraph(result.getLocation().getFile()) and
t.start() and
not isStaticmethod(func) and
exists(CallCfgNode call | result = call |
call = getSuperCall() and
not exists(call.getArg(_)) and
call.getScope() = func
)
or
// recursive case: take one more type-tracking step
not ignoreForCallGraph(result.getLocation().getFile()) and
exists(TypeTracker t2 | result = superCallNoArgumentTracker(t2, func).track(t2, t)) and
// never track into a parameter at the `self` position
not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
 * Gets a data-flow node that refers to the result of calling `super` without any
 * argument, where the call happened in the method `func` (either a method or a
 * classmethod).
 */
Node superCallNoArgumentTracker(Function func) {
  exists(TypeTrackingNode tracked |
    tracked = superCallNoArgumentTracker(TypeTracker::end(), func)
  |
    tracked.flowsTo(result)
  )
}
/**
* Gets a reference to the result of calling `super` with 2 arguments, where the
* first is a reference to the class `cls`, and the second argument is `obj`. The
* reference is tracked with type-tracker `t`.
*
* Nodes in files matched by `ignoreForCallGraph` are never results.
*/
private TypeTrackingNode superCallTwoArgumentTracker(TypeTracker t, Class cls, Node obj) {
// base case: a `super(<ref to cls>, obj)` call
not ignoreForCallGraph(result.getLocation().getFile()) and
t.start() and
exists(CallCfgNode call | result = call |
call = getSuperCall() and
call.getArg(0) = classTracker(cls) and
call.getArg(1) = obj
)
or
// recursive case: take one more type-tracking step
not ignoreForCallGraph(result.getLocation().getFile()) and
exists(TypeTracker t2 | result = superCallTwoArgumentTracker(t2, cls, obj).track(t2, t)) and
// never track into a parameter at the `self` position
not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
}
/**
 * Gets a data-flow node that refers to the result of calling `super` with 2
 * arguments, where the first is a reference to the class `cls`, and the second
 * argument is `obj`.
 */
Node superCallTwoArgumentTracker(Class cls, Node obj) {
  exists(TypeTrackingNode tracked |
    tracked = superCallTwoArgumentTracker(TypeTracker::end(), cls, obj)
  |
    tracked.flowsTo(result)
  )
}
// =============================================================================
// MRO
// =============================================================================
/**
* Gets a direct superclass of the argument `cls`, if any.
*
* For `A` with the class definition `class A(B, C)` it will have results `B` and `C`.
*
* A base expression is resolved to a class using `classTracker`, so only bases that
* type-tracking can resolve give a result.
*/
Class getADirectSuperclass(Class cls) { cls.getABase() = classTracker(result).asExpr() }
/**
* Gets a direct subclass of the argument `cls`, if any.
*
* For `B` with the class definition `class A(B)` it will have result `A`.
*
* This is the inverse of `getADirectSuperclass`.
*/
Class getADirectSubclass(Class cls) { cls = getADirectSuperclass(result) }
/**
* Gets a class that, from an approximated MRO calculation, might be the next class used
* for member-lookup when `super().attr` is used inside the class `cls`.
*
* In the example below, with `cls=B`, this predicate will have `A` and `C` as results.
* ```py
* class A: pass
* class B(A): pass
* class C(A): pass
* class D(B, C): pass
* ```
*
* NOTE: This approximation does not handle all cases correctly, and in the example
* below, with `cls=A` will not have any results, although it should include `Y`.
*
* ```py
* class A: pass
* class B(A): pass
* class X: pass
* class Y(X): pass
* class Ex(B, Y): pass
* ```
*
* NOTE for debugging the results of this predicate: Since a class can be part of
* multiple MROs, results from this predicate might only be valid in some, but not all,
* inheritance chains: This is the case with the result `C` for `cls=B` in the first
* example -- if `B` and `C` are defined in the same file, but `D` in a different file,
* this might make the results from this predicate difficult to comprehend at first.
*
* For more info on the C3 MRO used in Python see:
* - https://docs.python.org/3/glossary.html#term-method-resolution-order
* - https://www.python.org/download/releases/2.3/mro/
* - https://opendylan.org/_static/c3-linearization.pdf
*/
private Class getNextClassInMro(Class cls) {
// class A(B, ...):
// `B` must be the next class after `A` in the MRO for A.
cls.getBase(0) = classTracker(result).asExpr()
or
// class A(B, C, D):
// - `C` could be the next class after `B` in MRO.
// - `D` could be the next class after `C` in MRO.
// (here `sub` plays the role of `A`, and `cls` and `result` are adjacent bases of it)
exists(Class sub, int i |
sub.getBase(i) = classTracker(cls).asExpr() and
sub.getBase(i + 1) = classTracker(result).asExpr() and
// a class is never reported as its own successor
not result = cls
)
// There are three important properties for MRO computed with C3 in Python:
//
// 1) monotonicity: if C1 precedes C2 in the MRO of C, then C1 precedes C2 in the MRO
// of any subclass of C.
// 2) local precedence ordering: if C1 precedes C2 in the list of superclasses for C,
// they will keep the same order in the MRO for C (and due to monotonicity, any
// subclass).
// 3) consistency with the extended precedence graph: if A and B (that are part of the
// class hierarchy of C) do not have a subclass/superclass relationship on their
// own, the ordering of A and B in the MRO of C will be determined by the local
// precedence ordering in the classes that use both A and B, either directly or
// through a subclass. (see paper for more details)
//
// Note that not all class hierarchies are allowed with C3, see the Python 2.3 article
// for examples.
}
/**
 * Gets a potential definition of the function `name` according to our approximation of
 * MRO for the class `cls` (see `getNextClassInMro` for more information).
 *
 * If `cls` itself has a method called `name`, that method is the result; otherwise
 * the lookup continues with the classes that might follow `cls` in the MRO.
 */
Function findFunctionAccordingToMro(Class cls, string name) {
  // `cls` defines the method itself
  result.getName() = name and
  result = cls.getAMethod()
  or
  // `cls` has no method with that name, so look further along the (approximated) MRO
  not exists(Function own | own = cls.getAMethod() | own.getName() = name) and
  exists(Class next | next = getNextClassInMro(cls) |
    result = findFunctionAccordingToMro(next, name)
  )
}
/**
* Gets a class that, from an approximated MRO calculation, might be the next class
* after `cls` in the MRO for `startingClass`.
*
* Note: this is almost the same as `getNextClassInMro`, except we know the
* `startingClass`, which can give slightly more precise results.
*
* See QLDoc for `getNextClassInMro`.
*/
Class getNextClassInMroKnownStartingClass(Class cls, Class startingClass) {
// the first base of `cls`, where `cls` is `startingClass` or a (transitive) superclass
// of it
cls.getBase(0) = classTracker(result).asExpr() and
cls = getADirectSuperclass*(startingClass)
or
// `cls` and `result` are adjacent bases of a class `sub`, where `sub` is
// `startingClass` or a (transitive) superclass of it
exists(Class sub, int i | sub = getADirectSuperclass*(startingClass) |
sub.getBase(i) = classTracker(cls).asExpr() and
sub.getBase(i + 1) = classTracker(result).asExpr() and
not result = cls
)
}
/**
* Gets a potential definition of the function `name`, when the lookup has reached
* the class `cls` while following our approximation of the MRO for `startingClass`.
*
* This is the recursive helper for the 2-argument predicate of the same name.
*/
private Function findFunctionAccordingToMroKnownStartingClass(
Class cls, Class startingClass, string name
) {
// `cls` defines the method itself, and `cls` is `startingClass` or a (transitive)
// superclass of it
result = cls.getAMethod() and
result.getName() = name and
cls = getADirectSuperclass*(startingClass)
or
// `cls` has no method with that name, so continue with the next class in the MRO
not cls.getAMethod().getName() = name and
result =
findFunctionAccordingToMroKnownStartingClass(getNextClassInMroKnownStartingClass(cls,
startingClass), startingClass, name)
}
/**
* Gets a potential definition of the function `name` according to our approximation of
* MRO for the class `startingClass` (see `getNextClassInMroKnownStartingClass` for more
* information).
*
* Note: this is almost the same as `findFunctionAccordingToMro`, except we know the
* `startingClass`, which can give slightly more precise results.
*/
pragma[inline]
Function findFunctionAccordingToMroKnownStartingClass(Class startingClass, string name) {
// the lookup starts at `startingClass` itself
result = findFunctionAccordingToMroKnownStartingClass(startingClass, startingClass, name)
}
// =============================================================================
// attribute trackers
// =============================================================================
/**
* Gets a reference to the attribute read `attr`, tracked with type-tracker `t`.
*
* Only attribute reads where the object is a reference to a class, a class instance,
* a `self`/`cls` argument, or the result of a `super` call are tracked.
*/
private TypeTrackingNode attrReadTracker(TypeTracker t, AttrRead attr) {
// base case: the attribute read itself
t.start() and
result = attr and
attr.getObject() in [
classTracker(_), classInstanceTracker(_), selfTracker(_), clsArgumentTracker(_),
superCallNoArgumentTracker(_), superCallTwoArgumentTracker(_, _)
]
or
// recursive case: take one more type-tracking step
exists(TypeTracker t2 | result = attrReadTracker(t2, attr).track(t2, t))
}
/**
 * Gets a data-flow node that refers to the value of the attribute read `attr`, as
 * found by type-tracking from `attr`.
 */
Node attrReadTracker(AttrRead attr) {
  exists(TypeTrackingNode tracked | tracked = attrReadTracker(TypeTracker::end(), attr) |
    tracked.flowsTo(result)
  )
}
// =============================================================================
// call and argument resolution
// =============================================================================
/** The underlying algebraic type for `CallType`, with one branch for each type of call. */
newtype TCallType =
/** A call to a function that is not part of a class. */
CallTypePlainFunction() or
/**
* A call to a "normal" method on a class instance.
* Does not include staticmethods or classmethods.
*/
CallTypeNormalMethod() or
/** A call to a staticmethod. */
CallTypeStaticMethod() or
/** A call to a classmethod. */
CallTypeClassMethod() or
/**
* A call to a method on a class, not going through an instance method, such as
*
* ```py
* class Foo:
*     def method(self, arg):
*         pass
*
* foo = Foo()
* Foo.method(foo, 42)
* ```
*/
CallTypeMethodAsPlainFunction() or
/** A call to a class. */
CallTypeClass() or
/** A call on a class instance, that goes to the `__call__` method of the class */
CallTypeClassInstanceCall()
/** A type of call, see the branches of `TCallType` for what each one means. */
class CallType extends TCallType {
  /** Gets a textual representation of this call type, which is the name of its branch. */
  string toString() {
    this = CallTypePlainFunction() and result = "CallTypePlainFunction"
    or
    this = CallTypeNormalMethod() and result = "CallTypeNormalMethod"
    or
    this = CallTypeStaticMethod() and result = "CallTypeStaticMethod"
    or
    this = CallTypeClassMethod() and result = "CallTypeClassMethod"
    or
    this = CallTypeMethodAsPlainFunction() and result = "CallTypeMethodAsPlainFunction"
    or
    this = CallTypeClass() and result = "CallTypeClass"
    or
    this = CallTypeClassInstanceCall() and result = "CallTypeClassInstanceCall"
  }
}
// -------------------------------------
// method call resolution
// -------------------------------------
private module MethodCalls {
/**
* Holds if `call` is a call to a method `target` on an instance or class, where the
* instance or class is not derived from an implicit `self`/`cls` argument to a method
* -- for that, see `callWithinMethodImplicitSelfOrCls`.
*
* It is found by making an attribute read `attr` with the name `functionName` on a
* reference to the class `cls`, or to an instance of the class `cls`. The reference the
* attribute-read is made on is `self`.
*
* The `target` is looked up from `cls` using `findFunctionAccordingToMroKnownStartingClass`.
*/
pragma[nomagic]
private predicate directCall(
CallNode call, Function target, string functionName, Class cls, AttrRead attr, Node self
) {
target = findFunctionAccordingToMroKnownStartingClass(cls, functionName) and
directCall_join(call, functionName, cls, attr, self)
}
/** Extracted to give good join order */
pragma[nomagic]
private predicate directCall_join(
CallNode call, string functionName, Class cls, AttrRead attr, Node self
) {
call.getFunction() = attrReadTracker(attr).asCfgNode() and
attr.accesses(self, functionName) and
self in [classTracker(cls), classInstanceTracker(cls)]
}
/**
* Holds if `call` is a call to a method `target` derived from an implicit `self`/`cls`
* argument to a method within the class `classWithMethod`.
*
* It is found by making an attribute read `attr` with the name `functionName` on a
* reference to an implicit `self`/`cls` argument. The reference the attribute-read is
* made on is `self`.
*/
pragma[nomagic]
private predicate callWithinMethodImplicitSelfOrCls(
CallNode call, Function target, string functionName, Class classWithMethod, AttrRead attr,
Node self
) {
target = findFunctionAccordingToMro(getADirectSubclass*(classWithMethod), functionName) and
callWithinMethodImplicitSelfOrCls_join(call, functionName, classWithMethod, attr, self)
}
/** Extracted to give good join order */
pragma[nomagic]
private predicate callWithinMethodImplicitSelfOrCls_join(
CallNode call, string functionName, Class classWithMethod, AttrRead attr, Node self
) {
call.getFunction() = attrReadTracker(attr).asCfgNode() and
attr.accesses(self, functionName) and
self in [clsArgumentTracker(classWithMethod), selfTracker(classWithMethod)]
}
/**
* Like `fromSuper`, but only for `__new__`, and without requirement for being able to
* resolve the call to a known target (since the only super class might be the
* builtin `object`, so we never have the implementation of `__new__` in the DB).
*/
predicate fromSuperNewCall(CallNode call, Class classUsedInSuper, AttrRead attr, Node self) {
fromSuper_join(call, "__new__", classUsedInSuper, attr, self) and
self in [classTracker(_), clsArgumentTracker(_)]
}
/**
* Holds if `call` is a call to a method `target`, derived from a use of `super`, either
* as:
*
* (1) `super(SomeClass, obj)`, where the first argument is a reference to the class
* `classUsedInSuper`, and the second argument is `self`.
*
* (2) `super()`. This implicit version can only happen within a method in a class.
* The implicit first argument is the class the call happens within `classUsedInSuper`.
* The implicit second argument is the `self`/`cls` parameter of the method this happens
* within.
*
* The method call is found by making an attribute read `attr` with the name
* `functionName` on the return value from the `super` call.
*/
pragma[nomagic]
predicate fromSuper(
CallNode call, Function target, string functionName, Class classUsedInSuper, AttrRead attr,
Node self
) {
target = findFunctionAccordingToMro(getNextClassInMro(classUsedInSuper), functionName) and
fromSuper_join(call, functionName, classUsedInSuper, attr, self)
}
/** Extracted to give good join order */
pragma[nomagic]
private predicate fromSuper_join(
CallNode call, string functionName, Class classUsedInSuper, AttrRead attr, Node self
) {
call.getFunction() = attrReadTracker(attr).asCfgNode() and
(
exists(Function func |
attr.accesses(superCallNoArgumentTracker(func), functionName) and
// Requiring enclosing scope of function to be a class is a little too
// restrictive, since it is possible to use `super()` in a function defined inside
// the method, where the first argument to the nested-function will be used as
// implicit self argument. In practice I don't expect this to be a problem, and we
// did not support this with points-to either.
func.getEnclosingScope() = classUsedInSuper and
self.(ParameterNode).getParameter() = func.getArg(0)
)
or
attr.accesses(superCallTwoArgumentTracker(classUsedInSuper, self), functionName)
)
}
predicate resolveMethodCall(CallNode call, Function target, CallType type, Node self) {
(
directCall(call, target, _, _, _, self)
or
callWithinMethodImplicitSelfOrCls(call, target, _, _, _, self)
or
fromSuper(call, target, _, _, _, self)
) and
(
// normal method call
type instanceof CallTypeNormalMethod and
(
self = classInstanceTracker(_)
or
self = selfTracker(_)
) and
not isStaticmethod(target) and
not isClassmethod(target)
or
// method as plain function call
type instanceof CallTypeMethodAsPlainFunction and
self = classTracker(_) and
not isStaticmethod(target) and
not isClassmethod(target)
or
// staticmethod call
type instanceof CallTypeStaticMethod and
isStaticmethod(target)
or
// classmethod call
type instanceof CallTypeClassMethod and
isClassmethod(target)
)
}
}
import MethodCalls
// -------------------------------------
// class call resolution
// -------------------------------------
/**
 * Holds if `call` calls (i.e. instantiates) the class `cls`.
 *
 * NOTE: This predicate mostly exists so results can be compared with the old points-to
 * based call-graph resolution, and might therefore be removed in the future.
 */
predicate resolveClassCall(CallNode call, Class cls) {
  exists(ControlFlowNode callee | callee = call.getFunction() |
    // the callee is a reference to the class itself
    callee = classTracker(cls).asCfgNode()
    or
    // `cls()` inside a classmethod (which also contains `type(self)()` inside a method):
    // the class being constructed may be the class containing the method, or any of
    // its (transitive) subclasses.
    exists(Class definingClass |
      definingClass = getADirectSuperclass*(cls) and
      callee = clsArgumentTracker(definingClass).asCfgNode()
    )
  )
}
/**
 * Gets the function named `funcName` (which is either `__init__` or `__new__`) that is
 * invoked when `cls` is constructed. The function is looked up using our MRO
 * calculation, starting from `cls`.
 */
Function invokedFunctionFromClassConstruction(Class cls, string funcName) {
  // According to https://docs.python.org/3/reference/datamodel.html#object.__new__,
  // `__init__` is only called when `__new__` returns an instance of the class (which is
  // not a requirement). To keep things simple, we assume `__init__` is always called.
  (funcName = "__init__" or funcName = "__new__") and
  result = findFunctionAccordingToMroKnownStartingClass(cls, funcName)
}
/**
 * Holds if `call` calls a class instance, and therefore goes to the `__call__` method
 * `target` of the class. `self` is the data-flow node for the function part of `call`,
 * that is, the instance being called.
 *
 * See https://docs.python.org/3/reference/datamodel.html#object.__call__
 */
predicate resolveClassInstanceCall(CallNode call, Function target, Node self) {
  self.asCfgNode() = call.getFunction() and
  exists(Class cls |
    // instance of a known class: start the lookup of `__call__` at that class
    classInstanceTracker(cls).asCfgNode() = call.getFunction() and
    target = findFunctionAccordingToMroKnownStartingClass(cls, "__call__")
    or
    // `self` inside a method of `cls`: the lookup may start at `cls` or any subclass
    selfTracker(cls).asCfgNode() = call.getFunction() and
    target = findFunctionAccordingToMro(getADirectSubclass*(cls), "__call__")
  )
}
// -------------------------------------
// overall call resolution
// -------------------------------------
/**
 * Holds if `call` can be resolved to the function `target`, and the way the call is made
 * is described by the call-type `type`.
 */
cached
predicate resolveCall(CallNode call, Function target, CallType type) {
  Stages::DataFlow::ref() and
  (
    // plain function: callee is a reference to a function that is not a method of a class
    type instanceof CallTypePlainFunction and
    functionTracker(target).asCfgNode() = call.getFunction() and
    not target = any(Class c).getAMethod()
    or
    // any kind of method call; `resolveMethodCall` decides the call-type
    resolveMethodCall(call, target, type, _)
    or
    // class instantiation: goes to `__init__`/`__new__` of the constructed class
    type instanceof CallTypeClass and
    exists(Class constructed | resolveClassCall(call, constructed) |
      target = invokedFunctionFromClassConstruction(constructed, _)
    )
    or
    // calling a class instance: goes to `__call__`
    type instanceof CallTypeClassInstanceCall and
    resolveClassInstanceCall(call, target, _)
  )
}
// =============================================================================
// Argument resolution
// =============================================================================
/**
 * Holds if `arg` is the argument of `call` at position `apos`. This helper predicate only
 * translates between `ArgumentPosition`s and the arguments of the underlying `CallNode`.
 */
cached
predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
  // positional argument
  exists(int i | apos.isPositional(i) | arg.asCfgNode() = call.getArg(i))
  or
  // keyword argument
  exists(string kw | apos.isKeyword(kw) | arg.asCfgNode() = call.getArgByName(kw))
  or
  // the first `*args`
  exists(int starIndex | apos.isStarArgs(starIndex) |
    arg.asCfgNode() = call.getStarArg() and
    // `CallNode.getArg` doesn't include `*args`, so the index has to be found at the AST
    // level. The AST is only used for getting the index, so there is no need to check
    // for dominance in regards to splitting.
    call.getStarArg().getNode() =
      call.getNode().getPositionalArg(starIndex).(Starred).getValue()
  )
  or
  // dictionary splat: either the `**kwargs` of the call, or the synthetic node for it
  apos.isDictSplat() and
  (
    arg = TSynthDictSplatArgumentNode(call)
    or
    arg.asCfgNode() = call.getKwargs()
  )
}
/**
 * Holds if `arg` is the argument at position `apos` of `call`, given that `call` is
 * resolved to `target` with call-type `type`.
 *
 * Knowing just the call-type might seem sufficient for resolving arguments, but the
 * `target` is needed as well to avoid cross-talk. In the example below, assume `Foo`
 * and `Bar` each define their own `meth` method. Without taking the target into
 * account, _both_ `foo` and `bar` would be passed to both `Foo.meth` and `Bar.meth`,
 * which is wrong. The two attribute accesses use the same name, so only the resolved
 * target can tell us which object to pass as the self argument.
 *
 * ```py
 * foo = Foo()
 * bar = Bar()
 * if cond:
 *     func = foo.meth
 * else:
 *     func = bar.meth
 * func(42)
 * ```
 *
 * Note: if `Foo.meth` and `Bar.meth` resolve to the same function, both `self`
 * arguments are sent to that function, which is by definition the right thing to do.
 *
 * ### Bound methods
 *
 * For a bound method, such as `bm = x.m; bm()`, it is not entirely clear whether the
 * object of the attribute lookup (`x.m`) should be used as the self argument of the
 * call (`bm()`). That is what we currently do, but there may be cases where this is
 * not what we want.
 *
 * In the (contrived) example below we would like to clear the taint from the list
 * before it reaches the sink. Since the `clear()` call contains no use of `l`, we
 * currently have no way to achieve that.
 *
 * ```py
 * l = list()
 * clear = l.clear
 * l.append(tainted)
 * clear()
 * sink(l)
 * ```
 *
 * Even worse, a bound method has a `__self__` property referring to the object it is
 * bound to, so the code can be rewritten as:
 *
 * ```py
 * l = list()
 * clear = l.clear
 * clear.__self__.append(tainted)
 * clear()
 * sink(l)
 * ```
 *
 * One possible solution is to track the object in a synthetic data-flow node each time
 * the bound method is used. The `clear()` call would then essentially be translated
 * into `l.clear()`, and use-use flow would still work.
 */
cached
predicate getCallArg(CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) {
  Stages::DataFlow::ref() and
  resolveCall(call, target, type) and
  (
    // Explicit arguments. For every call-type except "method as plain function", the
    // arguments written in the call keep their position.
    (
      type instanceof CallTypePlainFunction or
      type instanceof CallTypeNormalMethod or
      type instanceof CallTypeStaticMethod or
      type instanceof CallTypeClassMethod or
      type instanceof CallTypeClass or
      type instanceof CallTypeClassInstanceCall
    ) and
    normalCallArg(call, arg, apos)
    or
    // Implicit self argument for normal method calls, and implicit cls argument for
    // classmethod calls -- see note above about bound methods.
    apos.isSelf() and
    resolveMethodCall(call, target, type, arg) and
    (
      type instanceof CallTypeNormalMethod
      or
      type instanceof CallTypeClassMethod and
      arg in [classTracker(_), clsArgumentTracker(_)]
    ) and
    // The dataflow library requires that arguments and calls are in the same enclosing
    // callable. That requirement would be broken if `my_obj` was used as the self
    // argument of the `f()` call in the example below:
    // ```py
    // def call_func(f):
    //     f()
    //
    // call_func(my_obj.some_method)
    // ```
    exists(CfgNode callNode | callNode.getNode() = call |
      callNode.getEnclosingCallable() = arg.getEnclosingCallable()
    )
    or
    // Method as plain function call.
    //
    // Argument index 0 of the call has position self (and MUST be given as a positional
    // argument in the call). Consequently the remaining positional arguments are shifted
    // by 1, such that argument index 1 of the call has argument position 0.
    type instanceof CallTypeMethodAsPlainFunction and
    (
      apos.isSelf() and arg.asCfgNode() = call.getArg(0)
      or
      exists(ArgumentPosition callPos, int i |
        callPos.isPositional(i) and
        apos.isPositional(i - 1) and
        normalCallArg(call, arg, callPos)
      )
      or
      // arguments at non-positional positions are passed on unchanged
      not apos.isPositional(_) and normalCallArg(call, arg, apos)
    )
    or
    // Class call: the synthetic node for the created object is only passed to
    // `__init__`, and not to `__new__`, since `__new__` is a classmethod.
    type instanceof CallTypeClass and
    apos.isSelf() and
    target = invokedFunctionFromClassConstruction(_, "__init__") and
    arg = TSyntheticPreUpdateNode(call)
    or
    // Call on a class instance, which goes to the `__call__` method: the instance being
    // called is the self argument.
    type instanceof CallTypeClassInstanceCall and
    apos.isSelf() and
    resolveClassInstanceCall(call, target, arg)
  )
}
// =============================================================================
// DataFlowCall
// =============================================================================
/** The calls known to the data-flow library; see `DataFlowCall`. */
newtype TDataFlowCall =
/** A `CallNode` that `resolveCall` resolves to `target` with call-type `type`. */
TNormalCall(CallNode call, Function target, CallType type) { resolveCall(call, target, type) } or
/**
 * A `CallNode` viewed as a potential call to a library callable. Note that this branch
 * places no restriction on `call`.
 */
TPotentialLibraryCall(CallNode call) or
/** A synthesized call inside a summarized callable */
TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
}
/** A call that the global data flow computation takes into account. */
abstract class DataFlowCall extends TDataFlowCall {
  /** Gets a textual representation of this element. */
  abstract string toString();

  /** Gets the callable that this call goes to. */
  abstract DataFlowCallable getCallable();

  /** Gets the argument of this call at position `apos`, if any. */
  abstract ArgumentNode getArgument(ArgumentPosition apos);

  /** Gets the control flow node that represents this call, if any. */
  abstract ControlFlowNode getNode();

  /** Gets the callable that encloses this call. */
  abstract DataFlowCallable getEnclosingCallable();

  /** Gets the location of this dataflow call. */
  abstract Location getLocation();

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(Location loc | loc = this.getLocation() |
      loc.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
    )
  }
}
/**
 * A call that appears in the program source, in contrast to a synthesised call.
 * Its location is the location of its control flow node.
 */
abstract class ExtractedDataFlowCall extends DataFlowCall {
  override Location getLocation() {
    exists(ControlFlowNode cfg | cfg = this.getNode() | result = cfg.getLocation())
  }
}
/**
 * A call in source code that has been resolved, and has an underlying `CallNode`.
 *
 * "Normal" is used in contrast to special calls, such as `obj[0]` calling the
 * `__getitem__` method on the object. Calls that go to the `__call__` special method
 * are, however, also included.
 */
class NormalCall extends ExtractedDataFlowCall, TNormalCall {
  CallNode call;
  Function target;
  CallType type;

  NormalCall() { this = TNormalCall(call, target, type) }

  /** Gets the `CallType` of this call. */
  CallType getCallType() { result = type }

  override ControlFlowNode getNode() { result = call }

  override DataFlowCallable getCallable() {
    exists(DataFlowFunction fn | fn.getScope() = target | result = fn)
  }

  override DataFlowCallable getEnclosingCallable() { call.getScope() = result.getScope() }

  override ArgumentNode getArgument(ArgumentPosition apos) {
    getCallArg(call, target, type, result, apos)
  }

  override string toString() {
    // Calling `toString` on the `CallNode` itself would give
    // `ControlFlowNode for func()`
    // where the `ControlFlowNode` part is nothing but clutter. We therefore use the
    // AST node instead.
    result = call.getNode().toString()
  }
}
/**
 * A call that potentially goes to a summarized callable, a `LibraryCallable`.
 *
 * All resolved calls are currently excluded. As a consequence, a call to for example
 * `map`, which is a `ClassCall`, cannot currently be given a summary.
 * The hope is to lift this restriction in the future, such that this class includes all
 * potential calls to summaries.
 *
 * NOTE(review): the `TPotentialLibraryCall` branch is not itself restricted to
 * unresolved calls -- confirm where the exclusion described above is applied.
 */
class PotentialLibraryCall extends ExtractedDataFlowCall, TPotentialLibraryCall {
  CallNode call;

  PotentialLibraryCall() { this = TPotentialLibraryCall(call) }

  override ControlFlowNode getNode() { result = call }

  override DataFlowCallable getEnclosingCallable() { call.getScope() = result.getScope() }

  // We cannot refer to a `PotentialLibraryCall` here,
  // as that could in turn refer to type tracking.
  // This call will be tied to a `PotentialLibraryCall` via
  // `viableCallable` when the global data flow is assembled.
  override DataFlowCallable getCallable() { none() }

  override ArgumentNode getArgument(ArgumentPosition apos) {
    // potential self argument, from `foo.bar()` -- note that `foo` could just as well be
    // a module reference, but we really don't have a good way of knowing :|
    apos.isSelf() and
    exists(AttrNode attr | attr = call.getFunction() | result.asCfgNode() = attr.getObject())
    or
    normalCallArg(call, result, apos)
  }

  override string toString() {
    // Calling `toString` on the `CallNode` itself would give
    // `ControlFlowNode for func()`
    // where the `ControlFlowNode` part is nothing but clutter. We therefore use the
    // AST node instead.
    result = call.getNode().toString()
  }
}
/**
 * A call that is synthesized inside a callable that has a flow summary.
 *
 * For example, in
 * ```python
 * map(lambda x: x + 1, [1, 2, 3])
 * ```
 *
 * a call to the lambda argument is synthesized inside `map`.
 */
class SummaryCall extends DataFlowCall, TSummaryCall {
  private FlowSummaryImpl::Public::SummarizedCallable summarized;
  private Node callee;

  SummaryCall() { this = TSummaryCall(summarized, callee) }

  /** Gets the data flow node that this call targets. */
  Node getReceiver() { result = callee }

  override string toString() { result = "[summary] call to " + callee + " in " + summarized }

  override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = summarized }

  // A synthesized call has no callable, arguments, control flow node, or location of
  // its own.
  override DataFlowCallable getCallable() { none() }

  override ArgumentNode getArgument(ArgumentPosition apos) { none() }

  override ControlFlowNode getNode() { none() }

  override Location getLocation() { none() }
}
/**
 * A node in the data flow graph that represents the value of a parameter at function
 * entry.
 */
abstract class ParameterNodeImpl extends Node {
  /** Gets the `Parameter` that this `ParameterNode` represents. */
  abstract Parameter getParameter();

  /**
   * Holds if this node is the parameter at position `ppos` of the callable `c`.
   */
  predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
    c.getParameter(ppos) = this
  }
}
/** A parameter of a library callable that has a flow summary. */
class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
  private FlowSummaryImpl::Public::SummarizedCallable summarized;
  private ParameterPosition position;

  SummaryParameterNode() { this = TSummaryParameterNode(summarized, position) }

  // there is no AST parameter behind a summary parameter
  override Parameter getParameter() { none() }

  override predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
    ppos = position and
    c.asLibraryCallable() = summarized
  }

  override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = summarized }

  override string toString() { result = "parameter " + position + " of " + summarized }

  // Hack to return "empty location"
  override predicate hasLocationInfo(
    string file, int startline, int startcolumn, int endline, int endcolumn
  ) {
    file = "" and
    startline = 0 and
    endline = 0 and
    startcolumn = 0 and
    endcolumn = 0
  }
}
/** A data-flow node that is used for modeling flow summaries. */
class SummaryNode extends Node, TSummaryNode {
  private FlowSummaryImpl::Public::SummarizedCallable summarized;
  private FlowSummaryImpl::Private::SummaryNodeState nodeState;

  SummaryNode() { this = TSummaryNode(summarized, nodeState) }

  override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = summarized }

  override string toString() { result = "[summary] " + nodeState + " in " + summarized }

  // Hack to return "empty location"
  override predicate hasLocationInfo(
    string file, int startline, int startcolumn, int endline, int endcolumn
  ) {
    file = "" and
    startline = 0 and
    endline = 0 and
    startcolumn = 0 and
    endcolumn = 0
  }
}
/** A summary node that `FlowSummaryImpl` designates as a return node. */
private class SummaryReturnNode extends SummaryNode, ReturnNode {
  private ReturnKind returnKind;

  SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this, returnKind) }

  override ReturnKind getKind() { result = returnKind }
}
/**
 * A summary node that `FlowSummaryImpl` designates as an argument of some call, at some
 * position.
 */
private class SummaryArgumentNode extends SummaryNode, ArgumentNode {
SummaryArgumentNode() { FlowSummaryImpl::Private::summaryArgumentNode(_, this, _) }
// delegates to `FlowSummaryImpl` for the call and position this node is an argument of
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
FlowSummaryImpl::Private::summaryArgumentNode(call, this, pos)
}
}
/** A summary node that `FlowSummaryImpl` designates as the post-update node of another node. */
private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNodeImpl {
  private Node preUpdate;

  SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, preUpdate) }

  override Node getPreUpdateNode() { result = preUpdate }
}
/** Gets a callable that is a viable run-time target of the call `call`. */
DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
  result = call.getCallable()
  or
  // A call to a library callable with a flow summary.
  // Here the callable cannot be resolved from the call, since that would make data flow
  // depend on type tracking. The call is instead resolved from the summary.
  call instanceof PotentialLibraryCall and
  exists(LibraryCallable lib |
    lib.getACall().getNode() = call.getNode() and
    result = TLibraryCallable(lib)
  )
}
// =============================================================================
// Remaining required data-flow things
// =============================================================================
/** The return kinds; there is only a single one, see `ReturnKind`. */
private newtype TReturnKind = TNormalReturnKind()
/**
 * A return kind. A return kind describes how a value can be returned
 * from a callable. For Python, this is simply a method return.
 */
class ReturnKind extends TReturnKind {
/** Gets a textual representation of this element, which is always `"return"`. */
string toString() { result = "return" }
}
/**
 * A data flow node that represents a value returned by a callable. This is the abstract
 * base class; concrete return nodes are provided by its subclasses.
 */
abstract class ReturnNode extends Node {
/**
 * Gets the kind of this return node.
 *
 * This default implementation places no constraint on the result, so it holds for every
 * `ReturnKind`.
 */
ReturnKind getKind() { any() }
}
/**
 * A return node from the program source: a control-flow node for the value of a
 * `return` statement.
 */
class ExtractedReturnNode extends ReturnNode, CfgNode {
  // See `TaintTrackingImplementation::returnFlowStep`
  ExtractedReturnNode() { exists(Return ret | node = ret.getValue().getAFlowNode()) }

  override ReturnKind getKind() { any() }
}
/** A data-flow node that represents the output of a call. */
abstract class OutNode extends Node {
/**
 * Gets the underlying call, where this node is a corresponding output of kind `kind`.
 * See `getAnOutNode` for the inverse direction (from call and kind to node).
 */
abstract DataFlowCall getCall(ReturnKind kind);
}
/** Provides the concrete `OutNode` classes. */
private module OutNodes {
  /**
   * A data-flow node that reads a value that is returned directly by a callable, i.e.
   * the expression node of an extracted call.
   */
  class ExprOutNode extends OutNode, ExprNode {
    private DataFlowCall producer;

    ExprOutNode() { this.getNode() = producer.(ExtractedDataFlowCall).getNode() }

    override DataFlowCall getCall(ReturnKind kind) {
      // holds for every return kind
      exists(kind) and
      result = producer
    }
  }

  /** A summary node that `FlowSummaryImpl` designates as the output of a call. */
  private class SummaryOutNode extends SummaryNode, OutNode {
    SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this, _) }

    override DataFlowCall getCall(ReturnKind kind) {
      FlowSummaryImpl::Private::summaryOutNode(result, this, kind)
    }
  }
}
/**
 * Gets a node that can read the value that `call` returns with return kind `kind`.
 */
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { result.getCall(kind) = call }