mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Merge pull request #11376 from RasmusWL/call-graph-code
Python: New type-tracking based call-graph
This commit is contained in:
4
python/ql/lib/change-notes/2023-01-16-new-call-graph.md
Normal file
4
python/ql/lib/change-notes/2023-01-16-new-call-graph.md
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: majorAnalysis
|
||||
---
|
||||
* We use a new analysis for the call-graph (determining which function is called). This can lead to changed results. In most cases this is much more accurate than the old call-graph that was based on points-to, but we do lose a few valid edges in the call-graph, especially around methods that are not defined inside its' class.
|
||||
@@ -125,7 +125,7 @@ class ControlFlowNode extends @py_flow_node {
|
||||
/** Gets a textual representation of this element. */
|
||||
cached
|
||||
string toString() {
|
||||
Stages::DataFlow::ref() and
|
||||
Stages::AST::ref() and
|
||||
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
|
||||
or
|
||||
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
|
||||
@@ -411,6 +411,12 @@ class CallNode extends ControlFlowNode {
|
||||
result.getNode() = this.getNode().getStarArg() and
|
||||
result.getBasicBlock().dominates(this.getBasicBlock())
|
||||
}
|
||||
|
||||
/** Gets a dictionary (**) argument of this call, if any. */
|
||||
ControlFlowNode getKwargs() {
|
||||
result.getNode() = this.getNode().getKwargs() and
|
||||
result.getBasicBlock().dominates(this.getBasicBlock())
|
||||
}
|
||||
}
|
||||
|
||||
/** A control flow corresponding to an attribute expression, such as `value.attr` */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,838 +0,0 @@
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Points-to based call-graph.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import DataFlowPublic
|
||||
private import semmle.python.SpecialMethods
|
||||
private import FlowSummaryImpl as FlowSummaryImpl
|
||||
|
||||
/** A parameter position represented by an integer. */
|
||||
class ParameterPosition extends int {
|
||||
ParameterPosition() { exists(any(DataFlowCallable c).getParameter(this)) }
|
||||
|
||||
/** Holds if this position represents a positional parameter at position `pos`. */
|
||||
predicate isPositional(int pos) { this = pos } // with the current representation, all parameters are positional
|
||||
}
|
||||
|
||||
/** An argument position represented by an integer. */
|
||||
class ArgumentPosition extends int {
|
||||
ArgumentPosition() { this in [-2, -1] or exists(any(Call c).getArg(this)) }
|
||||
|
||||
/** Holds if this position represents a positional argument at position `pos`. */
|
||||
predicate isPositional(int pos) { this = pos } // with the current representation, all arguments are positional
|
||||
}
|
||||
|
||||
/** Holds if arguments at position `apos` match parameters at position `ppos`. */
|
||||
pragma[inline]
|
||||
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos }
|
||||
|
||||
/**
|
||||
* Computes routing of arguments to parameters
|
||||
*
|
||||
* When a call contains more positional arguments than there are positional parameters,
|
||||
* the extra positional arguments are passed as a tuple to a starred parameter. This is
|
||||
* achieved by synthesizing a node `TPosOverflowNode(call, callable)`
|
||||
* that represents the tuple of extra positional arguments. There is a store step from each
|
||||
* extra positional argument to this node.
|
||||
*
|
||||
* CURRENTLY NOT SUPPORTED:
|
||||
* When a call contains an iterable unpacking argument, such as `func(*args)`, it is expanded into positional arguments.
|
||||
*
|
||||
* CURRENTLY NOT SUPPORTED:
|
||||
* If a call contains an iterable unpacking argument, such as `func(*args)`, and the callee contains a starred argument, any extra
|
||||
* positional arguments are passed to the starred argument.
|
||||
*
|
||||
* When a call contains keyword arguments that do not correspond to keyword parameters, these
|
||||
* extra keyword arguments are passed as a dictionary to a doubly starred parameter. This is
|
||||
* achieved by synthesizing a node `TKwOverflowNode(call, callable)`
|
||||
* that represents the dictionary of extra keyword arguments. There is a store step from each
|
||||
* extra keyword argument to this node.
|
||||
*
|
||||
* When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, with entries corresponding to a keyword parameter,
|
||||
* the value at such a key is unpacked and passed to the parameter. This is achieved
|
||||
* by synthesizing an argument node `TKwUnpacked(call, callable, name)` representing the unpacked
|
||||
* value. This node is used as the argument passed to the matching keyword parameter. There is a read
|
||||
* step from the dictionary argument to the synthesized argument node.
|
||||
*
|
||||
* When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, and the callee contains a doubly starred parameter,
|
||||
* entries which are not unpacked are passed to the doubly starred parameter. This is achieved by
|
||||
* adding a dataflow step from the dictionary argument to `TKwOverflowNode(call, callable)` and a
|
||||
* step to clear content of that node at any unpacked keys.
|
||||
*
|
||||
* ## Examples:
|
||||
* Assume that we have the callable
|
||||
* ```python
|
||||
* def f(x, y, *t, **d):
|
||||
* pass
|
||||
* ```
|
||||
* Then the call
|
||||
* ```python
|
||||
* f(0, 1, 2, a=3)
|
||||
* ```
|
||||
* will be modeled as
|
||||
* ```python
|
||||
* f(0, 1, [*t], [**d])
|
||||
* ```
|
||||
* where `[` and `]` denotes synthesized nodes, so `[*t]` is the synthesized tuple argument
|
||||
* `TPosOverflowNode` and `[**d]` is the synthesized dictionary argument `TKwOverflowNode`.
|
||||
* There will be a store step from `2` to `[*t]` at pos `0` and one from `3` to `[**d]` at key
|
||||
* `a`.
|
||||
*
|
||||
* For the call
|
||||
* ```python
|
||||
* f(0, **{"y": 1, "a": 3})
|
||||
* ```
|
||||
* no tuple argument is synthesized. It is modeled as
|
||||
* ```python
|
||||
* f(0, [y=1], [**d])
|
||||
* ```
|
||||
* where `[y=1]` is the synthesized unpacked argument `TKwUnpacked` (with `name` = `y`). There is
|
||||
* a read step from `**{"y": 1, "a": 3}` to `[y=1]` at key `y` to get the value passed to the parameter
|
||||
* `y`. There is a dataflow step from `**{"y": 1, "a": 3}` to `[**d]` to transfer the content and
|
||||
* a clearing of content at key `y` for node `[**d]`, since that value has been unpacked.
|
||||
*/
|
||||
module ArgumentPassing {
|
||||
/**
|
||||
* Holds if `call` represents a `DataFlowCall` to a `DataFlowCallable` represented by `callable`.
|
||||
*
|
||||
* It _may not_ be the case that `call = callable.getACall()`, i.e. if `call` represents a `ClassCall`.
|
||||
*
|
||||
* Used to limit the size of predicates.
|
||||
*/
|
||||
predicate connects(CallNode call, CallableValue callable) {
|
||||
exists(NormalCall c |
|
||||
call = c.getNode() and
|
||||
callable = c.getCallable().getCallableValue()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the `n`th parameter of `callable`.
|
||||
* If the callable has a starred parameter, say `*tuple`, that is matched with `n=-1`.
|
||||
* If the callable has a doubly starred parameter, say `**dict`, that is matched with `n=-2`.
|
||||
* Note that, unlike other languages, we do _not_ use -1 for the position of `self` in Python,
|
||||
* as it is an explicit parameter at position 0.
|
||||
*/
|
||||
NameNode getParameter(CallableValue callable, int n) {
|
||||
// positional parameter
|
||||
result = callable.getParameter(n)
|
||||
or
|
||||
// starred parameter, `*tuple`
|
||||
exists(Function f |
|
||||
f = callable.getScope() and
|
||||
n = -1 and
|
||||
result = f.getVararg().getAFlowNode()
|
||||
)
|
||||
or
|
||||
// doubly starred parameter, `**dict`
|
||||
exists(Function f |
|
||||
f = callable.getScope() and
|
||||
n = -2 and
|
||||
result = f.getKwarg().getAFlowNode()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A type representing a mapping from argument indices to parameter indices.
|
||||
* We currently use two mappings: NoShift, the identity, used for ordinary
|
||||
* function calls, and ShiftOneUp which is used for calls where an extra argument
|
||||
* is inserted. These include method calls, constructor calls and class calls.
|
||||
* In these calls, the argument at index `n` is mapped to the parameter at position `n+1`.
|
||||
*/
|
||||
newtype TArgParamMapping =
|
||||
TNoShift() or
|
||||
TShiftOneUp()
|
||||
|
||||
/** A mapping used for parameter passing. */
|
||||
abstract class ArgParamMapping extends TArgParamMapping {
|
||||
/** Gets the index of the parameter that corresponds to the argument at index `argN`. */
|
||||
bindingset[argN]
|
||||
abstract int getParamN(int argN);
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
abstract string toString();
|
||||
}
|
||||
|
||||
/** A mapping that passes argument `n` to parameter `n`. */
|
||||
class NoShift extends ArgParamMapping, TNoShift {
|
||||
NoShift() { this = TNoShift() }
|
||||
|
||||
override string toString() { result = "NoShift [n -> n]" }
|
||||
|
||||
bindingset[argN]
|
||||
override int getParamN(int argN) { result = argN }
|
||||
}
|
||||
|
||||
/** A mapping that passes argument `n` to parameter `n+1`. */
|
||||
class ShiftOneUp extends ArgParamMapping, TShiftOneUp {
|
||||
ShiftOneUp() { this = TShiftOneUp() }
|
||||
|
||||
override string toString() { result = "ShiftOneUp [n -> n+1]" }
|
||||
|
||||
bindingset[argN]
|
||||
override int getParamN(int argN) { result = argN + 1 }
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the node representing the argument to `call` that is passed to the parameter at
|
||||
* (zero-based) index `paramN` in `callable`. If this is a positional argument, it must appear
|
||||
* at an index, `argN`, in `call` which satisfies `paramN = mapping.getParamN(argN)`.
|
||||
*
|
||||
* `mapping` will be the identity for function calls, but not for method- or constructor calls,
|
||||
* where the first parameter is `self` and the first positional argument is passed to the second positional parameter.
|
||||
* Similarly for classmethod calls, where the first parameter is `cls`.
|
||||
*
|
||||
* NOT SUPPORTED: Keyword-only parameters.
|
||||
*/
|
||||
Node getArg(CallNode call, ArgParamMapping mapping, CallableValue callable, int paramN) {
|
||||
connects(call, callable) and
|
||||
(
|
||||
// positional argument
|
||||
exists(int argN |
|
||||
paramN = mapping.getParamN(argN) and
|
||||
result = TCfgNode(call.getArg(argN))
|
||||
)
|
||||
or
|
||||
// keyword argument
|
||||
// TODO: Since `getArgName` have no results for keyword-only parameters,
|
||||
// these are currently not supported.
|
||||
exists(Function f, string argName |
|
||||
f = callable.getScope() and
|
||||
f.getArgName(paramN) = argName and
|
||||
result = TCfgNode(call.getArgByName(unbind_string(argName)))
|
||||
)
|
||||
or
|
||||
// a synthesized argument passed to the starred parameter (at position -1)
|
||||
callable.getScope().hasVarArg() and
|
||||
paramN = -1 and
|
||||
result = TPosOverflowNode(call, callable)
|
||||
or
|
||||
// a synthesized argument passed to the doubly starred parameter (at position -2)
|
||||
callable.getScope().hasKwArg() and
|
||||
paramN = -2 and
|
||||
result = TKwOverflowNode(call, callable)
|
||||
or
|
||||
// argument unpacked from dict
|
||||
exists(string name |
|
||||
call_unpacks(call, mapping, callable, name, paramN) and
|
||||
result = TKwUnpackedNode(call, callable, name)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/** Currently required in `getArg` in order to prevent a bad join. */
|
||||
bindingset[result, s]
|
||||
private string unbind_string(string s) { result <= s and s <= result }
|
||||
|
||||
/** Gets the control flow node that is passed as the `n`th overflow positional argument. */
|
||||
ControlFlowNode getPositionalOverflowArg(CallNode call, CallableValue callable, int n) {
|
||||
connects(call, callable) and
|
||||
exists(Function f, int posCount, int argNr |
|
||||
f = callable.getScope() and
|
||||
f.hasVarArg() and
|
||||
posCount = f.getPositionalParameterCount() and
|
||||
result = call.getArg(argNr) and
|
||||
argNr >= posCount and
|
||||
argNr = posCount + n
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the control flow node that is passed as the overflow keyword argument with key `key`. */
|
||||
ControlFlowNode getKeywordOverflowArg(CallNode call, CallableValue callable, string key) {
|
||||
connects(call, callable) and
|
||||
exists(Function f |
|
||||
f = callable.getScope() and
|
||||
f.hasKwArg() and
|
||||
not exists(f.getArgByName(key)) and
|
||||
result = call.getArgByName(key)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `call` unpacks a dictionary argument in order to pass it via `name`.
|
||||
* It will then be passed to the parameter of `callable` at index `paramN`.
|
||||
*/
|
||||
predicate call_unpacks(
|
||||
CallNode call, ArgParamMapping mapping, CallableValue callable, string name, int paramN
|
||||
) {
|
||||
connects(call, callable) and
|
||||
exists(Function f |
|
||||
f = callable.getScope() and
|
||||
not exists(int argN | paramN = mapping.getParamN(argN) | exists(call.getArg(argN))) and // no positional argument available
|
||||
name = f.getArgName(paramN) and
|
||||
// not exists(call.getArgByName(name)) and // only matches keyword arguments not preceded by **
|
||||
// TODO: make the below logic respect control flow splitting (by not going to the AST).
|
||||
not call.getNode().getANamedArg().(Keyword).getArg() = name and // no keyword argument available
|
||||
paramN >= 0 and
|
||||
paramN < f.getPositionalParameterCount() + f.getKeywordOnlyParameterCount() and
|
||||
exists(call.getNode().getKwargs()) // dict argument available
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
import ArgumentPassing
|
||||
|
||||
/** A callable defined in library code, identified by a unique string. */
|
||||
abstract class LibraryCallable extends string {
|
||||
bindingset[this]
|
||||
LibraryCallable() { any() }
|
||||
|
||||
/** Gets a call to this library callable. */
|
||||
abstract CallCfgNode getACall();
|
||||
|
||||
/** Gets a data-flow node, where this library callable is used as a call-back. */
|
||||
abstract ArgumentNode getACallback();
|
||||
}
|
||||
|
||||
/**
|
||||
* IPA type for DataFlowCallable.
|
||||
*
|
||||
* A callable is either a function value, a class value, or a module (for enclosing `ModuleVariableNode`s).
|
||||
* A module has no calls.
|
||||
*/
|
||||
newtype TDataFlowCallable =
|
||||
TCallableValue(CallableValue callable) {
|
||||
callable instanceof FunctionValue and
|
||||
not callable.(FunctionValue).isLambda()
|
||||
or
|
||||
callable instanceof ClassValue
|
||||
} or
|
||||
TLambda(Function lambda) { lambda.isLambda() } or
|
||||
TModule(Module m) or
|
||||
TLibraryCallable(LibraryCallable callable)
|
||||
|
||||
/** A callable. */
|
||||
class DataFlowCallable extends TDataFlowCallable {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = "DataFlowCallable" }
|
||||
|
||||
/** Gets a call to this callable. */
|
||||
CallNode getACall() { none() }
|
||||
|
||||
/** Gets the scope of this callable */
|
||||
Scope getScope() { none() }
|
||||
|
||||
/** Gets the specified parameter of this callable */
|
||||
NameNode getParameter(int n) { none() }
|
||||
|
||||
/** Gets the name of this callable. */
|
||||
string getName() { none() }
|
||||
|
||||
/** Gets a callable value for this callable, if any. */
|
||||
CallableValue getCallableValue() { none() }
|
||||
|
||||
/** Gets the underlying library callable, if any. */
|
||||
LibraryCallable asLibraryCallable() { this = TLibraryCallable(result) }
|
||||
|
||||
Location getLocation() { none() }
|
||||
}
|
||||
|
||||
/** A class representing a callable value. */
|
||||
class DataFlowCallableValue extends DataFlowCallable, TCallableValue {
|
||||
CallableValue callable;
|
||||
|
||||
DataFlowCallableValue() { this = TCallableValue(callable) }
|
||||
|
||||
override string toString() { result = callable.toString() }
|
||||
|
||||
override CallNode getACall() { result = callable.getACall() }
|
||||
|
||||
override Scope getScope() { result = callable.getScope() }
|
||||
|
||||
override NameNode getParameter(int n) { result = getParameter(callable, n) }
|
||||
|
||||
override string getName() { result = callable.getName() }
|
||||
|
||||
override CallableValue getCallableValue() { result = callable }
|
||||
}
|
||||
|
||||
/** A class representing a callable lambda. */
|
||||
class DataFlowLambda extends DataFlowCallable, TLambda {
|
||||
Function lambda;
|
||||
|
||||
DataFlowLambda() { this = TLambda(lambda) }
|
||||
|
||||
override string toString() { result = lambda.toString() }
|
||||
|
||||
override CallNode getACall() { result = this.getCallableValue().getACall() }
|
||||
|
||||
override Scope getScope() { result = lambda.getEvaluatingScope() }
|
||||
|
||||
override NameNode getParameter(int n) { result = getParameter(this.getCallableValue(), n) }
|
||||
|
||||
override string getName() { result = "Lambda callable" }
|
||||
|
||||
override FunctionValue getCallableValue() {
|
||||
result.getOrigin().getNode() = lambda.getDefinition()
|
||||
}
|
||||
|
||||
Expr getDefinition() { result = lambda.getDefinition() }
|
||||
}
|
||||
|
||||
/** A class representing the scope in which a `ModuleVariableNode` appears. */
|
||||
class DataFlowModuleScope extends DataFlowCallable, TModule {
|
||||
Module mod;
|
||||
|
||||
DataFlowModuleScope() { this = TModule(mod) }
|
||||
|
||||
override string toString() { result = mod.toString() }
|
||||
|
||||
override CallNode getACall() { none() }
|
||||
|
||||
override Scope getScope() { result = mod }
|
||||
|
||||
override NameNode getParameter(int n) { none() }
|
||||
|
||||
override string getName() { result = mod.getName() }
|
||||
|
||||
override CallableValue getCallableValue() { none() }
|
||||
}
|
||||
|
||||
class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
|
||||
LibraryCallable callable;
|
||||
|
||||
LibraryCallableValue() { this = TLibraryCallable(callable) }
|
||||
|
||||
override string toString() { result = callable.toString() }
|
||||
|
||||
override CallNode getACall() { result = callable.getACall().getNode() }
|
||||
|
||||
/** Gets a data-flow node, where this library callable is used as a call-back. */
|
||||
ArgumentNode getACallback() { result = callable.getACallback() }
|
||||
|
||||
override Scope getScope() { none() }
|
||||
|
||||
override NameNode getParameter(int n) { none() }
|
||||
|
||||
override string getName() { result = callable }
|
||||
|
||||
override LibraryCallable asLibraryCallable() { result = callable }
|
||||
}
|
||||
|
||||
/**
|
||||
* IPA type for DataFlowCall.
|
||||
*
|
||||
* Calls corresponding to `CallNode`s are either to callable values or to classes.
|
||||
* The latter is directed to the callable corresponding to the `__init__` method of the class.
|
||||
*
|
||||
* An `__init__` method can also be called directly, so that the callable can be targeted by
|
||||
* different types of calls. In that case, the parameter mappings will be different,
|
||||
* as the class call will synthesize an argument node to be mapped to the `self` parameter.
|
||||
*
|
||||
* A call corresponding to a special method call is handled by the corresponding `SpecialMethodCallNode`.
|
||||
*
|
||||
* TODO: Add `TClassMethodCall` mapping `cls` appropriately.
|
||||
*/
|
||||
newtype TDataFlowCall =
|
||||
/**
|
||||
* Includes function calls, method calls, class calls and library calls.
|
||||
* All these will be associated with a `CallNode`.
|
||||
*/
|
||||
TNormalCall(CallNode call) or
|
||||
/**
|
||||
* Includes calls to special methods.
|
||||
* These will be associated with a `SpecialMethodCallNode`.
|
||||
*/
|
||||
TSpecialCall(SpecialMethodCallNode special) or
|
||||
/** A synthesized call inside a summarized callable */
|
||||
TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
|
||||
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
|
||||
}
|
||||
|
||||
/** A call found in the program source (as opposed to a synthesised summary call). */
|
||||
class TExtractedDataFlowCall = TSpecialCall or TNormalCall;
|
||||
|
||||
/** A call that is taken into account by the global data flow computation. */
|
||||
abstract class DataFlowCall extends TDataFlowCall {
|
||||
/** Gets a textual representation of this element. */
|
||||
abstract string toString();
|
||||
|
||||
/** Get the callable to which this call goes, if such exists. */
|
||||
abstract DataFlowCallable getCallable();
|
||||
|
||||
/**
|
||||
* Gets the argument to this call that will be sent
|
||||
* to the `n`th parameter of the callable, if any.
|
||||
*/
|
||||
abstract Node getArg(int n);
|
||||
|
||||
/** Get the control flow node representing this call, if any. */
|
||||
abstract ControlFlowNode getNode();
|
||||
|
||||
/** Gets the enclosing callable of this call. */
|
||||
abstract DataFlowCallable getEnclosingCallable();
|
||||
|
||||
/** Gets the location of this dataflow call. */
|
||||
abstract Location getLocation();
|
||||
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
}
|
||||
}
|
||||
|
||||
/** A call found in the program source (as opposed to a synthesised call). */
|
||||
abstract class ExtractedDataFlowCall extends DataFlowCall, TExtractedDataFlowCall {
|
||||
final override Location getLocation() { result = this.getNode().getLocation() }
|
||||
|
||||
abstract override DataFlowCallable getCallable();
|
||||
|
||||
abstract override Node getArg(int n);
|
||||
|
||||
abstract override ControlFlowNode getNode();
|
||||
}
|
||||
|
||||
/** A call associated with a `CallNode`. */
|
||||
class NormalCall extends ExtractedDataFlowCall, TNormalCall {
|
||||
CallNode call;
|
||||
|
||||
NormalCall() { this = TNormalCall(call) }
|
||||
|
||||
override string toString() { result = call.toString() }
|
||||
|
||||
abstract override Node getArg(int n);
|
||||
|
||||
override CallNode getNode() { result = call }
|
||||
|
||||
abstract override DataFlowCallable getCallable();
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getNode().getScope() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to a function.
|
||||
* This excludes calls to bound methods, classes, and special methods.
|
||||
* Bound method calls and class calls insert an argument for the explicit
|
||||
* `self` parameter, and special method calls have special argument passing.
|
||||
*/
|
||||
class FunctionCall extends NormalCall {
|
||||
DataFlowCallableValue callable;
|
||||
|
||||
FunctionCall() {
|
||||
call = any(FunctionValue f).getAFunctionCall() and
|
||||
call = callable.getACall()
|
||||
}
|
||||
|
||||
override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
|
||||
|
||||
override DataFlowCallable getCallable() { result = callable }
|
||||
}
|
||||
|
||||
/** A call to a lambda. */
|
||||
class LambdaCall extends NormalCall {
|
||||
DataFlowLambda callable;
|
||||
|
||||
LambdaCall() {
|
||||
call = callable.getACall() and
|
||||
callable = TLambda(any(Function f))
|
||||
}
|
||||
|
||||
override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
|
||||
|
||||
override DataFlowCallable getCallable() { result = callable }
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a call to a bound method call.
|
||||
* The node representing the instance is inserted as argument to the `self` parameter.
|
||||
*/
|
||||
class MethodCall extends NormalCall {
|
||||
FunctionValue bm;
|
||||
|
||||
MethodCall() { call = bm.getAMethodCall() }
|
||||
|
||||
private CallableValue getCallableValue() { result = bm }
|
||||
|
||||
override Node getArg(int n) {
|
||||
n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
|
||||
or
|
||||
n = 0 and result = TCfgNode(call.getFunction().(AttrNode).getObject())
|
||||
}
|
||||
|
||||
override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a call to a class.
|
||||
* The pre-update node for the call is inserted as argument to the `self` parameter.
|
||||
* That makes the call node be the post-update node holding the value of the object
|
||||
* after the constructor has run.
|
||||
*/
|
||||
class ClassCall extends NormalCall {
|
||||
ClassValue c;
|
||||
|
||||
ClassCall() {
|
||||
not c.isAbsent() and
|
||||
call = c.getACall()
|
||||
}
|
||||
|
||||
private CallableValue getCallableValue() { c.getScope().getInitMethod() = result.getScope() }
|
||||
|
||||
override Node getArg(int n) {
|
||||
n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
|
||||
or
|
||||
n = 0 and result = TSyntheticPreUpdateNode(TCfgNode(call))
|
||||
}
|
||||
|
||||
override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
|
||||
}
|
||||
|
||||
/** A call to a special method. */
|
||||
class SpecialCall extends ExtractedDataFlowCall, TSpecialCall {
|
||||
SpecialMethodCallNode special;
|
||||
|
||||
SpecialCall() { this = TSpecialCall(special) }
|
||||
|
||||
override string toString() { result = special.toString() }
|
||||
|
||||
override Node getArg(int n) { result = TCfgNode(special.(SpecialMethod::Potential).getArg(n)) }
|
||||
|
||||
override ControlFlowNode getNode() { result = special }
|
||||
|
||||
override DataFlowCallable getCallable() {
|
||||
result = TCallableValue(special.getResolvedSpecialMethod())
|
||||
}
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() {
|
||||
result.getScope() = special.getNode().getScope()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to a summarized callable, a `LibraryCallable`.
|
||||
*
|
||||
* We currently exclude all resolved calls. This means that a call to, say, `map`, which
|
||||
* is a `ClassCall`, cannot currently be given a summary.
|
||||
* We hope to lift this restriction in the future and include all potential calls to summaries
|
||||
* in this class.
|
||||
*/
|
||||
class LibraryCall extends NormalCall {
|
||||
LibraryCall() {
|
||||
// TODO: share this with `resolvedCall`
|
||||
not (
|
||||
call = any(DataFlowCallableValue cv).getACall()
|
||||
or
|
||||
call = any(DataFlowLambda l).getACall()
|
||||
or
|
||||
// TODO: this should be covered by `DataFlowCallableValue`, but a `ClassValue` is not a `CallableValue`.
|
||||
call = any(ClassValue c).getACall()
|
||||
)
|
||||
}
|
||||
|
||||
// TODO: Implement Python calling convention?
|
||||
override Node getArg(int n) { result = TCfgNode(call.getArg(n)) }
|
||||
|
||||
// We cannot refer to a `LibraryCallable` here,
|
||||
// as that could in turn refer to type tracking.
|
||||
// This call will be tied to a `LibraryCallable` via
|
||||
// `getViableCallabe` when the global data flow is assembled.
|
||||
override DataFlowCallable getCallable() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A synthesized call inside a callable with a flow summary.
|
||||
*
|
||||
* For example, in
|
||||
* ```python
|
||||
* map(lambda x: x + 1, [1, 2, 3])
|
||||
* ```
|
||||
*
|
||||
* there is a synthesized call to the lambda argument inside `map`.
|
||||
*/
|
||||
class SummaryCall extends DataFlowCall, TSummaryCall {
|
||||
private FlowSummaryImpl::Public::SummarizedCallable c;
|
||||
private Node receiver;
|
||||
|
||||
SummaryCall() { this = TSummaryCall(c, receiver) }
|
||||
|
||||
/** Gets the data flow node that this call targets. */
|
||||
Node getReceiver() { result = receiver }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
|
||||
|
||||
override DataFlowCallable getCallable() { none() }
|
||||
|
||||
override Node getArg(int n) { none() }
|
||||
|
||||
override ControlFlowNode getNode() { none() }
|
||||
|
||||
override string toString() { result = "[summary] call to " + receiver + " in " + c }
|
||||
|
||||
override Location getLocation() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* The value of a parameter at function entry, viewed as a node in a data
|
||||
* flow graph.
|
||||
*/
|
||||
abstract class ParameterNodeImpl extends Node {
|
||||
abstract Parameter getParameter();
|
||||
|
||||
/**
|
||||
* Holds if this node is the parameter of callable `c` at the
|
||||
* (zero-based) index `i`.
|
||||
*/
|
||||
abstract predicate isParameterOf(DataFlowCallable c, int i);
|
||||
}
|
||||
|
||||
/** A parameter for a library callable with a flow summary. */
|
||||
class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
|
||||
private FlowSummaryImpl::Public::SummarizedCallable sc;
|
||||
private int pos;
|
||||
|
||||
SummaryParameterNode() { this = TSummaryParameterNode(sc, pos) }
|
||||
|
||||
override Parameter getParameter() { none() }
|
||||
|
||||
override predicate isParameterOf(DataFlowCallable c, int i) {
|
||||
sc = c.asLibraryCallable() and i = pos
|
||||
}
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = sc }
|
||||
|
||||
override string toString() { result = "parameter " + pos + " of " + sc }
|
||||
|
||||
// Hack to return "empty location"
|
||||
override predicate hasLocationInfo(
|
||||
string file, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
file = "" and
|
||||
startline = 0 and
|
||||
startcolumn = 0 and
|
||||
endline = 0 and
|
||||
endcolumn = 0
|
||||
}
|
||||
}
|
||||
|
||||
/** A data-flow node used to model flow summaries. */
|
||||
class SummaryNode extends Node, TSummaryNode {
|
||||
private FlowSummaryImpl::Public::SummarizedCallable c;
|
||||
private FlowSummaryImpl::Private::SummaryNodeState state;
|
||||
|
||||
SummaryNode() { this = TSummaryNode(c, state) }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
|
||||
|
||||
override string toString() { result = "[summary] " + state + " in " + c }
|
||||
|
||||
// Hack to return "empty location"
|
||||
override predicate hasLocationInfo(
|
||||
string file, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
file = "" and
|
||||
startline = 0 and
|
||||
startcolumn = 0 and
|
||||
endline = 0 and
|
||||
endcolumn = 0
|
||||
}
|
||||
}
|
||||
|
||||
private class SummaryReturnNode extends SummaryNode, ReturnNode {
|
||||
private ReturnKind rk;
|
||||
|
||||
SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this, rk) }
|
||||
|
||||
override ReturnKind getKind() { result = rk }
|
||||
}
|
||||
|
||||
private class SummaryArgumentNode extends SummaryNode, ArgumentNode {
|
||||
SummaryArgumentNode() { FlowSummaryImpl::Private::summaryArgumentNode(_, this, _) }
|
||||
|
||||
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
|
||||
FlowSummaryImpl::Private::summaryArgumentNode(call, this, pos)
|
||||
}
|
||||
}
|
||||
|
||||
private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode {
|
||||
private Node pre;
|
||||
|
||||
SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, pre) }
|
||||
|
||||
override Node getPreUpdateNode() { result = pre }
|
||||
}
|
||||
|
||||
/** Gets a viable run-time target for the call `call`. */
|
||||
DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
|
||||
result = call.getCallable()
|
||||
or
|
||||
// A call to a library callable with a flow summary
|
||||
// In this situation we can not resolve the callable from the call,
|
||||
// as that would make data flow depend on type tracking.
|
||||
// Instead we resolve the call from the summary.
|
||||
exists(LibraryCallable callable |
|
||||
result = TLibraryCallable(callable) and
|
||||
call.getNode() = callable.getACall().getNode()
|
||||
)
|
||||
}
|
||||
|
||||
private newtype TReturnKind = TNormalReturnKind()
|
||||
|
||||
/**
|
||||
* A return kind. A return kind describes how a value can be returned
|
||||
* from a callable. For Python, this is simply a method return.
|
||||
*/
|
||||
class ReturnKind extends TReturnKind {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = "return" }
|
||||
}
|
||||
|
||||
/** A data flow node that represents a value returned by a callable. */
|
||||
abstract class ReturnNode extends Node {
|
||||
/** Gets the kind of this return node. */
|
||||
ReturnKind getKind() { any() }
|
||||
}
|
||||
|
||||
/** A data flow node that represents a value returned by a callable. */
|
||||
class ExtractedReturnNode extends ReturnNode, CfgNode {
|
||||
// See `TaintTrackingImplementation::returnFlowStep`
|
||||
ExtractedReturnNode() { node = any(Return ret).getValue().getAFlowNode() }
|
||||
|
||||
override ReturnKind getKind() { any() }
|
||||
}
|
||||
|
||||
/** A data-flow node that represents the output of a call. */
|
||||
abstract class OutNode extends Node {
|
||||
/** Gets the underlying call, where this node is a corresponding output of kind `kind`. */
|
||||
abstract DataFlowCall getCall(ReturnKind kind);
|
||||
}
|
||||
|
||||
private module OutNodes {
|
||||
/**
|
||||
* A data-flow node that reads a value returned directly by a callable.
|
||||
*/
|
||||
class ExprOutNode extends OutNode, ExprNode {
|
||||
private DataFlowCall call;
|
||||
|
||||
ExprOutNode() { call.(ExtractedDataFlowCall).getNode() = this.getNode() }
|
||||
|
||||
override DataFlowCall getCall(ReturnKind kind) {
|
||||
result = call and
|
||||
kind = kind
|
||||
}
|
||||
}
|
||||
|
||||
private class SummaryOutNode extends SummaryNode, OutNode {
|
||||
SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this, _) }
|
||||
|
||||
override DataFlowCall getCall(ReturnKind kind) {
|
||||
FlowSummaryImpl::Private::summaryOutNode(result, this, kind)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node that can read the value returned from `call` with return kind
|
||||
* `kind`.
|
||||
*/
|
||||
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) }
|
||||
@@ -16,7 +16,7 @@ private import semmle.python.Frameworks
|
||||
// make it more digestible.
|
||||
import MatchUnpacking
|
||||
import IterableUnpacking
|
||||
import DataFlowDispatchPointsTo
|
||||
import DataFlowDispatch
|
||||
|
||||
/** Gets the callable in which this node occurs. */
|
||||
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
|
||||
@@ -39,162 +39,267 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos)
|
||||
//--------
|
||||
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
|
||||
|
||||
/** DEPRECATED: Alias for `SyntheticPreUpdateNode` */
|
||||
deprecated module syntheticPreUpdateNode = SyntheticPreUpdateNode;
|
||||
// =============================================================================
|
||||
// SyntheticPreUpdateNode
|
||||
// =============================================================================
|
||||
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
|
||||
CallNode node;
|
||||
|
||||
/** A module collecting the different reasons for synthesising a pre-update node. */
|
||||
module SyntheticPreUpdateNode {
|
||||
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
|
||||
NeedsSyntheticPreUpdateNode post;
|
||||
SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(node) }
|
||||
|
||||
SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(post) }
|
||||
/** Gets the node for which this is a synthetic pre-update node. */
|
||||
CfgNode getPostUpdateNode() { result.getNode() = node }
|
||||
|
||||
/** Gets the node for which this is a synthetic pre-update node. */
|
||||
Node getPostUpdateNode() { result = post }
|
||||
override string toString() { result = "[pre] " + node.toString() }
|
||||
|
||||
override string toString() { result = "[pre " + post.label() + "] " + post.toString() }
|
||||
override Scope getScope() { result = node.getScope() }
|
||||
|
||||
override Scope getScope() { result = post.getScope() }
|
||||
|
||||
override Location getLocation() { result = post.getLocation() }
|
||||
}
|
||||
|
||||
/** A data flow node for which we should synthesise an associated pre-update node. */
|
||||
class NeedsSyntheticPreUpdateNode extends PostUpdateNode {
|
||||
NeedsSyntheticPreUpdateNode() { this = objectCreationNode() }
|
||||
|
||||
override Node getPreUpdateNode() { result.(SyntheticPreUpdateNode).getPostUpdateNode() = this }
|
||||
|
||||
/**
|
||||
* Gets the label for this kind of node. This will figure in the textual representation of the synthesized pre-update node.
|
||||
*
|
||||
* There is currently only one reason for needing a pre-update node, so we always use that as the label.
|
||||
*/
|
||||
string label() { result = "objCreate" }
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls to constructors are treated as post-update nodes for the synthesized argument
|
||||
* that is mapped to the `self` parameter. That way, constructor calls represent the value of the
|
||||
* object after the constructor (currently only `__init__`) has run.
|
||||
*/
|
||||
CfgNode objectCreationNode() { result.getNode() = any(ClassCall c).getNode() }
|
||||
override Location getLocation() { result = node.getLocation() }
|
||||
}
|
||||
|
||||
import SyntheticPreUpdateNode
|
||||
// =============================================================================
|
||||
// *args (StarArgs) related
|
||||
// =============================================================================
|
||||
/**
|
||||
* A (synthetic) data-flow parameter node to capture all positional arguments that
|
||||
* should be passed to the `*args` parameter.
|
||||
*
|
||||
* To handle
|
||||
* ```py
|
||||
* def func(*args):
|
||||
* for arg in args:
|
||||
* sink(arg)
|
||||
*
|
||||
* func(source1, source2, ...)
|
||||
* ```
|
||||
*
|
||||
* we add a synthetic parameter to `func` that accepts any positional argument at (or
|
||||
* after) the index for the `*args` parameter. We add a store step (at any list index) to the real
|
||||
* `*args` parameter. This means we can handle the code above, but if the code had done `sink(args[0])`
|
||||
* we would (wrongly) add flow for `source2` as well.
|
||||
*
|
||||
* To solve this more precisely, we could add a synthetic argument with position `*args`
|
||||
* that had store steps with the correct index (like we do for mapping keyword arguments to a
|
||||
* `**kwargs` parameter). However, if a single call could go to 2 different
|
||||
* targets with `*args` parameters at different positions, as in the example below, it's unclear what
|
||||
* index to store `2` at. For the `foo` callable it should be 1, for the `bar` callable it should be 0.
|
||||
* So this information would need to be encoded in the arguments of a `ArgumentPosition` branch, and
|
||||
* one of the arguments would be which callable is the target. However, we cannot build `ArgumentPosition`
|
||||
* branches based on the call-graph, so this strategy doesn't work.
|
||||
*
|
||||
* Another approach to solving it precisely is to add multiple synthetic parameters that have store steps
|
||||
* to the real `*args` parameter. So for the example below, `foo` would need to have synthetic parameter
|
||||
* nodes for indexes 1 and 2 (which would have store step for index 0 and 1 of the `*args` parameter),
|
||||
* and `bar` would need it for indexes 1, 2, and 3. The question becomes how many synthetic parameters to
|
||||
* create, which _must_ be `max(Call call, int i | exists(call.getArg(i)))`, since (again) we can't base
|
||||
* this on the call-graph. And each function with a `*args` parameter would need this many extra synthetic
|
||||
* nodes. My gut feeling at that this simple approach will be good enough, but if we need to get it more
|
||||
* precise, it should be possible to do it like this.
|
||||
*
|
||||
* In PR review, @yoff suggested an alternative approach for more precise handling:
|
||||
*
|
||||
* - At the call site, all positional arguments are stored into a synthetic starArgs argument, always tarting at index 0
|
||||
* - This is sent to a synthetic star parameter
|
||||
* - At the receiving end, we know the offset of a potential real star parameter, so we can define read steps accordingly: In foo, we read from the synthetic star parameter at index 1 and store to the real star parameter at index 0.
|
||||
*
|
||||
* ```py
|
||||
* def foo(one, *args): ...
|
||||
* def bar(*args): ...
|
||||
*
|
||||
* func = foo if <cond> else bar
|
||||
* func(1, 2, 3)
|
||||
*/
|
||||
class SynthStarArgsElementParameterNode extends ParameterNodeImpl,
|
||||
TSynthStarArgsElementParameterNode {
|
||||
DataFlowCallable callable;
|
||||
|
||||
/** DEPRECATED: Alias for `SyntheticPostUpdateNode` */
|
||||
deprecated module syntheticPostUpdateNode = SyntheticPostUpdateNode;
|
||||
SynthStarArgsElementParameterNode() { this = TSynthStarArgsElementParameterNode(callable) }
|
||||
|
||||
/** A module collecting the different reasons for synthesising a post-update node. */
|
||||
module SyntheticPostUpdateNode {
|
||||
private import semmle.python.SpecialMethods
|
||||
override string toString() { result = "SynthStarArgsElementParameterNode" }
|
||||
|
||||
/** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
|
||||
class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
|
||||
NeedsSyntheticPostUpdateNode pre;
|
||||
override Scope getScope() { result = callable.getScope() }
|
||||
|
||||
SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(pre) }
|
||||
override Location getLocation() { result = callable.getLocation() }
|
||||
|
||||
override Node getPreUpdateNode() { result = pre }
|
||||
|
||||
override string toString() { result = "[post " + pre.label() + "] " + pre.toString() }
|
||||
|
||||
override Scope getScope() { result = pre.getScope() }
|
||||
|
||||
override Location getLocation() { result = pre.getLocation() }
|
||||
}
|
||||
|
||||
/** A data flow node for which we should synthesise an associated post-update node. */
|
||||
class NeedsSyntheticPostUpdateNode extends Node {
|
||||
NeedsSyntheticPostUpdateNode() {
|
||||
this = argumentPreUpdateNode()
|
||||
or
|
||||
this = storePreUpdateNode()
|
||||
or
|
||||
this = readPreUpdateNode()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the label for this kind of node. This will figure in the textual representation of the synthesized post-update node.
|
||||
* We favour being an arguments as the reason for the post-update node in case multiple reasons apply.
|
||||
*/
|
||||
string label() {
|
||||
if this = argumentPreUpdateNode()
|
||||
then result = "arg"
|
||||
else
|
||||
if this = storePreUpdateNode()
|
||||
then result = "store"
|
||||
else result = "read"
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the pre-update node for this node.
|
||||
*
|
||||
* An argument might have its value changed as a result of a call.
|
||||
* Certain arguments, such as implicit self arguments are already post-update nodes
|
||||
* and should not have an extra node synthesised.
|
||||
*/
|
||||
Node argumentPreUpdateNode() {
|
||||
result = any(FunctionCall c).getArg(_)
|
||||
or
|
||||
result = any(LambdaCall c).getArg(_)
|
||||
or
|
||||
// Avoid argument 0 of method calls as those have read post-update nodes.
|
||||
exists(MethodCall c, int n | n > 0 | result = c.getArg(n))
|
||||
or
|
||||
result = any(SpecialCall c).getArg(_)
|
||||
or
|
||||
// Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
|
||||
exists(ClassCall c, int n | n > 0 | result = c.getArg(n))
|
||||
or
|
||||
// any argument of any call that we have not been able to resolve
|
||||
exists(CallNode call | not resolvedCall(call) |
|
||||
result.(CfgNode).getNode() in [call.getArg(_), call.getArgByName(_)]
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `call` can be resolved as a normal call */
|
||||
private predicate resolvedCall(CallNode call) {
|
||||
call = any(DataFlowCallableValue cv).getACall()
|
||||
or
|
||||
call = any(DataFlowLambda l).getACall()
|
||||
}
|
||||
|
||||
/** Gets the pre-update node associated with a store. This is used for when an object might have its value changed after a store. */
|
||||
CfgNode storePreUpdateNode() {
|
||||
exists(Attribute a |
|
||||
result.getNode() = a.getObject().getAFlowNode() and
|
||||
a.getCtx() instanceof Store
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node marking the state change of an object after a read.
|
||||
*
|
||||
* A reverse read happens when the result of a read is modified, e.g. in
|
||||
* ```python
|
||||
* l = [ mutable ]
|
||||
* l[0].mutate()
|
||||
* ```
|
||||
* we may now have changed the content of `l`. To track this, there must be
|
||||
* a postupdate node for `l`.
|
||||
*/
|
||||
CfgNode readPreUpdateNode() {
|
||||
exists(Attribute a |
|
||||
result.getNode() = a.getObject().getAFlowNode() and
|
||||
a.getCtx() instanceof Load
|
||||
)
|
||||
or
|
||||
result.getNode() = any(SubscriptNode s).getObject()
|
||||
or
|
||||
// The dictionary argument is read from if the callable has parameters matching the keys.
|
||||
result.getNode().getNode() = any(Call call).getKwargs()
|
||||
}
|
||||
override Parameter getParameter() { none() }
|
||||
}
|
||||
|
||||
import SyntheticPostUpdateNode
|
||||
predicate synthStarArgsElementParameterNodeStoreStep(
|
||||
SynthStarArgsElementParameterNode nodeFrom, ListElementContent c, ParameterNode nodeTo
|
||||
) {
|
||||
c = c and // suppress warning about unused parameter
|
||||
exists(DataFlowCallable callable, ParameterPosition ppos |
|
||||
nodeFrom = TSynthStarArgsElementParameterNode(callable) and
|
||||
nodeTo = callable.getParameter(ppos) and
|
||||
ppos.isStarArgs(_)
|
||||
)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// **kwargs (DictSplat) related
|
||||
// =============================================================================
|
||||
/**
|
||||
* A (synthetic) data-flow node that represents all keyword arguments, as if they had
|
||||
* been passed in a `**kwargs` argument.
|
||||
*/
|
||||
class SynthDictSplatArgumentNode extends Node, TSynthDictSplatArgumentNode {
|
||||
CallNode node;
|
||||
|
||||
SynthDictSplatArgumentNode() { this = TSynthDictSplatArgumentNode(node) }
|
||||
|
||||
override string toString() { result = "SynthDictSplatArgumentNode" }
|
||||
|
||||
override Scope getScope() { result = node.getScope() }
|
||||
|
||||
override Location getLocation() { result = node.getLocation() }
|
||||
}
|
||||
|
||||
private predicate synthDictSplatArgumentNodeStoreStep(
|
||||
ArgumentNode nodeFrom, DictionaryElementContent c, SynthDictSplatArgumentNode nodeTo
|
||||
) {
|
||||
exists(string name, CallNode call, ArgumentPosition keywordPos |
|
||||
nodeTo = TSynthDictSplatArgumentNode(call) and
|
||||
getCallArg(call, _, _, nodeFrom, keywordPos) and
|
||||
keywordPos.isKeyword(name) and
|
||||
c.getKey() = name
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensures that the a `**kwargs` parameter will not contain elements with names of
|
||||
* keyword parameters.
|
||||
*
|
||||
* For example, for the function below, it's not possible that the `kwargs` dictionary
|
||||
* can contain an element with the name `a`, since that parameter can be given as a
|
||||
* keyword argument.
|
||||
*
|
||||
* ```py
|
||||
* def func(a, **kwargs):
|
||||
* ...
|
||||
* ```
|
||||
*/
|
||||
private predicate dictSplatParameterNodeClearStep(ParameterNode n, DictionaryElementContent c) {
|
||||
exists(DataFlowCallable callable, ParameterPosition dictSplatPos, ParameterPosition keywordPos |
|
||||
dictSplatPos.isDictSplat() and
|
||||
(
|
||||
n.getParameter() = callable.(DataFlowFunction).getScope().getKwarg()
|
||||
or
|
||||
n = TSummaryParameterNode(callable.asLibraryCallable(), dictSplatPos)
|
||||
) and
|
||||
exists(callable.getParameter(keywordPos)) and
|
||||
keywordPos.isKeyword(c.getKey())
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A synthetic data-flow node to allow flow to keyword parameters from a `**kwargs` argument.
|
||||
*
|
||||
* Take the code snippet below as an example. Since the call only has a `**kwargs` argument,
|
||||
* with a `**` argument position, we add this synthetic parameter node with `**` parameter position,
|
||||
* and a read step to the `p1` parameter.
|
||||
*
|
||||
* ```py
|
||||
* def foo(p1, p2): ...
|
||||
*
|
||||
* kwargs = {"p1": 42, "p2": 43}
|
||||
* foo(**kwargs)
|
||||
* ```
|
||||
*
|
||||
*
|
||||
* Note that this will introduce a bit of redundancy in cases like
|
||||
*
|
||||
* ```py
|
||||
* foo(p1=taint(1), p2=taint(2))
|
||||
* ```
|
||||
*
|
||||
* where direct keyword matching is possible, since we construct a synthesized dict
|
||||
* splat argument (`SynthDictSplatArgumentNode`) at the call site, which means that
|
||||
* `taint(1)` will flow into `p1` both via normal keyword matching and via the synthesized
|
||||
* nodes (and similarly for `p2`). However, this redundancy is OK since
|
||||
* (a) it means that type-tracking through keyword arguments also works in most cases,
|
||||
* (b) read/store steps can be avoided when direct keyword matching is possible, and
|
||||
* hence access path limits are not a concern, and
|
||||
* (c) since the synthesized nodes are hidden, the reported data-flow paths will be
|
||||
* collapsed anyway.
|
||||
*/
|
||||
class SynthDictSplatParameterNode extends ParameterNodeImpl, TSynthDictSplatParameterNode {
|
||||
DataFlowCallable callable;
|
||||
|
||||
SynthDictSplatParameterNode() { this = TSynthDictSplatParameterNode(callable) }
|
||||
|
||||
override string toString() { result = "SynthDictSplatParameterNode" }
|
||||
|
||||
override Scope getScope() { result = callable.getScope() }
|
||||
|
||||
override Location getLocation() { result = callable.getLocation() }
|
||||
|
||||
override Parameter getParameter() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Flow step from the synthetic `**kwargs` parameter to the real `**kwargs` parameter.
|
||||
* Due to restriction in dataflow library, we can only give one of them as result for
|
||||
* `DataFlowCallable.getParameter`, so this is a workaround to ensure there is flow to
|
||||
* _both_ of them.
|
||||
*/
|
||||
private predicate dictSplatParameterNodeFlowStep(
|
||||
ParameterNodeImpl nodeFrom, ParameterNodeImpl nodeTo
|
||||
) {
|
||||
exists(DataFlowCallable callable |
|
||||
nodeFrom = TSynthDictSplatParameterNode(callable) and
|
||||
(
|
||||
nodeTo.getParameter() = callable.(DataFlowFunction).getScope().getKwarg()
|
||||
or
|
||||
exists(ParameterPosition pos |
|
||||
nodeTo = TSummaryParameterNode(callable.asLibraryCallable(), pos) and
|
||||
pos.isDictSplat()
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads from the synthetic **kwargs parameter to each keyword parameter.
|
||||
*/
|
||||
predicate synthDictSplatParameterNodeReadStep(
|
||||
SynthDictSplatParameterNode nodeFrom, DictionaryElementContent c, ParameterNode nodeTo
|
||||
) {
|
||||
exists(DataFlowCallable callable, ParameterPosition ppos |
|
||||
nodeFrom = TSynthDictSplatParameterNode(callable) and
|
||||
nodeTo = callable.getParameter(ppos) and
|
||||
ppos.isKeyword(c.getKey())
|
||||
)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// PostUpdateNode
|
||||
// =============================================================================
|
||||
abstract class PostUpdateNodeImpl extends Node {
|
||||
/** Gets the node before the state update. */
|
||||
abstract Node getPreUpdateNode();
|
||||
}
|
||||
|
||||
class SyntheticPostUpdateNode extends PostUpdateNodeImpl, TSyntheticPostUpdateNode {
|
||||
ControlFlowNode node;
|
||||
|
||||
SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(node) }
|
||||
|
||||
override Node getPreUpdateNode() { result.(CfgNode).getNode() = node }
|
||||
|
||||
override string toString() { result = "[post] " + node.toString() }
|
||||
|
||||
override Scope getScope() { result = node.getScope() }
|
||||
|
||||
override Location getLocation() { result = node.getLocation() }
|
||||
}
|
||||
|
||||
class NonSyntheticPostUpdateNode extends PostUpdateNodeImpl, CfgNode {
|
||||
SyntheticPreUpdateNode pre;
|
||||
|
||||
NonSyntheticPostUpdateNode() { this = pre.getPostUpdateNode() }
|
||||
|
||||
override Node getPreUpdateNode() { result = pre }
|
||||
}
|
||||
|
||||
class DataFlowExpr = Expr;
|
||||
|
||||
@@ -274,13 +379,6 @@ module EssaFlow {
|
||||
iterableUnpackingFlowStep(nodeFrom, nodeTo)
|
||||
or
|
||||
matchFlowStep(nodeFrom, nodeTo)
|
||||
or
|
||||
// Overflow keyword argument
|
||||
exists(CallNode call, CallableValue callable |
|
||||
call = callable.getACall() and
|
||||
nodeTo = TKwOverflowNode(call, callable) and
|
||||
nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode()
|
||||
)
|
||||
}
|
||||
|
||||
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
|
||||
@@ -305,6 +403,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo)
|
||||
or
|
||||
summaryFlowSteps(nodeFrom, nodeTo)
|
||||
or
|
||||
dictSplatParameterNodeFlowStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -521,15 +621,15 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
or
|
||||
attributeStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
posOverflowStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
kwOverflowStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
matchStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -669,30 +769,6 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, PostUpdateNode n
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` flows into the synthesized positional overflow argument (`nodeTo`)
|
||||
* at the position indicated by `c`.
|
||||
*/
|
||||
predicate posOverflowStoreStep(CfgNode nodeFrom, TupleElementContent c, Node nodeTo) {
|
||||
exists(CallNode call, CallableValue callable, int n |
|
||||
nodeFrom.asCfgNode() = getPositionalOverflowArg(call, callable, n) and
|
||||
nodeTo = TPosOverflowNode(call, callable) and
|
||||
c.getIndex() = n
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` flows into the synthesized keyword overflow argument (`nodeTo`)
|
||||
* at the key indicated by `c`.
|
||||
*/
|
||||
predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
|
||||
exists(CallNode call, CallableValue callable, string key |
|
||||
nodeFrom.asCfgNode() = getKeywordOverflowArg(call, callable, key) and
|
||||
nodeTo = TKwOverflowNode(call, callable) and
|
||||
c.getKey() = key
|
||||
)
|
||||
}
|
||||
|
||||
predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
|
||||
exists(Function f, Parameter p, ParameterDefinition def |
|
||||
// `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
|
||||
@@ -722,9 +798,9 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
or
|
||||
attributeReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
kwUnpackReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
|
||||
}
|
||||
|
||||
/** Data flows from a sequence to a subscript of the sequence. */
|
||||
@@ -814,43 +890,19 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
|
||||
nodeTo.accesses(nodeFrom, c.getAttribute())
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is a dictionary argument being unpacked and `nodeTo` is the
|
||||
* synthesized unpacked argument with the name indicated by `c`.
|
||||
*/
|
||||
predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
|
||||
exists(CallNode call, string name |
|
||||
nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode() and
|
||||
nodeTo = TKwUnpackedNode(call, _, name) and
|
||||
name = c.getKey()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear content at key `name` of the synthesized dictionary `TKwOverflowNode(call, callable)`,
|
||||
* whenever `call` unpacks `name`.
|
||||
*/
|
||||
predicate kwOverflowClearStep(Node n, Content c) {
|
||||
exists(CallNode call, CallableValue callable, string name |
|
||||
call_unpacks(call, _, callable, name, _) and
|
||||
n = TKwOverflowNode(call, callable) and
|
||||
c.(DictionaryElementContent).getKey() = name
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if values stored inside content `c` are cleared at node `n`. For example,
|
||||
* any value stored inside `f` is cleared at the pre-update node associated with `x`
|
||||
* in `x.f = newValue`.
|
||||
*/
|
||||
predicate clearsContent(Node n, Content c) {
|
||||
kwOverflowClearStep(n, c)
|
||||
or
|
||||
matchClearStep(n, c)
|
||||
or
|
||||
attributeClearStep(n, c)
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
|
||||
or
|
||||
dictSplatParameterNodeClearStep(n, c)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -906,23 +958,24 @@ predicate nodeIsHidden(Node n) {
|
||||
n instanceof SummaryNode
|
||||
or
|
||||
n instanceof SummaryParameterNode
|
||||
or
|
||||
n instanceof SynthStarArgsElementParameterNode
|
||||
or
|
||||
n instanceof SynthDictSplatArgumentNode
|
||||
or
|
||||
n instanceof SynthDictSplatParameterNode
|
||||
}
|
||||
|
||||
class LambdaCallKind = Unit;
|
||||
|
||||
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
|
||||
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
|
||||
// lambda
|
||||
// lambda and plain functions
|
||||
kind = kind and
|
||||
creation.asExpr() = c.(DataFlowLambda).getDefinition()
|
||||
or
|
||||
// normal function
|
||||
exists(FunctionDef def |
|
||||
def.defines(creation.asVar().getSourceVariable()) and
|
||||
def.getDefinedFunction() = c.(DataFlowCallableValue).getCallableValue().getScope()
|
||||
)
|
||||
creation.asExpr() = c.(DataFlowPlainFunction).getScope().getDefinition()
|
||||
or
|
||||
// summarized function
|
||||
exists(kind) and // avoid warning on unused 'kind'
|
||||
exists(Call call |
|
||||
creation.asExpr() = call.getAnArg() and
|
||||
creation = c.(LibraryCallableValue).getACallback()
|
||||
|
||||
@@ -31,10 +31,44 @@ newtype TNode =
|
||||
or
|
||||
node.getNode() instanceof Pattern
|
||||
} or
|
||||
/** A synthetic node representing the value of an object before a state change */
|
||||
TSyntheticPreUpdateNode(NeedsSyntheticPreUpdateNode post) or
|
||||
/** A synthetic node representing the value of an object after a state change. */
|
||||
TSyntheticPostUpdateNode(NeedsSyntheticPostUpdateNode pre) or
|
||||
/**
|
||||
* A synthetic node representing the value of an object before a state change.
|
||||
*
|
||||
* For class calls we pass a synthetic self argument, so attribute writes in
|
||||
* `__init__` is reflected on the resulting object (we need special logic for this
|
||||
* since there is no `return` in `__init__`)
|
||||
*/
|
||||
// NOTE: since we can't rely on the call graph, but we want to have synthetic
|
||||
// pre-update nodes for class calls, we end up getting synthetic pre-update nodes for
|
||||
// ALL calls :|
|
||||
TSyntheticPreUpdateNode(CallNode call) or
|
||||
/**
|
||||
* A synthetic node representing the value of an object after a state change.
|
||||
* See QLDoc for `PostUpdateNode`.
|
||||
*/
|
||||
TSyntheticPostUpdateNode(ControlFlowNode node) {
|
||||
exists(CallNode call |
|
||||
node = call.getArg(_)
|
||||
or
|
||||
node = call.getArgByName(_)
|
||||
or
|
||||
// `self` argument when handling class instance calls (`__call__` special method))
|
||||
node = call.getFunction()
|
||||
)
|
||||
or
|
||||
node = any(AttrNode a).getObject()
|
||||
or
|
||||
node = any(SubscriptNode s).getObject()
|
||||
or
|
||||
// self parameter when used implicitly in `super()`
|
||||
exists(Class cls, Function func, ParameterDefinition def |
|
||||
func = cls.getAMethod() and
|
||||
not isStaticmethod(func) and
|
||||
// this matches what we do in ExtractedParameterNode
|
||||
def.getDefiningNode() = node and
|
||||
def.getParameter() = func.getArg(0)
|
||||
)
|
||||
} or
|
||||
/** A node representing a global (module-level) variable in a specific module. */
|
||||
TModuleVariableNode(Module m, GlobalVariable v) {
|
||||
v.getScope() = m and
|
||||
@@ -45,37 +79,6 @@ newtype TNode =
|
||||
ImportStar::globalNameDefinedInModule(v.getId(), m)
|
||||
)
|
||||
} or
|
||||
/**
|
||||
* A node representing the overflow positional arguments to a call.
|
||||
* That is, `call` contains more positional arguments than there are
|
||||
* positional parameters in `callable`. The extra ones are passed as
|
||||
* a tuple to a starred parameter; this synthetic node represents that tuple.
|
||||
*/
|
||||
TPosOverflowNode(CallNode call, CallableValue callable) {
|
||||
exists(getPositionalOverflowArg(call, callable, _))
|
||||
} or
|
||||
/**
|
||||
* A node representing the overflow keyword arguments to a call.
|
||||
* That is, `call` contains keyword arguments for keys that do not have
|
||||
* keyword parameters in `callable`. These extra ones are passed as
|
||||
* a dictionary to a doubly starred parameter; this synthetic node
|
||||
* represents that dictionary.
|
||||
*/
|
||||
TKwOverflowNode(CallNode call, CallableValue callable) {
|
||||
exists(getKeywordOverflowArg(call, callable, _))
|
||||
or
|
||||
ArgumentPassing::connects(call, callable) and
|
||||
exists(call.getNode().getKwargs()) and
|
||||
callable.getScope().hasKwArg()
|
||||
} or
|
||||
/**
|
||||
* A node representing an unpacked element of a dictionary argument.
|
||||
* That is, `call` contains argument `**{"foo": bar}` which is passed
|
||||
* to parameter `foo` of `callable`.
|
||||
*/
|
||||
TKwUnpackedNode(CallNode call, CallableValue callable, string name) {
|
||||
call_unpacks(call, _, callable, name, _)
|
||||
} or
|
||||
/**
|
||||
* A synthetic node representing that an iterable sequence flows to consumer.
|
||||
*/
|
||||
@@ -109,10 +112,18 @@ newtype TNode =
|
||||
} or
|
||||
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
|
||||
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
|
||||
} or
|
||||
/** A synthetic node to capture positional arguments that are passed to a `*args` parameter. */
|
||||
TSynthStarArgsElementParameterNode(DataFlowCallable callable) {
|
||||
exists(ParameterPosition ppos | ppos.isStarArgs(_) | exists(callable.getParameter(ppos)))
|
||||
} or
|
||||
/** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
|
||||
TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
|
||||
/** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
|
||||
TSynthDictSplatParameterNode(DataFlowCallable callable) {
|
||||
exists(ParameterPosition ppos | ppos.isKeyword(_) | exists(callable.getParameter(ppos)))
|
||||
}
|
||||
|
||||
class TParameterNode = TCfgNode or TSummaryParameterNode;
|
||||
|
||||
/** Helper for `Node::getEnclosingCallable`. */
|
||||
private DataFlowCallable getCallableScope(Scope s) {
|
||||
result.getScope() = s
|
||||
@@ -288,7 +299,7 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
|
||||
* The value of a parameter at function entry, viewed as a node in a data
|
||||
* flow graph.
|
||||
*/
|
||||
class ParameterNode extends Node, TParameterNode instanceof ParameterNodeImpl {
|
||||
class ParameterNode extends Node instanceof ParameterNodeImpl {
|
||||
/** Gets the parameter corresponding to this node, if any. */
|
||||
final Parameter getParameter() { result = super.getParameter() }
|
||||
}
|
||||
@@ -298,18 +309,8 @@ class ExtractedParameterNode extends ParameterNodeImpl, CfgNode {
|
||||
//, LocalSourceNode {
|
||||
ParameterDefinition def;
|
||||
|
||||
ExtractedParameterNode() {
|
||||
node = def.getDefiningNode() and
|
||||
// Disregard parameters that we cannot resolve
|
||||
// TODO: Make this unnecessary
|
||||
exists(DataFlowCallable c | node = c.getParameter(_))
|
||||
}
|
||||
ExtractedParameterNode() { node = def.getDefiningNode() }
|
||||
|
||||
override predicate isParameterOf(DataFlowCallable c, int i) { node = c.getParameter(i) }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
|
||||
|
||||
/** Gets the `Parameter` this `ParameterNode` represents. */
|
||||
override Parameter getParameter() { result = def.getParameter() }
|
||||
}
|
||||
|
||||
@@ -327,16 +328,24 @@ abstract class ArgumentNode extends Node {
|
||||
final ExtractedDataFlowCall getCall() { this.argumentOf(result, _) }
|
||||
}
|
||||
|
||||
/** A data flow node that represents a call argument found in the source code. */
|
||||
/**
|
||||
* A data flow node that represents a call argument found in the source code.
|
||||
*/
|
||||
class ExtractedArgumentNode extends ArgumentNode {
|
||||
ExtractedArgumentNode() { this = any(ExtractedDataFlowCall c).getArg(_) }
|
||||
|
||||
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
|
||||
this.extractedArgumentOf(call, pos)
|
||||
ExtractedArgumentNode() {
|
||||
// for resolved calls, we need to allow all argument nodes
|
||||
getCallArg(_, _, _, this, _)
|
||||
or
|
||||
// for potential summaries we allow all normal call arguments
|
||||
normalCallArg(_, this, _)
|
||||
or
|
||||
// and self arguments
|
||||
this.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject()
|
||||
}
|
||||
|
||||
predicate extractedArgumentOf(ExtractedDataFlowCall call, ArgumentPosition pos) {
|
||||
this = call.getArg(pos)
|
||||
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
|
||||
this = call.getArgument(pos) and
|
||||
call instanceof ExtractedDataFlowCall
|
||||
}
|
||||
}
|
||||
|
||||
@@ -345,16 +354,17 @@ class ExtractedArgumentNode extends ArgumentNode {
|
||||
* changed its state.
|
||||
*
|
||||
* This can be either the argument to a callable after the callable returns
|
||||
* (which might have mutated the argument), or the qualifier of a field after
|
||||
* an update to the field.
|
||||
* (which might have mutated the argument), the qualifier of a field after
|
||||
* an update to the field, or a container such as a list/dictionary after an element
|
||||
* update.
|
||||
*
|
||||
* Nodes corresponding to AST elements, for example `ExprNode`s, usually refer
|
||||
* to the value before the update with the exception of `ObjectCreationNode`s,
|
||||
* to the value before the update with the exception of class calls,
|
||||
* which represents the value _after_ the constructor has run.
|
||||
*/
|
||||
abstract class PostUpdateNode extends Node {
|
||||
class PostUpdateNode extends Node instanceof PostUpdateNodeImpl {
|
||||
/** Gets the node before the state update. */
|
||||
abstract Node getPreUpdateNode();
|
||||
Node getPreUpdateNode() { result = super.getPreUpdateNode() }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -448,70 +458,6 @@ private predicate resolved_import_star_module(Module m, string name, Node n) {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* The node holding the extra positional arguments to a call. This node is passed as a tuple
|
||||
* to the starred parameter of the callable.
|
||||
*/
|
||||
class PosOverflowNode extends Node, TPosOverflowNode {
|
||||
CallNode call;
|
||||
|
||||
PosOverflowNode() { this = TPosOverflowNode(call, _) }
|
||||
|
||||
override string toString() { result = "PosOverflowNode for " + call.getNode().toString() }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() {
|
||||
exists(Node node |
|
||||
node = TCfgNode(call) and
|
||||
result = node.getEnclosingCallable()
|
||||
)
|
||||
}
|
||||
|
||||
override Location getLocation() { result = call.getLocation() }
|
||||
}
|
||||
|
||||
/**
|
||||
* The node holding the extra keyword arguments to a call. This node is passed as a dictionary
|
||||
* to the doubly starred parameter of the callable.
|
||||
*/
|
||||
class KwOverflowNode extends Node, TKwOverflowNode {
|
||||
CallNode call;
|
||||
|
||||
KwOverflowNode() { this = TKwOverflowNode(call, _) }
|
||||
|
||||
override string toString() { result = "KwOverflowNode for " + call.getNode().toString() }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() {
|
||||
exists(Node node |
|
||||
node = TCfgNode(call) and
|
||||
result = node.getEnclosingCallable()
|
||||
)
|
||||
}
|
||||
|
||||
override Location getLocation() { result = call.getLocation() }
|
||||
}
|
||||
|
||||
/**
|
||||
* The node representing the synthetic argument of a call that is unpacked from a dictionary
|
||||
* argument.
|
||||
*/
|
||||
class KwUnpackedNode extends Node, TKwUnpackedNode {
|
||||
CallNode call;
|
||||
string name;
|
||||
|
||||
KwUnpackedNode() { this = TKwUnpackedNode(call, _, name) }
|
||||
|
||||
override string toString() { result = "KwUnpacked " + name }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() {
|
||||
exists(Node node |
|
||||
node = TCfgNode(call) and
|
||||
result = node.getEnclosingCallable()
|
||||
)
|
||||
}
|
||||
|
||||
override Location getLocation() { result = call.getLocation() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A synthetic node representing an iterable sequence. Used for changing content type
|
||||
* for instance from a `ListElement` to a `TupleElement`, especially if the content is
|
||||
|
||||
@@ -61,11 +61,11 @@ bindingset[c, rk]
|
||||
DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
|
||||
|
||||
/**
|
||||
* Gets the type of the `i`th parameter in a synthesized call that targets a
|
||||
* callback of type `t`.
|
||||
* Gets the type of the parameter matching arguments at position `pos` in a
|
||||
* synthesized call that targets a callback of type `t`.
|
||||
*/
|
||||
bindingset[t, i]
|
||||
DataFlowType getCallbackParameterType(DataFlowType t, int i) { any() }
|
||||
bindingset[t, pos]
|
||||
DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() }
|
||||
|
||||
/**
|
||||
* Gets the return type of kind `rk` in a synthesized call that targets a
|
||||
@@ -114,10 +114,34 @@ string getComponentSpecific(SummaryComponent sc) {
|
||||
}
|
||||
|
||||
/** Gets the textual representation of a parameter position in the format used for flow summaries. */
|
||||
string getParameterPosition(ParameterPosition pos) { result = pos.toString() }
|
||||
string getParameterPosition(ParameterPosition pos) {
|
||||
pos.isSelf() and result = "self"
|
||||
or
|
||||
exists(int i |
|
||||
pos.isPositional(i) and
|
||||
result = i.toString()
|
||||
)
|
||||
or
|
||||
exists(string name |
|
||||
pos.isKeyword(name) and
|
||||
result = name + ":"
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the textual representation of an argument position in the format used for flow summaries. */
|
||||
string getArgumentPosition(ArgumentPosition pos) { result = pos.toString() }
|
||||
string getArgumentPosition(ArgumentPosition pos) {
|
||||
pos.isSelf() and result = "self"
|
||||
or
|
||||
exists(int i |
|
||||
pos.isPositional(i) and
|
||||
result = i.toString()
|
||||
)
|
||||
or
|
||||
exists(string name |
|
||||
pos.isKeyword(name) and
|
||||
result = name + ":"
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if input specification component `c` needs a reference. */
|
||||
predicate inputNeedsReferenceSpecific(string c) { none() }
|
||||
@@ -197,29 +221,55 @@ module ParsePositions {
|
||||
)
|
||||
}
|
||||
|
||||
predicate isParsedParameterPosition(string c, int i) {
|
||||
predicate isParsedPositionalParameterPosition(string c, int i) {
|
||||
isParamBody(c) and
|
||||
i = AccessPath::parseInt(c)
|
||||
}
|
||||
|
||||
predicate isParsedArgumentPosition(string c, int i) {
|
||||
predicate isParsedKeywordParameterPosition(string c, string paramName) {
|
||||
isParamBody(c) and
|
||||
c = paramName + ":"
|
||||
}
|
||||
|
||||
predicate isParsedPositionalArgumentPosition(string c, int i) {
|
||||
isArgBody(c) and
|
||||
i = AccessPath::parseInt(c)
|
||||
}
|
||||
|
||||
predicate isParsedKeywordArgumentPosition(string c, string argName) {
|
||||
isArgBody(c) and
|
||||
c = argName + ":"
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */
|
||||
ArgumentPosition parseParamBody(string s) {
|
||||
exists(int i |
|
||||
ParsePositions::isParsedParameterPosition(s, i) and
|
||||
ParsePositions::isParsedPositionalParameterPosition(s, i) and
|
||||
result.isPositional(i)
|
||||
)
|
||||
or
|
||||
exists(string name |
|
||||
ParsePositions::isParsedKeywordParameterPosition(s, name) and
|
||||
result.isKeyword(name)
|
||||
)
|
||||
or
|
||||
s = "self" and
|
||||
result.isSelf()
|
||||
}
|
||||
|
||||
/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */
|
||||
ParameterPosition parseArgBody(string s) {
|
||||
exists(int i |
|
||||
ParsePositions::isParsedArgumentPosition(s, i) and
|
||||
ParsePositions::isParsedPositionalArgumentPosition(s, i) and
|
||||
result.isPositional(i)
|
||||
)
|
||||
or
|
||||
exists(string name |
|
||||
ParsePositions::isParsedKeywordArgumentPosition(s, name) and
|
||||
result.isKeyword(name)
|
||||
)
|
||||
or
|
||||
s = "self" and
|
||||
result.isSelf()
|
||||
}
|
||||
|
||||
@@ -60,22 +60,6 @@ string getPossibleContentName() {
|
||||
result = any(DataFlowPublic::AttrRef a).getAttributeName()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
|
||||
*
|
||||
* Helper predicate to avoid bad join order experienced in `callStep`.
|
||||
* This happened when `isParameterOf` was joined _before_ `getCallable`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private DataFlowPrivate::DataFlowCallable getCallableForArgument(
|
||||
DataFlowPublic::ExtractedArgumentNode nodeFrom, int i
|
||||
) {
|
||||
exists(DataFlowPrivate::ExtractedDataFlowCall call |
|
||||
nodeFrom.extractedArgumentOf(call, i) and
|
||||
result = call.getCallable()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
|
||||
*
|
||||
@@ -83,11 +67,15 @@ private DataFlowPrivate::DataFlowCallable getCallableForArgument(
|
||||
* recursion (or, at best, terrible performance), since identifying calls to library
|
||||
* methods is done using API graphs (which uses type tracking).
|
||||
*/
|
||||
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPrivate::ParameterNodeImpl nodeTo) {
|
||||
// TODO: Support special methods?
|
||||
exists(DataFlowPrivate::DataFlowCallable callable, int i |
|
||||
callable = getCallableForArgument(nodeFrom, i) and
|
||||
nodeTo.isParameterOf(callable, i)
|
||||
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
|
||||
exists(
|
||||
DataFlowPrivate::DataFlowCall call, DataFlowPrivate::DataFlowCallable callable,
|
||||
DataFlowPrivate::ArgumentPosition apos, DataFlowPrivate::ParameterPosition ppos
|
||||
|
|
||||
nodeFrom = call.getArgument(apos) and
|
||||
nodeTo = callable.getParameter(ppos) and
|
||||
DataFlowPrivate::parameterMatch(ppos, apos) and
|
||||
callable = call.getCallable()
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -1465,7 +1465,19 @@ private module StdlibPrivate {
|
||||
t.start() and
|
||||
result = openCall and
|
||||
(
|
||||
openCall instanceof OpenCall
|
||||
openCall instanceof OpenCall and
|
||||
// don't include the open call inside of Path.open in pathlib.py since
|
||||
// the call to `path_obj.open` is covered by `PathLibOpenCall`.
|
||||
not exists(Module mod, Class cls, Function func |
|
||||
openCall.(OpenCall).asCfgNode().getScope() = func and
|
||||
func.getName() = "open" and
|
||||
func.getScope() = cls and
|
||||
cls.getName() = "Path" and
|
||||
cls.getScope() = mod and
|
||||
mod.getName() = "pathlib" and
|
||||
// do allow this call if we're analyzing pathlib.py as part of CPython though
|
||||
not exists(mod.getFile().getRelativePath())
|
||||
)
|
||||
or
|
||||
openCall instanceof PathLibOpenCall
|
||||
)
|
||||
|
||||
@@ -93,6 +93,8 @@ module Stages {
|
||||
exists(PyFlow::DefinitionNode b)
|
||||
or
|
||||
exists(any(PyFlow::SequenceNode n).getElement(_))
|
||||
or
|
||||
exists(any(PyFlow::ControlFlowNode c).toString())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,6 +127,45 @@ module Stages {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The points-to stage.
|
||||
*/
|
||||
cached
|
||||
module PointsTo {
|
||||
/**
|
||||
* Always holds.
|
||||
* Ensures that a predicate is evaluated as part of the points-to stage.
|
||||
*/
|
||||
cached
|
||||
predicate ref() { 1 = 1 }
|
||||
|
||||
private import semmle.python.pointsto.Base as PointsToBase
|
||||
private import semmle.python.types.Object as TypeObject
|
||||
private import semmle.python.objects.TObject as TObject
|
||||
private import semmle.python.objects.ObjectInternal as ObjectInternal
|
||||
// have to alias since this module is also called PointsTo
|
||||
private import semmle.python.pointsto.PointsTo as RealPointsTo
|
||||
|
||||
/**
|
||||
* DONT USE!
|
||||
* Contains references to each predicate that use the above `ref` predicate.
|
||||
*/
|
||||
cached
|
||||
predicate backref() {
|
||||
1 = 1
|
||||
or
|
||||
PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
|
||||
or
|
||||
exists(TypeObject::Object a)
|
||||
or
|
||||
exists(TObject::TObject f)
|
||||
or
|
||||
exists(any(ObjectInternal::ObjectInternal o).toString())
|
||||
or
|
||||
RealPointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The `dataflow` stage.
|
||||
*/
|
||||
@@ -138,14 +179,9 @@ module Stages {
|
||||
predicate ref() { 1 = 1 }
|
||||
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
|
||||
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
|
||||
private import semmle.python.dataflow.new.internal.LocalSources as LocalSources
|
||||
private import semmle.python.internal.Awaited as Awaited
|
||||
private import semmle.python.pointsto.Base as PointsToBase
|
||||
private import semmle.python.types.Object as TypeObject
|
||||
private import semmle.python.objects.TObject as TObject
|
||||
private import semmle.python.Flow as Flow
|
||||
private import semmle.python.objects.ObjectInternal as ObjectInternal
|
||||
private import semmle.python.pointsto.PointsTo as PointsTo
|
||||
|
||||
/**
|
||||
* DONT USE!
|
||||
@@ -159,21 +195,13 @@ module Stages {
|
||||
or
|
||||
any(DataFlowPublic::Node node).hasLocationInfo(_, _, _, _, _)
|
||||
or
|
||||
DataFlowDispatch::resolveCall(_, _, _)
|
||||
or
|
||||
DataFlowDispatch::getCallArg(_, _, _, _, _)
|
||||
or
|
||||
any(LocalSources::LocalSourceNode n).flowsTo(_)
|
||||
or
|
||||
exists(Awaited::awaited(_))
|
||||
or
|
||||
PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
|
||||
or
|
||||
exists(TypeObject::Object a)
|
||||
or
|
||||
exists(TObject::TObject f)
|
||||
or
|
||||
exists(any(Flow::ControlFlowNode c).toString())
|
||||
or
|
||||
exists(any(ObjectInternal::ObjectInternal o).toString())
|
||||
or
|
||||
PointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -216,7 +216,7 @@ class BuiltinOpaqueObjectInternal extends ObjectInternal, TBuiltinOpaqueObject {
|
||||
override Builtin getBuiltin() { this = TBuiltinOpaqueObject(result) }
|
||||
|
||||
override string toString() {
|
||||
Stages::DataFlow::ref() and
|
||||
Stages::PointsTo::ref() and
|
||||
result = this.getBuiltin().getClass().getName() + " object"
|
||||
}
|
||||
|
||||
|
||||
@@ -318,7 +318,7 @@ module BaseFlow {
|
||||
predicate scope_entry_value_transfer_from_earlier(
|
||||
EssaVariable pred_var, Scope pred_scope, ScopeEntryDefinition succ_def, Scope succ_scope
|
||||
) {
|
||||
Stages::DataFlow::ref() and
|
||||
Stages::PointsTo::ref() and
|
||||
exists(SsaSourceVariable var |
|
||||
essa_var_scope(var, pred_scope, pred_var) and
|
||||
scope_entry_def_scope(var, succ_scope, succ_def)
|
||||
|
||||
@@ -2566,7 +2566,7 @@ module AttributePointsTo {
|
||||
predicate variableAttributePointsTo(
|
||||
EssaVariable var, Context context, string name, ObjectInternal value, CfgOrigin origin
|
||||
) {
|
||||
Stages::DataFlow::ref() and
|
||||
Stages::PointsTo::ref() and
|
||||
definitionAttributePointsTo(var.getDefinition(), context, name, value, origin)
|
||||
or
|
||||
exists(EssaVariable prev |
|
||||
|
||||
@@ -57,16 +57,43 @@ module CleartextLogging {
|
||||
/** A piece of data printed, considered as a flow sink. */
|
||||
class PrintedDataAsSink extends Sink {
|
||||
PrintedDataAsSink() {
|
||||
this = API::builtin("print").getACall().getArg(_)
|
||||
or
|
||||
// special handling of writing to `sys.stdout` and `sys.stderr`, which is
|
||||
// essentially the same as printing
|
||||
this =
|
||||
API::moduleImport("sys")
|
||||
.getMember(["stdout", "stderr"])
|
||||
.getMember("write")
|
||||
.getACall()
|
||||
.getArg(0)
|
||||
(
|
||||
this = API::builtin("print").getACall().getArg(_)
|
||||
or
|
||||
// special handling of writing to `sys.stdout` and `sys.stderr`, which is
|
||||
// essentially the same as printing
|
||||
this =
|
||||
API::moduleImport("sys")
|
||||
.getMember(["stdout", "stderr"])
|
||||
.getMember("write")
|
||||
.getACall()
|
||||
.getArg(0)
|
||||
) and
|
||||
// since some of the inner error handling implementation of the logging module is
|
||||
// ```py
|
||||
// sys.stderr.write('Message: %r\n'
|
||||
// 'Arguments: %s\n' % (record.msg,
|
||||
// record.args))
|
||||
// ```
|
||||
// any time we would report flow to such a logging sink, we can ALSO report
|
||||
// the flow to the `record.msg`/`record.args` sinks -- obviously we
|
||||
// don't want that.
|
||||
//
|
||||
// However, simply removing taint edges out of a sink is not a good enough solution,
|
||||
// since we would only flag one of the `logging.info` calls in the following example
|
||||
// due to use-use flow
|
||||
// ```py
|
||||
// logging.info(user_controlled)
|
||||
// logging.info(user_controlled)
|
||||
// ```
|
||||
//
|
||||
// The same approach is used in the command injection query.
|
||||
not exists(Module loggingInit |
|
||||
loggingInit.getName() = "logging.__init__" and
|
||||
this.getScope().getEnclosingModule() = loggingInit and
|
||||
// do allow this call if we're analyzing logging/__init__.py as part of CPython though
|
||||
not exists(loggingInit.getFile().getRelativePath())
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,7 +50,34 @@ module CleartextStorage {
|
||||
|
||||
/** The data written to a file, considered as a flow sink. */
|
||||
class FileWriteDataAsSink extends Sink {
|
||||
FileWriteDataAsSink() { this = any(FileSystemWriteAccess write).getADataNode() }
|
||||
FileWriteDataAsSink() {
|
||||
this = any(FileSystemWriteAccess write).getADataNode() and
|
||||
// since implementation of Path.write_bytes in pathlib.py is like
|
||||
// ```py
|
||||
// def write_bytes(self, data):
|
||||
// with self.open(mode='wb') as f:
|
||||
// return f.write(data)
|
||||
// ```
|
||||
// any time we would report flow to the `Path.write_bytes` sink, we can ALSO report
|
||||
// the flow from the `data` parameter to the `f.write` sink -- obviously we
|
||||
// don't want that.
|
||||
//
|
||||
// However, simply removing taint edges out of a sink is not a good enough solution,
|
||||
// since we would only flag one of the `p.write` calls in the following example
|
||||
// due to use-use flow
|
||||
// ```py
|
||||
// p.write(user_controlled)
|
||||
// p.write(user_controlled)
|
||||
// ```
|
||||
//
|
||||
// The same approach is used in the command injection query.
|
||||
not exists(Module pathlib |
|
||||
pathlib.getName() = "pathlib" and
|
||||
this.getScope().getEnclosingModule() = pathlib and
|
||||
// do allow this call if we're analyzing pathlib.py as part of CPython though
|
||||
not exists(pathlib.getFile().getRelativePath())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** The data written to a cookie on a HTTP response, considered as a flow sink. */
|
||||
|
||||
@@ -76,6 +76,9 @@ module CommandInjection {
|
||||
// `subprocess`. See:
|
||||
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
|
||||
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
|
||||
//
|
||||
// The same approach is used in the path-injection, cleartext-storage, and
|
||||
// cleartext-logging queries.
|
||||
not this.getScope().getEnclosingModule().getName() in [
|
||||
"os", "subprocess", "platform", "popen2"
|
||||
]
|
||||
|
||||
@@ -58,7 +58,33 @@ module PathInjection {
|
||||
* A file system access, considered as a flow sink.
|
||||
*/
|
||||
class FileSystemAccessAsSink extends Sink {
|
||||
FileSystemAccessAsSink() { this = any(FileSystemAccess e).getAPathArgument() }
|
||||
FileSystemAccessAsSink() {
|
||||
this = any(FileSystemAccess e).getAPathArgument() and
|
||||
// since implementation of Path.open in pathlib.py is like
|
||||
// ```py
|
||||
// def open(self, ...):
|
||||
// return io.open(self, ...)
|
||||
// ```
|
||||
// any time we would report flow to the `path_obj.open` sink, we can ALSO report
|
||||
// the flow from the `self` parameter to the `io.open` sink -- obviously we
|
||||
// don't want that.
|
||||
//
|
||||
// However, simply removing taint edges out of a sink is not a good enough solution,
|
||||
// since we would only flag one of the `p.open` calls in the following example
|
||||
// due to use-use flow
|
||||
// ```py
|
||||
// p.open()
|
||||
// p.open()
|
||||
// ```
|
||||
//
|
||||
// The same approach is used in the command injection query.
|
||||
not exists(Module pathlib |
|
||||
pathlib.getName() = "pathlib" and
|
||||
this.getScope().getEnclosingModule() = pathlib and
|
||||
// do allow this call if we're analyzing pathlib.py as part of CPython though
|
||||
not exists(pathlib.getFile().getRelativePath())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private import semmle.python.frameworks.data.ModelsAsData
|
||||
|
||||
@@ -41,7 +41,32 @@ module StackTraceExposure {
|
||||
/**
|
||||
* A source of exception info, considered as a flow source.
|
||||
*/
|
||||
class ExceptionInfoAsSource extends Source instanceof ExceptionInfo { }
|
||||
class ExceptionInfoAsSource extends Source instanceof ExceptionInfo {
|
||||
ExceptionInfoAsSource() {
|
||||
// since `traceback.format_exc()` in Python 2 is internally implemented as
|
||||
// ```py
|
||||
// def format_exc(limit=None):
|
||||
// """Like print_exc() but return a string."""
|
||||
// try:
|
||||
// etype, value, tb = sys.exc_info()
|
||||
// return ''.join(format_exception(etype, value, tb, limit))
|
||||
// finally:
|
||||
// etype = value = tb = None
|
||||
// ```
|
||||
// any time we would report flow to such from a call to format_exc, we can ALSO report
|
||||
// the flow from the `sys.exc_info()` source -- obviously we don't want that.
|
||||
//
|
||||
//
|
||||
// To avoid this, we use the same approach as for sinks in the command injection
|
||||
// query (and others).
|
||||
not exists(Module traceback |
|
||||
traceback.getName() = "traceback" and
|
||||
this.getScope().getEnclosingModule() = traceback and
|
||||
// do allow this call if we're analyzing traceback.py as part of CPython though
|
||||
not exists(traceback.getFile().getRelativePath())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The body of a HTTP response that will be returned from a server, considered as a flow sink.
|
||||
|
||||
@@ -5,7 +5,7 @@ private import semmle.python.internal.CachedStages
|
||||
|
||||
cached
|
||||
private predicate is_an_object(@py_object obj) {
|
||||
Stages::DataFlow::ref() and
|
||||
Stages::PointsTo::ref() and
|
||||
/* CFG nodes for numeric literals, all of which have a @py_cobject for the value of that literal */
|
||||
obj instanceof ControlFlowNode and
|
||||
not obj.(ControlFlowNode).getNode() instanceof IntegerLiteral and
|
||||
@@ -78,7 +78,7 @@ class Object extends @py_object {
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
Stages::DataFlow::ref() and
|
||||
Stages::PointsTo::ref() and
|
||||
this.hasOrigin() and
|
||||
this.getOrigin()
|
||||
.getLocation()
|
||||
@@ -98,7 +98,7 @@ class Object extends @py_object {
|
||||
/** Gets a textual representation of this element. */
|
||||
cached
|
||||
string toString() {
|
||||
Stages::DataFlow::ref() and
|
||||
Stages::PointsTo::ref() and
|
||||
not this = undefinedVariable() and
|
||||
not this = unknownValue() and
|
||||
exists(ClassObject type | type.asBuiltin() = this.asBuiltin().getClass() |
|
||||
|
||||
@@ -1,48 +1,36 @@
|
||||
/**
|
||||
* Definitions for reasoning about untrusted data used in APIs defined outside the
|
||||
* database.
|
||||
* user-written code.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
|
||||
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
|
||||
private import semmle.python.types.Builtins
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
|
||||
// IMPLEMENTATION NOTES:
|
||||
//
|
||||
// This query uses *both* the new data-flow library, and points-to. Why? To get this
|
||||
// finished quickly, so it can provide value for our field team and ourselves.
|
||||
//
|
||||
// In the long run, it should not need to use points-to for anything. Possibly this can
|
||||
// even be helpful in figuring out what we need from TypeTrackers and the new data-flow
|
||||
// library to be fully operational.
|
||||
//
|
||||
// At least it will allow us to provide a baseline comparison against a solution that
|
||||
// doesn't use points-to at all
|
||||
//
|
||||
// There is a few dirty things we do here:
|
||||
// 1. DataFlowPrivate: since `DataFlowCall` and `DataFlowCallable` are not exposed
|
||||
// publicly, but we really want access to them.
|
||||
// 2. points-to: we kinda need to do this since this is what powers `DataFlowCall` and
|
||||
// `DataFlowCallable`
|
||||
// 3. ObjectInternal: to provide better names for built-in functions and methods. If we
|
||||
// really wanted to polish our points-to implementation, we could move this
|
||||
// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
|
||||
// probably require some more work: for this query, it's totally ok to use
|
||||
// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
|
||||
// figure out if that is desirable in general. I simply skipped a corner here!
|
||||
// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
|
||||
/**
|
||||
* A callable that is considered a "safe" external API from a security perspective.
|
||||
* An external API that is considered "safe" from a security perspective.
|
||||
*/
|
||||
class SafeExternalApi extends Unit {
|
||||
/** Gets a callable that is considered a "safe" external API from a security perspective. */
|
||||
abstract DataFlowPrivate::DataFlowCallable getSafeCallable();
|
||||
/**
|
||||
* Gets a call that is considered "safe" from a security perspective. You can use API
|
||||
* graphs to find calls to functions you know are safe.
|
||||
*
|
||||
* Which works even when the external library isn't extracted.
|
||||
*/
|
||||
abstract DataFlow::CallCfgNode getSafeCall();
|
||||
|
||||
/**
|
||||
* Gets a callable that is considered a "safe" external API from a security
|
||||
* perspective.
|
||||
*
|
||||
* You probably want to define this as `none()` and use `getSafeCall` instead, since
|
||||
* that can handle the external library not being extracted.
|
||||
*/
|
||||
DataFlowPrivate::DataFlowCallable getSafeCallable() { none() }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for SafeExternalApi */
|
||||
@@ -50,42 +38,127 @@ deprecated class SafeExternalAPI = SafeExternalApi;
|
||||
|
||||
/** The default set of "safe" external APIs. */
|
||||
private class DefaultSafeExternalApi extends SafeExternalApi {
|
||||
override DataFlowPrivate::DataFlowCallable getSafeCallable() {
|
||||
exists(CallableValue cv | cv = result.getCallableValue() |
|
||||
cv = Value::named(["len", "isinstance", "getattr", "hasattr"])
|
||||
or
|
||||
exists(ClassValue cls, string attr |
|
||||
cls = Value::named("dict") and attr in ["__getitem__", "__setitem__"]
|
||||
|
|
||||
cls.lookup(attr) = cv
|
||||
)
|
||||
override DataFlow::CallCfgNode getSafeCall() {
|
||||
result =
|
||||
API::builtin([
|
||||
"len", "enumerate", "isinstance", "getattr", "hasattr", "bool", "float", "int", "repr",
|
||||
"str", "type"
|
||||
]).getACall()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a human readable representation of `node`.
|
||||
*
|
||||
* Note that this is only defined for API nodes that are allowed as external APIs,
|
||||
* so `None.json.dumps` will for example not be allowed.
|
||||
*/
|
||||
string apiNodeToStringRepr(API::Node node) {
|
||||
node = API::builtin(result)
|
||||
or
|
||||
node = API::moduleImport(result)
|
||||
or
|
||||
exists(API::Node base, string basename |
|
||||
base.getDepth() < node.getDepth() and
|
||||
basename = apiNodeToStringRepr(base) and
|
||||
not base = API::builtin(["None", "True", "False"])
|
||||
|
|
||||
exists(string m | node = base.getMember(m) | result = basename + "." + m)
|
||||
or
|
||||
node = base.getReturn() and
|
||||
result = basename + "()" and
|
||||
not base.getACall() = any(SafeExternalApi safe).getSafeCall()
|
||||
or
|
||||
node = base.getAwaited() and
|
||||
result = basename
|
||||
)
|
||||
}
|
||||
|
||||
predicate resolvedCall(CallNode call) {
|
||||
DataFlowPrivate::resolveCall(call, _, _) or
|
||||
DataFlowPrivate::resolveClassCall(call, _)
|
||||
}
|
||||
|
||||
newtype TInterestingExternalApiCall =
|
||||
TUnresolvedCall(DataFlow::CallCfgNode call) {
|
||||
exists(call.getLocation().getFile().getRelativePath()) and
|
||||
not resolvedCall(call.getNode()) and
|
||||
not call = any(SafeExternalApi safe).getSafeCall()
|
||||
} or
|
||||
TResolvedCall(DataFlowPrivate::DataFlowCall call) {
|
||||
exists(call.getLocation().getFile().getRelativePath()) and
|
||||
exists(call.getCallable()) and
|
||||
not call.getCallable() = any(SafeExternalApi safe).getSafeCallable() and
|
||||
// ignore calls inside codebase, and ignore calls that are marked as safe. This is
|
||||
// only needed as long as we extract dependencies. When we stop doing that, all
|
||||
// targets of resolved calls will be from user-written code.
|
||||
not exists(call.getCallable().getLocation().getFile().getRelativePath()) and
|
||||
not exists(DataFlow::CallCfgNode callCfgNode | callCfgNode.getNode() = call.getNode() |
|
||||
any(SafeExternalApi safe).getSafeCall() = callCfgNode
|
||||
)
|
||||
}
|
||||
|
||||
abstract class InterestingExternalApiCall extends TInterestingExternalApiCall {
|
||||
/** Gets the argument at position `apos`, if any */
|
||||
abstract DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos);
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
abstract string toString();
|
||||
|
||||
/**
|
||||
* Gets a human-readable name for the external API.
|
||||
*/
|
||||
abstract string getApiName();
|
||||
}
|
||||
|
||||
class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
|
||||
DataFlow::CallCfgNode call;
|
||||
|
||||
UnresolvedCall() { this = TUnresolvedCall(call) }
|
||||
|
||||
override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
|
||||
exists(int i | apos.isPositional(i) | result = call.getArg(i))
|
||||
or
|
||||
exists(string name | apos.isKeyword(name) | result = call.getArgByName(name))
|
||||
}
|
||||
|
||||
override string toString() {
|
||||
result = "ExternalAPI:UnresolvedCall: " + call.getNode().getNode().toString()
|
||||
}
|
||||
|
||||
override string getApiName() {
|
||||
exists(API::Node apiNode |
|
||||
result = apiNodeToStringRepr(apiNode) and
|
||||
apiNode.getACall() = call
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class ResolvedCall extends InterestingExternalApiCall, TResolvedCall {
|
||||
DataFlowPrivate::DataFlowCall dfCall;
|
||||
|
||||
ResolvedCall() { this = TResolvedCall(dfCall) }
|
||||
|
||||
override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
|
||||
result = dfCall.getArgument(apos)
|
||||
}
|
||||
|
||||
override string toString() {
|
||||
result = "ExternalAPI:ResolvedCall: " + dfCall.getNode().getNode().toString()
|
||||
}
|
||||
|
||||
override string getApiName() {
|
||||
exists(DataFlow::CallCfgNode call, API::Node apiNode | dfCall.getNode() = call.getNode() |
|
||||
result = apiNodeToStringRepr(apiNode) and
|
||||
apiNode.getACall() = call
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** A node representing data being passed to an external API through a call. */
|
||||
class ExternalApiDataNode extends DataFlow::Node {
|
||||
DataFlowPrivate::DataFlowCallable callable;
|
||||
int i;
|
||||
|
||||
ExternalApiDataNode() {
|
||||
exists(DataFlowPrivate::DataFlowCall call |
|
||||
exists(call.getLocation().getFile().getRelativePath())
|
||||
|
|
||||
callable = call.getCallable() and
|
||||
// TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
|
||||
this = call.getArg(i)
|
||||
) and
|
||||
not any(SafeExternalApi safe).getSafeCallable() = callable and
|
||||
exists(Value cv | cv = callable.getCallableValue() |
|
||||
cv.isAbsent()
|
||||
or
|
||||
cv.isBuiltin()
|
||||
or
|
||||
cv.(CallableValue).getScope().getLocation().getFile().inStdlib()
|
||||
or
|
||||
not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
|
||||
) and
|
||||
exists(InterestingExternalApiCall call | this = call.getArgument(_)) and
|
||||
// Not already modeled as a taint step
|
||||
not TaintTrackingPrivate::defaultAdditionalTaintStep(this, _) and
|
||||
// for `list.append(x)`, we have a additional taint step from x -> [post] list.
|
||||
@@ -95,12 +168,6 @@ class ExternalApiDataNode extends DataFlow::Node {
|
||||
TaintTrackingPrivate::defaultAdditionalTaintStep(_, post)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the index for the parameter that will receive this untrusted data */
|
||||
int getIndex() { result = i }
|
||||
|
||||
/** Gets the callable to which this argument is passed. */
|
||||
DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for ExternalApiDataNode */
|
||||
@@ -133,19 +200,26 @@ deprecated class UntrustedExternalAPIDataNode = UntrustedExternalApiDataNode;
|
||||
|
||||
/** An external API which is used with untrusted data. */
|
||||
private newtype TExternalApi =
|
||||
/** An untrusted API method `m` where untrusted data is passed at `index`. */
|
||||
TExternalApiParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
|
||||
exists(UntrustedExternalApiDataNode n |
|
||||
callable = n.getCallable() and
|
||||
index = n.getIndex()
|
||||
MkExternalApi(string repr, DataFlowPrivate::ArgumentPosition apos) {
|
||||
exists(UntrustedExternalApiDataNode ex, InterestingExternalApiCall call |
|
||||
ex = call.getArgument(apos) and
|
||||
repr = call.getApiName()
|
||||
)
|
||||
}
|
||||
|
||||
/** An external API which is used with untrusted data. */
|
||||
class ExternalApiUsedWithUntrustedData extends TExternalApi {
|
||||
/** A argument of an external API which is used with untrusted data. */
|
||||
class ExternalApiUsedWithUntrustedData extends MkExternalApi {
|
||||
string repr;
|
||||
DataFlowPrivate::ArgumentPosition apos;
|
||||
|
||||
ExternalApiUsedWithUntrustedData() { this = MkExternalApi(repr, apos) }
|
||||
|
||||
/** Gets a possibly untrusted use of this external API. */
|
||||
UntrustedExternalApiDataNode getUntrustedDataNode() {
|
||||
this = TExternalApiParameter(result.getCallable(), result.getIndex())
|
||||
exists(InterestingExternalApiCall call |
|
||||
result = call.getArgument(apos) and
|
||||
call.getApiName() = repr
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the number of untrusted sources used with this external API. */
|
||||
@@ -154,63 +228,8 @@ class ExternalApiUsedWithUntrustedData extends TExternalApi {
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() {
|
||||
exists(
|
||||
DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
|
||||
string indexString
|
||||
|
|
||||
this = TExternalApiParameter(callable, index) and
|
||||
indexString = "param " + index and
|
||||
exists(CallableValue cv | cv = callable.getCallableValue() |
|
||||
callableString =
|
||||
cv.getScope().getEnclosingModule().getName() + "." + cv.getScope().getQualifiedName()
|
||||
or
|
||||
not exists(cv.getScope()) and
|
||||
(
|
||||
cv instanceof BuiltinFunctionValue and
|
||||
callableString = pretty_builtin_function_value(cv)
|
||||
or
|
||||
cv instanceof BuiltinMethodValue and
|
||||
callableString = pretty_builtin_method_value(cv)
|
||||
or
|
||||
not cv instanceof BuiltinFunctionValue and
|
||||
not cv instanceof BuiltinMethodValue and
|
||||
callableString = cv.toString()
|
||||
)
|
||||
) and
|
||||
result = callableString + " [" + indexString + "]"
|
||||
)
|
||||
}
|
||||
string toString() { result = repr + " [" + apos + "]" }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for ExternalApiUsedWithUntrustedData */
|
||||
deprecated class ExternalAPIUsedWithUntrustedData = ExternalApiUsedWithUntrustedData;
|
||||
|
||||
/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
|
||||
private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
|
||||
exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |
|
||||
result = prefix_with_module_if_found(b)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the fully qualified name for the `BuiltinMethodValue` bmv. */
|
||||
private string pretty_builtin_method_value(BuiltinMethodValue bmv) {
|
||||
exists(Builtin b | b = bmv.(BuiltinMethodObjectInternal).getBuiltin() |
|
||||
exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b |
|
||||
result = prefix_with_module_if_found(cls) + "." + b.getName()
|
||||
)
|
||||
or
|
||||
not exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b) and
|
||||
result = b.getName()
|
||||
)
|
||||
}
|
||||
|
||||
/** Helper predicate that tries to adds module qualifier to `b`. Will succeed even if module not found. */
|
||||
private string prefix_with_module_if_found(Builtin b) {
|
||||
exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b |
|
||||
result = mod.getName() + "." + b.getName()
|
||||
)
|
||||
or
|
||||
not exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b) and
|
||||
result = b.getName()
|
||||
}
|
||||
|
||||
@@ -11,11 +11,9 @@ relevant for security analysis of this application.</p>
|
||||
|
||||
<p>An external API is defined as a call to a method that is not defined in the source
|
||||
code, and is not modeled as a taint step in the default taint library. External APIs may
|
||||
be from the Python standard library or dependencies. The query will report the fully qualified name,
|
||||
along with <code>[param x]</code>, where <code>x</code> indicates the position of
|
||||
the parameter receiving the untrusted data. Note that for methods and
|
||||
<code>classmethod</code>s, parameter 0 represents the class instance or class itself
|
||||
respectively.</p>
|
||||
be from the Python standard library or dependencies. The query will report the fully
|
||||
qualified name, along with <code>[position index]</code> or <code>[keyword name]</code>,
|
||||
to indicate the argument passing the untrusted data.</p>
|
||||
|
||||
<p>Note that an excepted sink might not be included in the results, if it also defines a
|
||||
taint step. This is the case for <code>pickle.loads</code> which is a sink for the
|
||||
@@ -24,8 +22,6 @@ Unsafe Deserialization query, but is also a taint step for other queries.</p>
|
||||
<p>Note: Compared to the Java version of this query, we currently do not give special
|
||||
care to methods that are overridden in the source code.</p>
|
||||
|
||||
<p>Note: Currently this query will only report results for external packages that are extracted.</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
|
||||
@@ -11,11 +11,9 @@ be modeled as either taint steps, or sinks for specific problems.</p>
|
||||
|
||||
<p>An external API is defined as a call to a method that is not defined in the source
|
||||
code, and is not modeled as a taint step in the default taint library. External APIs may
|
||||
be from the Python standard library or dependencies. The query will report the fully qualified name,
|
||||
along with <code>[param x]</code>, where <code>x</code> indicates the position of
|
||||
the parameter receiving the untrusted data. Note that for methods and
|
||||
<code>classmethod</code>s, parameter 0 represents the class instance or class itself
|
||||
respectively.</p>
|
||||
be from the Python standard library or dependencies. The query will report the fully
|
||||
qualified name, along with <code>[position index]</code> or <code>[keyword name]</code>,
|
||||
to indicate the argument passing the untrusted data.</p>
|
||||
|
||||
<p>Note that an excepted sink might not be included in the results, if it also defines a
|
||||
taint step. This is the case for <code>pickle.loads</code> which is a sink for the
|
||||
@@ -24,8 +22,6 @@ Unsafe Deserialization query, but is also a taint step for other queries.</p>
|
||||
<p>Note: Compared to the Java version of this query, we currently do not give special
|
||||
care to methods that are overridden in the source code.</p>
|
||||
|
||||
<p>Note: Currently this query will only report results for external packages that are extracted.</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
|
||||
@@ -59,12 +59,11 @@ module InsecureRandomness {
|
||||
*/
|
||||
class RandomFnSink extends Sink {
|
||||
RandomFnSink() {
|
||||
exists(DataFlowCallable randomFn |
|
||||
randomFn
|
||||
.getName()
|
||||
exists(Function func |
|
||||
func.getName()
|
||||
.regexpMatch("(?i).*(gen(erate)?|make|mk|create).*(nonce|salt|pepper|Password).*")
|
||||
|
|
||||
this.getEnclosingCallable() = randomFn
|
||||
this.asExpr().getScope() = func
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
/**
|
||||
* @name Call graph
|
||||
* @description An edge in the points-to call graph.
|
||||
* @description An edge in the call graph.
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id py/meta/points-to-call-graph
|
||||
* @id py/meta/call-graph
|
||||
* @tags meta
|
||||
* @precision very-low
|
||||
*/
|
||||
@@ -12,9 +12,9 @@ import python
|
||||
import semmle.python.dataflow.new.internal.DataFlowPrivate
|
||||
import meta.MetaMetrics
|
||||
|
||||
from DataFlowCall c, DataFlowCallableValue f
|
||||
from DataFlowCall call, DataFlowCallable target
|
||||
where
|
||||
c.getCallable() = f and
|
||||
not c.getLocation().getFile() instanceof IgnoredFile and
|
||||
not f.getScope().getLocation().getFile() instanceof IgnoredFile
|
||||
select c, "Call to $@", f.getScope(), f.toString()
|
||||
target = viableCallable(call) and
|
||||
not call.getLocation().getFile() instanceof IgnoredFile and
|
||||
not target.getScope().getLocation().getFile() instanceof IgnoredFile
|
||||
select call, "Call to $@", target.getScope(), target.toString()
|
||||
|
||||
@@ -1,16 +1,55 @@
|
||||
/**
|
||||
* Provides predicates for measuring the quality of the call graph, that is,
|
||||
* the number of calls that could be resolved to a callee.
|
||||
* the number of calls that could be resolved to a target.
|
||||
*/
|
||||
|
||||
import python
|
||||
import meta.MetaMetrics
|
||||
|
||||
newtype TTarget =
|
||||
TFunction(Function func) or
|
||||
TClass(Class cls)
|
||||
|
||||
class Target extends TTarget {
|
||||
/** Gets a textual representation of this element. */
|
||||
abstract string toString();
|
||||
|
||||
/** Gets the location of this dataflow call. */
|
||||
abstract Location getLocation();
|
||||
|
||||
/** Whether this target is relevant. */
|
||||
predicate isRelevant() { exists(this.getLocation().getFile().getRelativePath()) }
|
||||
}
|
||||
|
||||
class TargetFunction extends Target, TFunction {
|
||||
Function func;
|
||||
|
||||
TargetFunction() { this = TFunction(func) }
|
||||
|
||||
override string toString() { result = func.toString() }
|
||||
|
||||
override Location getLocation() { result = func.getLocation() }
|
||||
|
||||
Function getFunction() { result = func }
|
||||
}
|
||||
|
||||
class TargetClass extends Target, TClass {
|
||||
Class cls;
|
||||
|
||||
TargetClass() { this = TClass(cls) }
|
||||
|
||||
override string toString() { result = cls.toString() }
|
||||
|
||||
override Location getLocation() { result = cls.getLocation() }
|
||||
|
||||
Class getClass() { result = cls }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call that is (possibly) relevant for analysis quality.
|
||||
* See `IgnoredFile` for details on what is excluded.
|
||||
*/
|
||||
class RelevantCall extends Call {
|
||||
class RelevantCall extends CallNode {
|
||||
RelevantCall() { not this.getLocation().getFile() instanceof IgnoredFile }
|
||||
}
|
||||
|
||||
@@ -18,12 +57,16 @@ class RelevantCall extends Call {
|
||||
module PointsToBasedCallGraph {
|
||||
/** A call that can be resolved by points-to. */
|
||||
class ResolvableCall extends RelevantCall {
|
||||
Value callee;
|
||||
Value targetValue;
|
||||
|
||||
ResolvableCall() { callee.getACall() = this.getAFlowNode() }
|
||||
ResolvableCall() { targetValue.getACall() = this }
|
||||
|
||||
/** Gets a resolved callee of this call. */
|
||||
Value getCallee() { result = callee }
|
||||
/** Gets a resolved target of this call. */
|
||||
Target getTarget() {
|
||||
result.(TargetFunction).getFunction() = targetValue.(CallableValue).getScope()
|
||||
or
|
||||
result.(TargetClass).getClass() = targetValue.(ClassValue).getScope()
|
||||
}
|
||||
}
|
||||
|
||||
/** A call that cannot be resolved by points-to. */
|
||||
@@ -32,34 +75,79 @@ module PointsToBasedCallGraph {
|
||||
}
|
||||
|
||||
/**
|
||||
* A call that can be resolved by points-to, where the resolved callee is relevant.
|
||||
* Relevant callees include:
|
||||
* - builtins
|
||||
* - standard library
|
||||
* A call that can be resolved by points-to, where the resolved target is relevant.
|
||||
* Relevant targets include:
|
||||
* - source code of the project
|
||||
*/
|
||||
class ResolvableCallRelevantCallee extends ResolvableCall {
|
||||
ResolvableCallRelevantCallee() {
|
||||
callee.isBuiltin()
|
||||
or
|
||||
exists(File file |
|
||||
file = callee.(CallableValue).getScope().getLocation().getFile()
|
||||
or
|
||||
file = callee.(ClassValue).getScope().getLocation().getFile()
|
||||
|
|
||||
file.inStdlib()
|
||||
or
|
||||
// part of the source code of the project
|
||||
exists(file.getRelativePath())
|
||||
class ResolvableCallRelevantTarget extends ResolvableCall {
|
||||
ResolvableCallRelevantTarget() {
|
||||
exists(Target target | target = this.getTarget() |
|
||||
exists(target.getLocation().getFile().getRelativePath())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call that can be resolved by points-to, where the resolved callee is not considered relevant.
|
||||
* See `ResolvableCallRelevantCallee` for the definition of relevance.
|
||||
* A call that can be resolved by points-to, where the resolved target is not considered relevant.
|
||||
* See `ResolvableCallRelevantTarget` for the definition of relevance.
|
||||
*/
|
||||
class ResolvableCallIrrelevantCallee extends ResolvableCall {
|
||||
ResolvableCallIrrelevantCallee() { not this instanceof ResolvableCallRelevantCallee }
|
||||
class ResolvableCallIrrelevantTarget extends ResolvableCall {
|
||||
ResolvableCallIrrelevantTarget() { not this instanceof ResolvableCallRelevantTarget }
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides classes for call-graph resolution by using type-tracking. */
|
||||
module TypeTrackingBasedCallGraph {
|
||||
private import semmle.python.dataflow.new.internal.DataFlowDispatch as TT
|
||||
|
||||
/** A call that can be resolved by type-tracking. */
|
||||
class ResolvableCall extends RelevantCall {
|
||||
ResolvableCall() {
|
||||
exists(TT::TNormalCall(this, _, _))
|
||||
or
|
||||
TT::resolveClassCall(this, _)
|
||||
}
|
||||
|
||||
/** Gets a resolved target of this call. */
|
||||
Target getTarget() {
|
||||
exists(TT::DataFlowCall call, TT::CallType ct, Function targetFunc |
|
||||
call = TT::TNormalCall(this, targetFunc, ct) and
|
||||
not ct instanceof TT::CallTypeClass and
|
||||
targetFunc = result.(TargetFunction).getFunction()
|
||||
)
|
||||
or
|
||||
// a TT::TNormalCall only exists when the call can be resolved to a function.
|
||||
// Since points-to just says the call goes directly to the class itself, and
|
||||
// type-tracking based wants to resolve this to the constructor, which might not
|
||||
// exist. So to do a proper comparison, we don't require the call to be resolve to
|
||||
// a specific function.
|
||||
TT::resolveClassCall(this, result.(TargetClass).getClass())
|
||||
}
|
||||
}
|
||||
|
||||
/** A call that cannot be resolved by type-tracking. */
|
||||
class UnresolvableCall extends RelevantCall {
|
||||
UnresolvableCall() { not this instanceof ResolvableCall }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call that can be resolved by type-tracking, where the resolved callee is relevant.
|
||||
* Relevant targets include:
|
||||
* - source code of the project
|
||||
*/
|
||||
class ResolvableCallRelevantTarget extends ResolvableCall {
|
||||
ResolvableCallRelevantTarget() {
|
||||
exists(Target target | target = this.getTarget() |
|
||||
exists(target.getLocation().getFile().getRelativePath())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call that can be resolved by type-tracking, where the resolved target is not considered relevant.
|
||||
* See `ResolvableCallRelevantTarget` for the definition of relevance.
|
||||
*/
|
||||
class ResolvableCallIrrelevantTarget extends ResolvableCall {
|
||||
ResolvableCallIrrelevantTarget() { not this instanceof ResolvableCallRelevantTarget }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,4 +11,4 @@
|
||||
import python
|
||||
import CallGraphQuality
|
||||
|
||||
select projectRoot(), count(PointsToBasedCallGraph::ResolvableCallRelevantCallee call)
|
||||
select projectRoot(), count(PointsToBasedCallGraph::ResolvableCallRelevantTarget call)
|
||||
|
||||
17
python/ql/src/meta/analysis-quality/TTCallGraph.ql
Normal file
17
python/ql/src/meta/analysis-quality/TTCallGraph.ql
Normal file
@@ -0,0 +1,17 @@
|
||||
/**
|
||||
* @name New call graph edge from using type-tracking instead of points-to
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id py/meta/type-tracking-call-graph
|
||||
* @tags meta
|
||||
* @precision very-low
|
||||
*/
|
||||
|
||||
import python
|
||||
import CallGraphQuality
|
||||
|
||||
from CallNode call, Target target
|
||||
where
|
||||
target.isRelevant() and
|
||||
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
|
||||
select call, "$@ to $@", call, "Call", target, target.toString()
|
||||
18
python/ql/src/meta/analysis-quality/TTCallGraphMissing.ql
Normal file
18
python/ql/src/meta/analysis-quality/TTCallGraphMissing.ql
Normal file
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* @name Missing call graph edge from using type-tracking instead of points-to
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id py/meta/call-graph-missing
|
||||
* @tags meta
|
||||
* @precision very-low
|
||||
*/
|
||||
|
||||
import python
|
||||
import CallGraphQuality
|
||||
|
||||
from CallNode call, Target target
|
||||
where
|
||||
target.isRelevant() and
|
||||
call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
|
||||
not call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
|
||||
select call, "MISSING: $@ to $@", call, "Call", target, target.toString()
|
||||
18
python/ql/src/meta/analysis-quality/TTCallGraphNew.ql
Normal file
18
python/ql/src/meta/analysis-quality/TTCallGraphNew.ql
Normal file
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* @name New call graph edge from using type-tracking instead of points-to
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id py/meta/call-graph-new
|
||||
* @tags meta
|
||||
* @precision very-low
|
||||
*/
|
||||
|
||||
import python
|
||||
import CallGraphQuality
|
||||
|
||||
from CallNode call, Target target
|
||||
where
|
||||
target.isRelevant() and
|
||||
not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
|
||||
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
|
||||
select call, "NEW: $@ to $@", call, "Call", target, target.toString()
|
||||
@@ -0,0 +1,19 @@
|
||||
/**
|
||||
* @name New call graph edge from using type-tracking instead of points-to, that is ambiguous
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id py/meta/call-graph-new-ambiguous
|
||||
* @tags meta
|
||||
* @precision very-low
|
||||
*/
|
||||
|
||||
import python
|
||||
import CallGraphQuality
|
||||
|
||||
from CallNode call, Target target
|
||||
where
|
||||
target.isRelevant() and
|
||||
not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
|
||||
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target and
|
||||
1 < count(call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget())
|
||||
select call, "NEW: $@ to $@", call, "Call", target, target.toString()
|
||||
35
python/ql/src/meta/analysis-quality/TTCallGraphOverview.ql
Normal file
35
python/ql/src/meta/analysis-quality/TTCallGraphOverview.ql
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* @name Call graph edge overview from using type-tracking instead of points-to
|
||||
* @id py/meta/call-graph-overview
|
||||
* @precision very-low
|
||||
*/
|
||||
|
||||
import python
|
||||
import CallGraphQuality
|
||||
|
||||
from string tag, int c
|
||||
where
|
||||
tag = "SHARED" and
|
||||
c =
|
||||
count(CallNode call, Target target |
|
||||
target.isRelevant() and
|
||||
call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
|
||||
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
|
||||
)
|
||||
or
|
||||
tag = "NEW" and
|
||||
c =
|
||||
count(CallNode call, Target target |
|
||||
target.isRelevant() and
|
||||
not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
|
||||
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
|
||||
)
|
||||
or
|
||||
tag = "MISSING" and
|
||||
c =
|
||||
count(CallNode call, Target target |
|
||||
target.isRelevant() and
|
||||
call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
|
||||
not call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
|
||||
)
|
||||
select tag, c
|
||||
18
python/ql/src/meta/analysis-quality/TTCallGraphShared.ql
Normal file
18
python/ql/src/meta/analysis-quality/TTCallGraphShared.ql
Normal file
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* @name Shared call graph edge from using type-tracking instead of points-to
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id py/meta/call-graph-shared
|
||||
* @tags meta
|
||||
* @precision very-low
|
||||
*/
|
||||
|
||||
import python
|
||||
import CallGraphQuality
|
||||
|
||||
from CallNode call, Target target
|
||||
where
|
||||
target.isRelevant() and
|
||||
call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
|
||||
call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
|
||||
select call, "SHARED: $@ to $@", call, "Call", target, target.toString()
|
||||
@@ -0,0 +1,47 @@
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow::DataFlow
|
||||
import semmle.python.dataflow.new.internal.DataFlowPrivate
|
||||
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
|
||||
|
||||
// TODO: this should be promoted to be a REAL consistency query by being placed in
|
||||
// `python/ql/consistency-queries`. For for now it resides here.
|
||||
private class MyConsistencyConfiguration extends ConsistencyConfiguration {
|
||||
override predicate argHasPostUpdateExclude(ArgumentNode n) {
|
||||
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_))
|
||||
or
|
||||
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
|
||||
}
|
||||
|
||||
override predicate reverseReadExclude(Node n) {
|
||||
// since `self`/`cls` parameters can be marked as implicit argument to `super()`,
|
||||
// they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs`
|
||||
// parameter, but dataflow-consistency queries should _not_ complain about there not
|
||||
// being a post-update node for the synthetic `**kwargs` parameter.
|
||||
n instanceof SynthDictSplatParameterNode
|
||||
}
|
||||
|
||||
override predicate uniqueParameterNodeAtPositionExclude(
|
||||
DataFlowCallable c, ParameterPosition pos, Node p
|
||||
) {
|
||||
// TODO: This can be removed once we solve the overlap of dictionary splat parameters
|
||||
c.getParameter(pos) = p and
|
||||
pos.isDictSplat() and
|
||||
not exists(p.getLocation().getFile().getRelativePath())
|
||||
}
|
||||
|
||||
override predicate uniqueParameterNodePositionExclude(
|
||||
DataFlowCallable c, ParameterPosition pos, Node p
|
||||
) {
|
||||
// For normal parameters that can both be passed as positional arguments or keyword
|
||||
// arguments, we currently have parameter positions for both cases..
|
||||
//
|
||||
// TODO: Figure out how bad breaking this consistency check is
|
||||
exists(Function func, Parameter param |
|
||||
c.getScope() = func and
|
||||
p = parameterNode(param) and
|
||||
c.getParameter(pos) = p and
|
||||
param = func.getArg(_) and
|
||||
param = func.getArgByName(_)
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -26,29 +26,30 @@ abstract class RoutingTest extends InlineExpectationsTest {
|
||||
element = fromNode.toString() and
|
||||
(
|
||||
tag = this.flowTag() and
|
||||
if "\"" + tag + "\"" = this.fromValue(fromNode)
|
||||
then value = ""
|
||||
else value = this.fromValue(fromNode)
|
||||
if "\"" + tag + "\"" = fromValue(fromNode) then value = "" else value = fromValue(fromNode)
|
||||
or
|
||||
// only have result for `func` tag if the function where `arg<n>` is used, is
|
||||
// different from the function name of the call where `arg<n>` was specified as
|
||||
// an argument
|
||||
tag = "func" and
|
||||
value = this.toFunc(toNode) and
|
||||
not value = this.fromFunc(fromNode)
|
||||
value = toFunc(toNode) and
|
||||
not value = fromFunc(fromNode)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[inline]
|
||||
private string fromValue(DataFlow::Node fromNode) {
|
||||
result = "\"" + prettyNode(fromNode).replaceAll("\"", "'") + "\""
|
||||
}
|
||||
|
||||
pragma[inline]
|
||||
private string fromFunc(DataFlow::ArgumentNode fromNode) {
|
||||
result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
|
||||
}
|
||||
|
||||
pragma[inline]
|
||||
private string toFunc(DataFlow::Node toNode) {
|
||||
result = toNode.getEnclosingCallable().getCallableValue().getScope().getQualifiedName() // TODO: More robust pretty printing?
|
||||
}
|
||||
}
|
||||
|
||||
pragma[inline]
|
||||
private string fromValue(DataFlow::Node fromNode) {
|
||||
result = "\"" + prettyNode(fromNode).replaceAll("\"", "'") + "\""
|
||||
}
|
||||
|
||||
pragma[inline]
|
||||
private string fromFunc(DataFlow::ArgumentNode fromNode) {
|
||||
result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
|
||||
}
|
||||
|
||||
pragma[inline]
|
||||
private string toFunc(DataFlow::Node toNode) {
|
||||
result = toNode.getEnclosingCallable().getQualifiedName()
|
||||
}
|
||||
|
||||
@@ -12,13 +12,10 @@ class UnresolvedCallExpectations extends InlineExpectationsTest {
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(location.getFile().getRelativePath()) and
|
||||
exists(CallNode call |
|
||||
not exists(DataFlowPrivate::DataFlowCall dfc | dfc.getNode() = call |
|
||||
// For every `CallNode`, there is a `DataFlowCall` in the form of a `NormalCall`.
|
||||
// It does not really count, as it has some abstract overrides. For instance, it does not
|
||||
// define `getCallable`, so checking for the existence of this guarantees that we are in a
|
||||
// properly resolved call.
|
||||
exists(dfc.getCallable())
|
||||
not exists(DataFlowPrivate::DataFlowCall dfc |
|
||||
exists(dfc.getCallable()) and dfc.getNode() = call
|
||||
) and
|
||||
not DataFlowPrivate::resolveClassCall(call, _) and
|
||||
not call = API::builtin(_).getACall().asCfgNode() and
|
||||
location = call.getLocation() and
|
||||
tag = "unresolved_call" and
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
| file://:0:0:0:0 | parameter 0 of builtins.reversed |
|
||||
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
|
||||
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
|
||||
| test.py:1:19:1:19 | ControlFlowNode for x |
|
||||
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.DataFlowConsistency
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | parameter 0 of builtins.reversed | file://:0:0:0:0 | parameter 0 of builtins.reversed |
|
||||
| file://:0:0:0:0 | parameter position 0 of builtins.reversed | file://:0:0:0:0 | parameter position 0 of builtins.reversed |
|
||||
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
|
||||
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
|
||||
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |
|
||||
@@ -10,6 +10,7 @@
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:1:1:1:21 | SynthDictSplatParameterNode | test.py:1:1:1:21 | SynthDictSplatParameterNode |
|
||||
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
@@ -52,8 +53,10 @@
|
||||
| test.py:7:1:7:1 | ControlFlowNode for b | test.py:7:1:7:1 | ControlFlowNode for b |
|
||||
| test.py:7:1:7:1 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b |
|
||||
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
|
||||
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
|
||||
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
|
||||
| test.py:7:5:7:20 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
|
||||
| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
|
||||
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
|
||||
| test.py:7:19:7:19 | [post arg] ControlFlowNode for a | test.py:7:19:7:19 | [post arg] ControlFlowNode for a |
|
||||
| test.py:7:19:7:19 | [post] ControlFlowNode for a | test.py:7:19:7:19 | [post] ControlFlowNode for a |
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | parameter 0 of builtins.reversed |
|
||||
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
|
||||
| test.py:0:0:0:0 | GSSA Variable __name__ |
|
||||
| test.py:0:0:0:0 | GSSA Variable __package__ |
|
||||
| test.py:0:0:0:0 | GSSA Variable b |
|
||||
| test.py:0:0:0:0 | SSA variable $ |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
|
||||
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
|
||||
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:19:1:19 | ControlFlowNode for x |
|
||||
@@ -24,7 +25,9 @@
|
||||
| test.py:7:1:7:1 | ControlFlowNode for b |
|
||||
| test.py:7:1:7:1 | GSSA Variable b |
|
||||
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
|
||||
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
|
||||
| test.py:7:5:7:20 | GSSA Variable a |
|
||||
| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
|
||||
| test.py:7:19:7:19 | ControlFlowNode for a |
|
||||
| test.py:7:19:7:19 | [post arg] ControlFlowNode for a |
|
||||
| test.py:7:19:7:19 | [post] ControlFlowNode for a |
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
|
||||
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
|
||||
| file://:0:0:0:0 | parameter 0 of builtins.reversed |
|
||||
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
|
||||
| test.py:0:0:0:0 | GSSA Variable __name__ |
|
||||
| test.py:0:0:0:0 | GSSA Variable __package__ |
|
||||
| test.py:0:0:0:0 | GSSA Variable b |
|
||||
| test.py:0:0:0:0 | SSA variable $ |
|
||||
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
|
||||
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
|
||||
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
|
||||
| test.py:1:19:1:19 | ControlFlowNode for x |
|
||||
@@ -24,7 +25,9 @@
|
||||
| test.py:7:1:7:1 | ControlFlowNode for b |
|
||||
| test.py:7:1:7:1 | GSSA Variable b |
|
||||
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
|
||||
| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
|
||||
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
|
||||
| test.py:7:5:7:20 | GSSA Variable a |
|
||||
| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
|
||||
| test.py:7:19:7:19 | ControlFlowNode for a |
|
||||
| test.py:7:19:7:19 | [post arg] ControlFlowNode for a |
|
||||
| test.py:7:19:7:19 | [post] ControlFlowNode for a |
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
| test.py:32:8:32:23 | CrosstalkTestX() | test.py:9:5:9:23 | Function __init__ | test.py:32:8:32:23 | [pre] ControlFlowNode for CrosstalkTestX() | self |
|
||||
| test.py:33:8:33:23 | CrosstalkTestY() | test.py:21:5:21:23 | Function __init__ | test.py:33:8:33:23 | [pre] ControlFlowNode for CrosstalkTestY() | self |
|
||||
| test.py:43:1:43:8 | func() | test.py:13:5:13:26 | Function setx | test.py:36:12:36:15 | ControlFlowNode for objx | self |
|
||||
| test.py:43:1:43:8 | func() | test.py:13:5:13:26 | Function setx | test.py:43:6:43:7 | ControlFlowNode for IntegerLiteral | position 0 |
|
||||
| test.py:43:1:43:8 | func() | test.py:25:5:25:26 | Function sety | test.py:38:12:38:15 | ControlFlowNode for objy | self |
|
||||
| test.py:43:1:43:8 | func() | test.py:25:5:25:26 | Function sety | test.py:43:6:43:7 | ControlFlowNode for IntegerLiteral | position 0 |
|
||||
| test.py:51:1:51:8 | func() | test.py:16:5:16:30 | Function setvalue | test.py:47:12:47:15 | ControlFlowNode for objx | self |
|
||||
| test.py:51:1:51:8 | func() | test.py:16:5:16:30 | Function setvalue | test.py:51:6:51:7 | ControlFlowNode for IntegerLiteral | position 0 |
|
||||
| test.py:51:1:51:8 | func() | test.py:28:5:28:30 | Function setvalue | test.py:49:12:49:15 | ControlFlowNode for objy | self |
|
||||
| test.py:51:1:51:8 | func() | test.py:28:5:28:30 | Function setvalue | test.py:51:6:51:7 | ControlFlowNode for IntegerLiteral | position 0 |
|
||||
| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:63:12:63:12 | ControlFlowNode for a | self |
|
||||
| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:70:6:70:7 | ControlFlowNode for IntegerLiteral | position 0 |
|
||||
| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:70:6:70:7 | ControlFlowNode for IntegerLiteral | self |
|
||||
@@ -0,0 +1,9 @@
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPublic
|
||||
|
||||
from DataFlowCall call, DataFlowCallable callable, ArgumentNode arg, ArgumentPosition apos
|
||||
where
|
||||
callable = call.getCallable() and
|
||||
arg = call.getArgument(apos)
|
||||
select call, callable, arg, apos
|
||||
@@ -0,0 +1,24 @@
|
||||
uniqueEnclosingCallable
|
||||
uniqueType
|
||||
uniqueNodeLocation
|
||||
missingLocation
|
||||
uniqueNodeToString
|
||||
missingToString
|
||||
parameterCallable
|
||||
localFlowIsLocal
|
||||
readStepIsLocal
|
||||
storeStepIsLocal
|
||||
compatibleTypesReflexive
|
||||
unreachableNodeCCtx
|
||||
localCallNodes
|
||||
postIsNotPre
|
||||
postHasUniquePre
|
||||
uniquePostUpdate
|
||||
postIsInSameCallable
|
||||
reverseRead
|
||||
argHasPostUpdate
|
||||
postWithInFlow
|
||||
viableImplInCallContextTooLarge
|
||||
uniqueParameterNodeAtPosition
|
||||
uniqueParameterNodePosition
|
||||
uniqueContentApprox
|
||||
@@ -0,0 +1,2 @@
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.DataFlowConsistency
|
||||
@@ -0,0 +1 @@
|
||||
semmle-extractor-options: --max-import-depth=0
|
||||
@@ -0,0 +1,70 @@
|
||||
import random
|
||||
cond = random.randint(0,1) == 1
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Calling different bound-methods based on conditional
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
class CrosstalkTestX:
|
||||
def __init__(self):
|
||||
self.x = None
|
||||
self.y = None
|
||||
|
||||
def setx(self, value):
|
||||
self.x = value
|
||||
|
||||
def setvalue(self, value):
|
||||
self.x = value
|
||||
|
||||
|
||||
class CrosstalkTestY:
|
||||
def __init__(self):
|
||||
self.x = None
|
||||
self.y = None
|
||||
|
||||
def sety(self ,value):
|
||||
self.y = value
|
||||
|
||||
def setvalue(self, value):
|
||||
self.y = value
|
||||
|
||||
|
||||
objx = CrosstalkTestX()
|
||||
objy = CrosstalkTestY()
|
||||
|
||||
if cond:
|
||||
func = objx.setx
|
||||
else:
|
||||
func = objy.sety
|
||||
|
||||
# What we're testing for is whether both objects are passed as self to both methods,
|
||||
# which is wrong.
|
||||
|
||||
func(42)
|
||||
|
||||
|
||||
if cond:
|
||||
func = objx.setvalue
|
||||
else:
|
||||
func = objy.setvalue
|
||||
|
||||
func(43)
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Calling methods in different ways
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
class A(object):
|
||||
def foo(self, arg="Default"):
|
||||
print("A.foo", self, arg)
|
||||
|
||||
a = A()
|
||||
if cond:
|
||||
func = a.foo # `44` is passed as arg
|
||||
else:
|
||||
func = A.foo # `44` is passed as self
|
||||
|
||||
# What we're testing for is whether a single call ends up having both `a` and `44` is
|
||||
# passed as self to `A.foo`, which is wrong.
|
||||
|
||||
func(44)
|
||||
@@ -1,6 +1,6 @@
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.internal.DataFlowPrivate
|
||||
import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
|
||||
import TestUtilities.InlineExpectationsTest
|
||||
private import semmle.python.dataflow.new.internal.PrintNode
|
||||
|
||||
@@ -8,26 +8,29 @@ class DataFlowCallTest extends InlineExpectationsTest {
|
||||
DataFlowCallTest() { this = "DataFlowCallTest" }
|
||||
|
||||
override string getARelevantTag() {
|
||||
result in ["call", "qlclass"]
|
||||
result in ["call", "callType"]
|
||||
or
|
||||
result = "arg_" + [0 .. 10]
|
||||
result = "arg[" + any(DataFlowDispatch::ArgumentPosition pos).toString() + "]"
|
||||
}
|
||||
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(location.getFile().getRelativePath()) and
|
||||
exists(DataFlowCall call |
|
||||
exists(DataFlowDispatch::DataFlowCall call |
|
||||
location = call.getLocation() and
|
||||
element = call.toString()
|
||||
element = call.toString() and
|
||||
exists(call.getCallable())
|
||||
|
|
||||
value = prettyExpr(call.getNode().getNode()) and
|
||||
tag = "call"
|
||||
or
|
||||
value = call.getAQlClass() and
|
||||
tag = "qlclass"
|
||||
value = call.(DataFlowDispatch::NormalCall).getCallType().toString() and
|
||||
tag = "callType"
|
||||
or
|
||||
exists(int n, DataFlow::Node arg | arg = call.getArg(n) |
|
||||
exists(DataFlowDispatch::ArgumentPosition pos, DataFlow::Node arg |
|
||||
arg = call.getArgument(pos)
|
||||
|
|
||||
value = prettyNodeForInlineTest(arg) and
|
||||
tag = "arg_" + n
|
||||
tag = "arg[" + pos + "]"
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.DataFlowConsistency
|
||||
|
||||
16
python/ql/test/experimental/dataflow/calls/new_cls_param.py
Normal file
16
python/ql/test/experimental/dataflow/calls/new_cls_param.py
Normal file
@@ -0,0 +1,16 @@
|
||||
# We want to ensure that the __new__ method is considered a classmethod even though it
|
||||
# doesn't have a decorator. This means that the `cls` parameter should be considered a
|
||||
# reference to the class (or subclass), and not an instance of the class. We can detect
|
||||
# this from looking at the arguments passed in the `cls.foo` call. if we see a `self`
|
||||
# argument, this means it has correct behavior (because we're targeting a classmethod),
|
||||
# if there is no `self` argument, this means we've only considered `cls` to be a class
|
||||
# instance, since we don't want to pass that to the `cls` parameter of the classmethod `WithNewImpl.foo`.
|
||||
|
||||
class WithNewImpl(object):
|
||||
def __new__(cls):
|
||||
print("WithNewImpl.foo")
|
||||
cls.foo() # $ call=cls.foo() callType=CallTypeClassMethod arg[self]=cls
|
||||
|
||||
@classmethod
|
||||
def foo(cls):
|
||||
print("WithNewImpl.foo")
|
||||
@@ -14,24 +14,69 @@ class MyClass(object):
|
||||
def my_method(self, arg):
|
||||
pass
|
||||
|
||||
def other_method(self):
|
||||
self.my_method(42) # $ arg[self]=self call=self.my_method(..) callType=CallTypeNormalMethod arg[position 0]=42
|
||||
self.sm(42) # $ call=self.sm(..) callType=CallTypeStaticMethod arg[position 0]=42
|
||||
|
||||
@staticmethod
|
||||
def sm(arg):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def cm(cls, arg):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def other_classmethod(cls):
|
||||
cls.cm(42) # $ call=cls.cm(..) callType=CallTypeClassMethod arg[position 0]=42 arg[self]=cls
|
||||
cls.sm(42) # $ call=cls.sm(..) callType=CallTypeStaticMethod arg[position 0]=42
|
||||
|
||||
def __getitem__(self, key):
|
||||
pass
|
||||
|
||||
func(0) # $ call=func(..) arg[position 0]=0 callType=CallTypePlainFunction
|
||||
|
||||
func("foo") # $ call=func(..) qlclass=FunctionCall arg_0="foo"
|
||||
x = MyClass(1) # $ call=MyClass(..) qlclass=ClassCall arg_0=[pre]MyClass(..) arg_1=1
|
||||
x.my_method(2) # $ call=x.my_method(..) qlclass=MethodCall arg_0=x arg_1=2
|
||||
x = MyClass(1) # $ call=MyClass(..) arg[self]=[pre]MyClass(..) arg[position 0]=1 callType=CallTypeClass
|
||||
|
||||
x.my_method(2) # $ call=x.my_method(..) arg[self]=x arg[position 0]=2 callType=CallTypeNormalMethod
|
||||
mm = x.my_method
|
||||
mm(2) # $ call=mm(..) qlclass=MethodCall arg_1=2 MISSING: arg_0=x
|
||||
x[3] # $ call=x[3] qlclass=SpecialCall arg_0=x arg_1=3
|
||||
mm(2) # $ call=mm(..) arg[self]=x arg[position 0]=2 callType=CallTypeNormalMethod
|
||||
MyClass.my_method(x, 2) # $ call=MyClass.my_method(..) arg[position 0]=2 arg[self]=x callType=CallTypeMethodAsPlainFunction
|
||||
|
||||
x.sm(3) # $ call=x.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
|
||||
MyClass.sm(3) # $ call=MyClass.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
|
||||
|
||||
x.cm(4) # $ call=x.cm(..) arg[position 0]=4 callType=CallTypeClassMethod
|
||||
MyClass.cm(4) # $ call=MyClass.cm(..) arg[position 0]=4 arg[self]=MyClass callType=CallTypeClassMethod
|
||||
|
||||
x[5] # $ MISSING: call=x[5] arg[self]=x arg[position 0]=5
|
||||
|
||||
|
||||
class Subclass(MyClass):
|
||||
pass
|
||||
|
||||
y = Subclass(1) # $ call=Subclass(..) arg[self]=[pre]Subclass(..) arg[position 0]=1 callType=CallTypeClass
|
||||
|
||||
y.my_method(2) # $ call=y.my_method(..) arg[self]=y arg[position 0]=2 callType=CallTypeNormalMethod
|
||||
mm = y.my_method
|
||||
mm(2) # $ call=mm(..) arg[self]=y arg[position 0]=2 callType=CallTypeNormalMethod
|
||||
Subclass.my_method(y, 2) # $ call=Subclass.my_method(..) arg[self]=y arg[position 0]=2 callType=CallTypeMethodAsPlainFunction
|
||||
|
||||
y.sm(3) # $ call=y.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
|
||||
Subclass.sm(3) # $ call=Subclass.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
|
||||
|
||||
y.cm(4) # $ call=y.cm(..) arg[position 0]=4 callType=CallTypeClassMethod
|
||||
Subclass.cm(4) # $ call=Subclass.cm(..) arg[self]=Subclass arg[position 0]=4 callType=CallTypeClassMethod
|
||||
|
||||
y[5] # $ MISSING: call=y[5] arg[self]=y arg[position 0]=5
|
||||
|
||||
|
||||
try:
|
||||
# These are included to show how we handle absent things with points-to where
|
||||
# `mypkg.foo` is a `missing module variable`, but `mypkg.subpkg.bar` is compeltely
|
||||
# ignored.
|
||||
# These are included to show whether we have a DataFlowCall for things we can't
|
||||
# resolve. Both are interesting since with points-to we used to have a DataFlowCall
|
||||
# for _one_ but not the other
|
||||
import mypkg
|
||||
mypkg.foo(42) # $ call=mypkg.foo(..) qlclass=NormalCall
|
||||
mypkg.subpkg.bar(43) # $ call=mypkg.subpkg.bar(..) qlclass=LibraryCall arg_0=43
|
||||
mypkg.foo(42)
|
||||
mypkg.subpkg.bar(43)
|
||||
except:
|
||||
pass
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.DataFlowConsistency
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
| test.py:239:27:239:27 | Parameter | There is no `ParameterNode` associated with this parameter. |
|
||||
|
||||
@@ -38,6 +38,14 @@ SINK5 = functools.partial(SINK, expected=arg5)
|
||||
SINK6 = functools.partial(SINK, expected=arg6)
|
||||
SINK7 = functools.partial(SINK, expected=arg7)
|
||||
|
||||
SINK1_F = functools.partial(SINK_F, unexpected=arg1)
|
||||
SINK2_F = functools.partial(SINK_F, unexpected=arg2)
|
||||
SINK3_F = functools.partial(SINK_F, unexpected=arg3)
|
||||
SINK4_F = functools.partial(SINK_F, unexpected=arg4)
|
||||
SINK5_F = functools.partial(SINK_F, unexpected=arg5)
|
||||
SINK6_F = functools.partial(SINK_F, unexpected=arg6)
|
||||
SINK7_F = functools.partial(SINK_F, unexpected=arg7)
|
||||
|
||||
|
||||
def argument_passing(
|
||||
a,
|
||||
@@ -64,12 +72,12 @@ def argument_passing(
|
||||
|
||||
@expects(7)
|
||||
def test_argument_passing1():
|
||||
argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg7 func=argument_passing MISSING: arg2 arg3="arg3 arg4 arg5 arg6
|
||||
argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7}) #$ arg1 arg5 arg6 arg7 func=argument_passing MISSING: arg2 arg3 arg4
|
||||
|
||||
|
||||
@expects(7)
|
||||
def test_argument_passing2():
|
||||
argument_passing(arg1, arg2, arg3, f=arg6) #$ arg1 arg2 arg3
|
||||
argument_passing(arg1, arg2, arg3, f=arg6) #$ arg1 arg2 arg3 arg6
|
||||
|
||||
|
||||
def with_pos_only(a, /, b):
|
||||
@@ -94,7 +102,7 @@ def with_multiple_kw_args(a, b, c):
|
||||
def test_multiple_kw_args():
|
||||
with_multiple_kw_args(b=arg2, c=arg3, a=arg1) #$ arg1 arg2 arg3
|
||||
with_multiple_kw_args(arg1, *(arg2,), arg3) #$ arg1 MISSING: arg2 arg3
|
||||
with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 arg3 func=with_multiple_kw_args MISSING:
|
||||
with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2) #$ arg1 arg2 arg3 func=with_multiple_kw_args
|
||||
with_multiple_kw_args(**{"b": arg2}, **{"c": arg3}, **{"a": arg1}) #$ arg1 arg2 arg3 func=with_multiple_kw_args
|
||||
|
||||
|
||||
@@ -112,32 +120,6 @@ def test_default_arguments():
|
||||
with_default_arguments(**{"c": arg3}) #$ arg3 func=with_default_arguments
|
||||
|
||||
|
||||
# Nested constructor pattern
|
||||
def grab_foo_bar_baz(foo, **kwargs):
|
||||
SINK1(foo)
|
||||
grab_bar_baz(**kwargs)
|
||||
|
||||
|
||||
# It is not possible to pass `bar` into `kwargs`,
|
||||
# since `bar` is a valid keyword argument.
|
||||
def grab_bar_baz(bar, **kwargs):
|
||||
SINK2(bar)
|
||||
try:
|
||||
SINK2_F(kwargs["bar"])
|
||||
except:
|
||||
print("OK")
|
||||
grab_baz(**kwargs)
|
||||
|
||||
|
||||
def grab_baz(baz):
|
||||
SINK3(baz)
|
||||
|
||||
|
||||
@expects(4)
|
||||
def test_grab():
|
||||
grab_foo_bar_baz(baz=arg3, bar=arg2, foo=arg1) #$ arg1 arg2 arg3 func=grab_bar_baz func=grab_baz
|
||||
|
||||
|
||||
# All combinations
|
||||
def test_pos_pos():
|
||||
def with_pos(a):
|
||||
@@ -183,7 +165,95 @@ def test_kw_kw():
|
||||
|
||||
|
||||
def test_kw_doublestar():
|
||||
def with_doublestar(**a):
|
||||
SINK1(a["a"])
|
||||
def with_doublestar(**kwargs):
|
||||
SINK1(kwargs["a"])
|
||||
|
||||
with_doublestar(a=arg1) #$ arg1 func=test_kw_doublestar.with_doublestar
|
||||
|
||||
|
||||
def only_kwargs(**kwargs):
|
||||
SINK1(kwargs["a"])
|
||||
SINK2(kwargs["b"])
|
||||
# testing precise content tracking, that content from `a` or `b` does not end up here.
|
||||
SINK3_F(kwargs["c"])
|
||||
|
||||
@expects(3)
|
||||
def test_kwargs():
|
||||
args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=only_kwargs
|
||||
only_kwargs(**args)
|
||||
|
||||
|
||||
def mixed(a, **kwargs):
|
||||
SINK1(a)
|
||||
try:
|
||||
SINK1_F(kwargs["a"]) # since 'a' is a keyword argument, it cannot be part of **kwargs
|
||||
except KeyError:
|
||||
print("OK")
|
||||
SINK2(kwargs["b"])
|
||||
# testing precise content tracking, that content from `a` or `b` does not end up here.
|
||||
SINK3_F(kwargs["c"])
|
||||
|
||||
@expects(4*3)
|
||||
def test_mixed():
|
||||
mixed(a=arg1, b=arg2, c="safe") # $ arg1 arg2
|
||||
|
||||
args = {"b": arg2, "c": "safe"} # $ arg2 func=mixed
|
||||
mixed(a=arg1, **args) # $ arg1
|
||||
|
||||
args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=mixed
|
||||
mixed(**args)
|
||||
|
||||
|
||||
def starargs_only(*args):
|
||||
SINK1(args[0])
|
||||
SINK2(args[1])
|
||||
SINK3_F(args[2])
|
||||
|
||||
@expects(5*3)
|
||||
def test_only_starargs():
|
||||
starargs_only(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad2,bad3="arg1" bad1,bad3="arg2"
|
||||
|
||||
args = (arg2, "safe") # $ MISSING: arg2
|
||||
starargs_only(arg1, *args) # $ arg1 SPURIOUS: bad2,bad3="arg1"
|
||||
|
||||
args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
|
||||
starargs_only(*args)
|
||||
|
||||
empty_args = ()
|
||||
|
||||
args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
|
||||
starargs_only(*args, *empty_args)
|
||||
args = (arg1, arg2, "safe") # $ MISSING: arg1 arg2 func=starargs_only
|
||||
starargs_only(*empty_args, *args)
|
||||
|
||||
|
||||
def starargs_mixed(a, *args):
|
||||
SINK1(a)
|
||||
SINK2(args[0])
|
||||
SINK3_F(args[1])
|
||||
|
||||
@expects(3*8)
|
||||
def test_stararg_mixed():
|
||||
starargs_mixed(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad3="arg2"
|
||||
|
||||
args = (arg2, "safe") # $ arg2 func=starargs_mixed
|
||||
starargs_mixed(arg1, *args) # $ arg1
|
||||
|
||||
args = (arg1, arg2, "safe")
|
||||
starargs_mixed(*args) # $ MISSING: arg1 arg2
|
||||
|
||||
args = (arg1, arg2, "safe")
|
||||
more_args = ("foo", "bar")
|
||||
starargs_mixed(*args, *more_args) # $ MISSING: arg1 arg2
|
||||
|
||||
empty_args = ()
|
||||
|
||||
# adding first/last
|
||||
starargs_mixed(arg1, arg2, "safe", *empty_args) # $ arg1 arg2 SPURIOUS: bad3="arg2"
|
||||
starargs_mixed(*empty_args, arg1, arg2, "safe") # $ MISSING: arg1 arg2
|
||||
|
||||
# adding before/after *args
|
||||
args = (arg2, "safe") # $ arg2 func=starargs_mixed
|
||||
starargs_mixed(arg1, *args, *empty_args) # $ arg1
|
||||
args = (arg2, "safe")
|
||||
starargs_mixed(arg1, *empty_args, *args) # $ arg1 MISSING: arg2
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
import sys
|
||||
import os
|
||||
import functools
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
|
||||
from testlib import expects
|
||||
|
||||
arg = "source"
|
||||
arg1 = "source1"
|
||||
arg2 = "source2"
|
||||
arg3 = "source3"
|
||||
arg4 = "source4"
|
||||
arg5 = "source5"
|
||||
arg6 = "source6"
|
||||
arg7 = "source7"
|
||||
|
||||
|
||||
def SINK_TEST(x, test):
|
||||
if test(x):
|
||||
print("OK")
|
||||
else:
|
||||
print("Unexpected flow", x)
|
||||
|
||||
|
||||
def SINK(x, expected=arg):
|
||||
SINK_TEST(x, test=lambda x: x == expected)
|
||||
|
||||
|
||||
def SINK_F(x, unexpected=arg):
|
||||
SINK_TEST(x, test=lambda x: x != unexpected)
|
||||
|
||||
|
||||
SINK1 = functools.partial(SINK, expected=arg1)
|
||||
SINK2 = functools.partial(SINK, expected=arg2)
|
||||
SINK3 = functools.partial(SINK, expected=arg3)
|
||||
SINK4 = functools.partial(SINK, expected=arg4)
|
||||
SINK5 = functools.partial(SINK, expected=arg5)
|
||||
SINK6 = functools.partial(SINK, expected=arg6)
|
||||
SINK7 = functools.partial(SINK, expected=arg7)
|
||||
|
||||
SINK1_F = functools.partial(SINK_F, unexpected=arg1)
|
||||
SINK2_F = functools.partial(SINK_F, unexpected=arg2)
|
||||
SINK3_F = functools.partial(SINK_F, unexpected=arg3)
|
||||
SINK4_F = functools.partial(SINK_F, unexpected=arg4)
|
||||
SINK5_F = functools.partial(SINK_F, unexpected=arg5)
|
||||
SINK6_F = functools.partial(SINK_F, unexpected=arg6)
|
||||
SINK7_F = functools.partial(SINK_F, unexpected=arg7)
|
||||
|
||||
|
||||
def bad_argument_flow_func(arg):
|
||||
SINK1_F(arg)
|
||||
|
||||
def bad_argument_flow_func2(arg):
|
||||
SINK2(arg)
|
||||
|
||||
def test_bad_argument_flow():
|
||||
# this is just a test to show that the testing setup works
|
||||
|
||||
# in the first one, we pretend we expected no flow for arg1
|
||||
bad_argument_flow_func(arg1) # $ bad1="arg1"
|
||||
|
||||
# in the second one, we pretend we wanted flow for arg2 instead
|
||||
bad_argument_flow_func2(arg1) # $ bad2="arg1"
|
||||
@@ -9,23 +9,64 @@ class Argument1RoutingTest extends RoutingTest {
|
||||
override string flowTag() { result = "arg1" }
|
||||
|
||||
override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
|
||||
exists(Argument1RoutingConfig cfg | cfg.hasFlow(source, sink))
|
||||
exists(Argument1ExtraRoutingConfig cfg | cfg.hasFlow(source, sink))
|
||||
or
|
||||
exists(ArgumentRoutingConfig cfg |
|
||||
cfg.hasFlow(source, sink) and
|
||||
cfg.isArgSource(source, 1) and
|
||||
cfg.isGoodSink(sink, 1)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A configuration to check routing of arguments through magic methods.
|
||||
*/
|
||||
class Argument1RoutingConfig extends DataFlow::Configuration {
|
||||
Argument1RoutingConfig() { this = "Argument1RoutingConfig" }
|
||||
class ArgNumber extends int {
|
||||
ArgNumber() { this in [1 .. 7] }
|
||||
}
|
||||
|
||||
class ArgumentRoutingConfig extends DataFlow::Configuration {
|
||||
ArgumentRoutingConfig() { this = "ArgumentRoutingConfig" }
|
||||
|
||||
predicate isArgSource(DataFlow::Node node, ArgNumber argNumber) {
|
||||
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg" + argNumber
|
||||
}
|
||||
|
||||
override predicate isSource(DataFlow::Node node) { this.isArgSource(node, _) }
|
||||
|
||||
predicate isGoodSink(DataFlow::Node node, ArgNumber argNumber) {
|
||||
exists(CallNode call |
|
||||
call.getFunction().(NameNode).getId() = "SINK" + argNumber and
|
||||
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
|
||||
)
|
||||
}
|
||||
|
||||
predicate isBadSink(DataFlow::Node node, ArgNumber argNumber) {
|
||||
exists(CallNode call |
|
||||
call.getFunction().(NameNode).getId() = "SINK" + argNumber + "_F" and
|
||||
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node node) {
|
||||
this.isGoodSink(node, _) or this.isBadSink(node, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* We want to be able to use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
|
||||
* Use-use flow lets the argument to the first call reach the sink inside the second call,
|
||||
* making it seem like we handle all cases even if we only handle the last one.
|
||||
* We make the test honest by preventing flow into source nodes.
|
||||
*/
|
||||
override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
|
||||
}
|
||||
|
||||
class Argument1ExtraRoutingConfig extends DataFlow::Configuration {
|
||||
Argument1ExtraRoutingConfig() { this = "Argument1ExtraRoutingConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node node) {
|
||||
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg1"
|
||||
or
|
||||
exists(AssignmentDefinition def, DataFlowPrivate::DataFlowCall call |
|
||||
exists(AssignmentDefinition def, DataFlow::CallCfgNode call |
|
||||
def.getVariable() = node.(DataFlow::EssaNode).getVar() and
|
||||
def.getValue() = call.getNode() and
|
||||
call.getNode().(CallNode).getFunction().(NameNode).getId().matches("With\\_%")
|
||||
call.getFunction().asCfgNode().(NameNode).getId().matches("With\\_%")
|
||||
) and
|
||||
node.(DataFlow::EssaNode).getVar().getName().matches("with\\_%")
|
||||
}
|
||||
@@ -46,57 +87,59 @@ class Argument1RoutingConfig extends DataFlow::Configuration {
|
||||
override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
|
||||
}
|
||||
|
||||
// for argument 2 and up, we use a generic approach. Change `maxNumArgs` below if we
|
||||
// need to increase the maximum number of arguments.
|
||||
private int maxNumArgs() { result = 7 }
|
||||
|
||||
class RestArgumentRoutingTest extends RoutingTest {
|
||||
int argNumber;
|
||||
ArgNumber argNumber;
|
||||
|
||||
RestArgumentRoutingTest() {
|
||||
argNumber in [2 .. maxNumArgs()] and
|
||||
argNumber > 1 and
|
||||
this = "Argument" + argNumber + "RoutingTest"
|
||||
}
|
||||
|
||||
override string flowTag() { result = "arg" + argNumber }
|
||||
|
||||
override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
|
||||
exists(RestArgumentRoutingConfig cfg | cfg.getArgNumber() = argNumber |
|
||||
cfg.hasFlow(source, sink)
|
||||
exists(ArgumentRoutingConfig cfg |
|
||||
cfg.hasFlow(source, sink) and
|
||||
cfg.isArgSource(source, argNumber) and
|
||||
cfg.isGoodSink(sink, argNumber)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A configuration to check routing of arguments through magic methods.
|
||||
*/
|
||||
class RestArgumentRoutingConfig extends DataFlow::Configuration {
|
||||
int argNumber;
|
||||
/** Bad flow from `arg<n>` to `SINK<N>_F` */
|
||||
class BadArgumentRoutingTestSinkF extends RoutingTest {
|
||||
ArgNumber argNumber;
|
||||
|
||||
RestArgumentRoutingConfig() {
|
||||
argNumber in [2 .. maxNumArgs()] and
|
||||
this = "Argument" + argNumber + "RoutingConfig"
|
||||
}
|
||||
BadArgumentRoutingTestSinkF() { this = "BadArgumentRoutingTestSinkF" + argNumber }
|
||||
|
||||
/** Gets the argument number this configuration is for. */
|
||||
int getArgNumber() { result = argNumber }
|
||||
override string flowTag() { result = "bad" + argNumber }
|
||||
|
||||
override predicate isSource(DataFlow::Node node) {
|
||||
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg" + argNumber
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node node) {
|
||||
exists(CallNode call |
|
||||
call.getFunction().(NameNode).getId() = "SINK" + argNumber and
|
||||
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
|
||||
override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
|
||||
exists(ArgumentRoutingConfig cfg |
|
||||
cfg.hasFlow(source, sink) and
|
||||
cfg.isArgSource(source, argNumber) and
|
||||
cfg.isBadSink(sink, argNumber)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** Bad flow from `arg<n>` to `SINK<M>` or `SINK<M>_F`, where `n != m`. */
|
||||
class BadArgumentRoutingTestWrongSink extends RoutingTest {
|
||||
ArgNumber argNumber;
|
||||
|
||||
BadArgumentRoutingTestWrongSink() { this = "BadArgumentRoutingTestWrongSink" + argNumber }
|
||||
|
||||
override string flowTag() { result = "bad" + argNumber }
|
||||
|
||||
override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
|
||||
exists(ArgumentRoutingConfig cfg |
|
||||
cfg.hasFlow(source, sink) and
|
||||
cfg.isArgSource(source, any(ArgNumber i | not i = argNumber)) and
|
||||
(
|
||||
cfg.isGoodSink(sink, argNumber)
|
||||
or
|
||||
cfg.isBadSink(sink, argNumber)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* We want to be able to use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
|
||||
* Use-use flow lets the argument to the first call reach the sink inside the second call,
|
||||
* making it seem like we handle all cases even if we only handle the last one.
|
||||
* We make the test honest by preventing flow into source nodes.
|
||||
*/
|
||||
override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
|
||||
}
|
||||
|
||||
@@ -506,7 +506,7 @@ class With_call:
|
||||
|
||||
|
||||
def test_call():
|
||||
with_call = With_call() #$ MISSING: arg1="SSA variable with_call" func=With_call.__call__
|
||||
with_call = With_call() #$ arg1="SSA variable with_call" func=With_call.__call__
|
||||
with_call()
|
||||
|
||||
|
||||
@@ -560,9 +560,9 @@ class With_getitem:
|
||||
|
||||
|
||||
def test_getitem():
|
||||
with_getitem = With_getitem() #$ arg1="SSA variable with_getitem" func=With_getitem.__getitem__
|
||||
with_getitem = With_getitem() #$ MISSING: arg1="SSA variable with_getitem" func=With_getitem.__getitem__
|
||||
arg2 = 0
|
||||
with_getitem[arg2] #$ arg2 func=With_getitem.__getitem__
|
||||
with_getitem[arg2] #$ MISSING: arg2 func=With_getitem.__getitem__
|
||||
|
||||
|
||||
# object.__setitem__(self, key, value)
|
||||
@@ -575,10 +575,10 @@ class With_setitem:
|
||||
|
||||
|
||||
def test_setitem():
|
||||
with_setitem = With_setitem() #$ arg1="SSA variable with_setitem" func=With_setitem.__setitem__
|
||||
with_setitem = With_setitem() #$ MISSING: arg1="SSA variable with_setitem" func=With_setitem.__setitem__
|
||||
arg2 = 0
|
||||
arg3 = ""
|
||||
with_setitem[arg2] = arg3 #$ arg2 arg3 func=With_setitem.__setitem__
|
||||
with_setitem[arg2] = arg3 #$ MISSING: arg2 arg3 func=With_setitem.__setitem__
|
||||
|
||||
|
||||
# object.__delitem__(self, key)
|
||||
@@ -590,9 +590,9 @@ class With_delitem:
|
||||
|
||||
|
||||
def test_delitem():
|
||||
with_delitem = With_delitem() #$ arg1="SSA variable with_delitem" func=With_delitem.__delitem__
|
||||
with_delitem = With_delitem() #$ MISSING: arg1="SSA variable with_delitem" func=With_delitem.__delitem__
|
||||
arg2 = 0
|
||||
del with_delitem[arg2] #$ arg2 func=With_delitem.__delitem__
|
||||
del with_delitem[arg2] #$ MISSING: arg2 func=With_delitem.__delitem__
|
||||
|
||||
|
||||
# object.__missing__(self, key)
|
||||
@@ -662,9 +662,9 @@ class With_add:
|
||||
|
||||
|
||||
def test_add():
|
||||
with_add = With_add() #$ arg1="SSA variable with_add" func=With_add.__add__
|
||||
with_add = With_add() #$ MISSING: arg1="SSA variable with_add" func=With_add.__add__
|
||||
arg2 = with_add
|
||||
with_add + arg2 #$ arg2 func=With_add.__add__
|
||||
with_add + arg2 #$ MISSING: arg2 func=With_add.__add__
|
||||
|
||||
|
||||
# object.__sub__(self, other)
|
||||
@@ -677,9 +677,9 @@ class With_sub:
|
||||
|
||||
|
||||
def test_sub():
|
||||
with_sub = With_sub() #$ arg1="SSA variable with_sub" func=With_sub.__sub__
|
||||
with_sub = With_sub() #$ MISSING: arg1="SSA variable with_sub" func=With_sub.__sub__
|
||||
arg2 = with_sub
|
||||
with_sub - arg2 #$ arg2 func=With_sub.__sub__
|
||||
with_sub - arg2 #$ MISSING: arg2 func=With_sub.__sub__
|
||||
|
||||
|
||||
# object.__mul__(self, other)
|
||||
@@ -692,9 +692,9 @@ class With_mul:
|
||||
|
||||
|
||||
def test_mul():
|
||||
with_mul = With_mul() #$ arg1="SSA variable with_mul" func=With_mul.__mul__
|
||||
with_mul = With_mul() #$ MISSING: arg1="SSA variable with_mul" func=With_mul.__mul__
|
||||
arg2 = with_mul
|
||||
with_mul * arg2 #$ arg2 func=With_mul.__mul__
|
||||
with_mul * arg2 #$ MISSING: arg2 func=With_mul.__mul__
|
||||
|
||||
|
||||
# object.__matmul__(self, other)
|
||||
@@ -707,9 +707,9 @@ class With_matmul:
|
||||
|
||||
|
||||
def test_matmul():
|
||||
with_matmul = With_matmul() #$ arg1="SSA variable with_matmul" func=With_matmul.__matmul__
|
||||
with_matmul = With_matmul() #$ MISSING: arg1="SSA variable with_matmul" func=With_matmul.__matmul__
|
||||
arg2 = with_matmul
|
||||
with_matmul @ arg2 #$ arg2 func=With_matmul.__matmul__
|
||||
with_matmul @ arg2 #$ MISSING: arg2 func=With_matmul.__matmul__
|
||||
|
||||
|
||||
# object.__truediv__(self, other)
|
||||
@@ -722,9 +722,9 @@ class With_truediv:
|
||||
|
||||
|
||||
def test_truediv():
|
||||
with_truediv = With_truediv() #$ arg1="SSA variable with_truediv" func=With_truediv.__truediv__
|
||||
with_truediv = With_truediv() #$ MISSING: arg1="SSA variable with_truediv" func=With_truediv.__truediv__
|
||||
arg2 = with_truediv
|
||||
with_truediv / arg2 #$ arg2 func=With_truediv.__truediv__
|
||||
with_truediv / arg2 #$ MISSING: arg2 func=With_truediv.__truediv__
|
||||
|
||||
|
||||
# object.__floordiv__(self, other)
|
||||
@@ -737,9 +737,9 @@ class With_floordiv:
|
||||
|
||||
|
||||
def test_floordiv():
|
||||
with_floordiv = With_floordiv() #$ arg1="SSA variable with_floordiv" func=With_floordiv.__floordiv__
|
||||
with_floordiv = With_floordiv() #$ MISSING: arg1="SSA variable with_floordiv" func=With_floordiv.__floordiv__
|
||||
arg2 = with_floordiv
|
||||
with_floordiv // arg2 #$ arg2 func=With_floordiv.__floordiv__
|
||||
with_floordiv // arg2 #$ MISSING: arg2 func=With_floordiv.__floordiv__
|
||||
|
||||
|
||||
# object.__mod__(self, other)
|
||||
@@ -752,9 +752,9 @@ class With_mod:
|
||||
|
||||
|
||||
def test_mod():
|
||||
with_mod = With_mod() #$ arg1="SSA variable with_mod" func=With_mod.__mod__
|
||||
with_mod = With_mod() #$ MISSING: arg1="SSA variable with_mod" func=With_mod.__mod__
|
||||
arg2 = with_mod
|
||||
with_mod % arg2 #$ arg2 func=With_mod.__mod__
|
||||
with_mod % arg2 #$ MISSING: arg2 func=With_mod.__mod__
|
||||
|
||||
|
||||
# object.__divmod__(self, other)
|
||||
@@ -788,9 +788,9 @@ def test_pow():
|
||||
|
||||
|
||||
def test_pow_op():
|
||||
with_pow = With_pow() #$ arg1="SSA variable with_pow" func=With_pow.__pow__
|
||||
with_pow = With_pow() #$ MISSING: arg1="SSA variable with_pow" func=With_pow.__pow__
|
||||
arg2 = with_pow
|
||||
with_pow ** arg2 #$ arg2 func=With_pow.__pow__
|
||||
with_pow ** arg2 #$ MISSING: arg2 func=With_pow.__pow__
|
||||
|
||||
|
||||
# object.__lshift__(self, other)
|
||||
@@ -803,9 +803,9 @@ class With_lshift:
|
||||
|
||||
|
||||
def test_lshift():
|
||||
with_lshift = With_lshift() #$ arg1="SSA variable with_lshift" func=With_lshift.__lshift__
|
||||
with_lshift = With_lshift() #$ MISSING: arg1="SSA variable with_lshift" func=With_lshift.__lshift__
|
||||
arg2 = with_lshift
|
||||
with_lshift << arg2 #$ arg2 func=With_lshift.__lshift__
|
||||
with_lshift << arg2 #$ MISSING: arg2 func=With_lshift.__lshift__
|
||||
|
||||
|
||||
# object.__rshift__(self, other)
|
||||
@@ -818,9 +818,9 @@ class With_rshift:
|
||||
|
||||
|
||||
def test_rshift():
|
||||
with_rshift = With_rshift() #$ arg1="SSA variable with_rshift" func=With_rshift.__rshift__
|
||||
with_rshift = With_rshift() #$ MISSING: arg1="SSA variable with_rshift" func=With_rshift.__rshift__
|
||||
arg2 = with_rshift
|
||||
with_rshift >> arg2 #$ arg2 func=With_rshift.__rshift__
|
||||
with_rshift >> arg2 #$ MISSING: arg2 func=With_rshift.__rshift__
|
||||
|
||||
|
||||
# object.__and__(self, other)
|
||||
@@ -833,9 +833,9 @@ class With_and:
|
||||
|
||||
|
||||
def test_and():
|
||||
with_and = With_and() #$ arg1="SSA variable with_and" func=With_and.__and__
|
||||
with_and = With_and() #$ MISSING: arg1="SSA variable with_and" func=With_and.__and__
|
||||
arg2 = with_and
|
||||
with_and & arg2 #$ arg2 func=With_and.__and__
|
||||
with_and & arg2 #$ MISSING: arg2 func=With_and.__and__
|
||||
|
||||
|
||||
# object.__xor__(self, other)
|
||||
@@ -848,9 +848,9 @@ class With_xor:
|
||||
|
||||
|
||||
def test_xor():
|
||||
with_xor = With_xor() #$ arg1="SSA variable with_xor" func=With_xor.__xor__
|
||||
with_xor = With_xor() #$ MISSING: arg1="SSA variable with_xor" func=With_xor.__xor__
|
||||
arg2 = with_xor
|
||||
with_xor ^ arg2 #$ arg2 func=With_xor.__xor__
|
||||
with_xor ^ arg2 #$ MISSING: arg2 func=With_xor.__xor__
|
||||
|
||||
|
||||
# object.__or__(self, other)
|
||||
@@ -863,9 +863,9 @@ class With_or:
|
||||
|
||||
|
||||
def test_or():
|
||||
with_or = With_or() #$ arg1="SSA variable with_or" func=With_or.__or__
|
||||
with_or = With_or() #$ MISSING: arg1="SSA variable with_or" func=With_or.__or__
|
||||
arg2 = with_or
|
||||
with_or | arg2 #$ arg2 func=With_or.__or__
|
||||
with_or | arg2 #$ MISSING: arg2 func=With_or.__or__
|
||||
|
||||
|
||||
# object.__radd__(self, other)
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.DataFlowConsistency
|
||||
|
||||
@@ -124,6 +124,40 @@ def test_staticmethod_call():
|
||||
C.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
|
||||
|
||||
|
||||
# subclass
|
||||
class SC(C):
|
||||
pass
|
||||
sc = SC()
|
||||
|
||||
@expects(6)
|
||||
def test_subclass_method_call():
|
||||
func_obj = sc.method.__func__
|
||||
|
||||
sc.method(arg1, arg2) # $ func=C.method arg1 arg2
|
||||
SC.method(sc, arg1, arg2) # $ func=C.method arg1 arg2
|
||||
func_obj(sc, arg1, arg2) # $ MISSING: func=C.method arg1 arg2
|
||||
|
||||
|
||||
@expects(6)
|
||||
def test_subclass_classmethod_call():
|
||||
c_func_obj = SC.classmethod.__func__
|
||||
|
||||
sc.classmethod(arg1, arg2) # $ func=C.classmethod arg1 arg2
|
||||
SC.classmethod(arg1, arg2) # $ func=C.classmethod arg1 arg2
|
||||
c_func_obj(SC, arg1, arg2) # $ MISSING: func=C.classmethod arg1 arg2
|
||||
|
||||
|
||||
@expects(5)
|
||||
def test_subclass_staticmethod_call():
|
||||
try:
|
||||
SC.staticmethod.__func__
|
||||
except AttributeError:
|
||||
print("OK")
|
||||
|
||||
sc.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
|
||||
SC.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
|
||||
|
||||
|
||||
# Generator functions
|
||||
# A function or method which uses the yield statement (see section The yield statement) is called a generator function. Such a function, when called, always returns an iterator object which can be used to execute the body of the function: calling the iterator’s iterator.__next__() method will cause the function to execute until it provides a value using the yield statement. When the function executes a return statement or falls off the end, a StopIteration exception is raised and the iterator will have reached the end of the set of values to be returned.
|
||||
def gen(x, count):
|
||||
@@ -198,5 +232,16 @@ class Customized:
|
||||
customized = Customized()
|
||||
SINK(Customized.a) #$ MISSING:flow="SOURCE, l:-8 -> customized.a"
|
||||
SINK_F(Customized.b)
|
||||
SINK(customized.a) #$ MISSING:flow="SOURCE, l:-10 -> customized.a"
|
||||
SINK(customized.a) #$ MISSING: flow="SOURCE, l:-10 -> customized.a"
|
||||
SINK(customized.b) #$ flow="SOURCE, l:-7 -> customized.b"
|
||||
|
||||
|
||||
class Test2:
|
||||
|
||||
def __init__(self, arg):
|
||||
self.x = SOURCE
|
||||
self.y = arg
|
||||
|
||||
t = Test2(SOURCE)
|
||||
SINK(t.x) # $ flow="SOURCE, l:-4 -> t.x"
|
||||
SINK(t.y) # $ flow="SOURCE, l:-2 -> t.y"
|
||||
|
||||
@@ -4,5 +4,5 @@ import semmle.python.dataflow.new.DataFlow
|
||||
from DataFlow::Node nodeFrom, DataFlow::Node nodeTo
|
||||
where
|
||||
DataFlow::localFlowStep(nodeFrom, nodeTo) and
|
||||
nodeFrom.getEnclosingCallable().getName().matches("%\\_with\\_local\\_flow")
|
||||
nodeFrom.getEnclosingCallable().getQualifiedName().matches("%\\_with\\_local\\_flow")
|
||||
select nodeFrom, nodeTo
|
||||
|
||||
@@ -697,9 +697,16 @@ def test_overflow_iteration():
|
||||
s = SOURCE
|
||||
iterate_star_args(NONSOURCE, NONSOURCE, SOURCE, s)
|
||||
|
||||
@expects(6)
|
||||
def test_deep_callgraph():
|
||||
# port of python/ql/test/library-tests/taint/general/deep.py
|
||||
|
||||
# based on the fact that `test_deep_callgraph_defined_in_module` works the problem
|
||||
# seems to be that we're defining these functions inside another function and that
|
||||
# the flow of these function definitions DOESN'T flow into the body of the `f<n>`
|
||||
# functions (they DO flow into the body of `test_deep_callgraph`, otherwise the
|
||||
# `f1` call wouldn't work).
|
||||
|
||||
def f1(arg):
|
||||
return arg
|
||||
|
||||
@@ -720,8 +727,51 @@ def test_deep_callgraph():
|
||||
|
||||
x = f6(SOURCE)
|
||||
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
|
||||
x = f5(SOURCE)
|
||||
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
|
||||
x = f4(SOURCE)
|
||||
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
|
||||
x = f3(SOURCE)
|
||||
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
|
||||
x = f2(SOURCE)
|
||||
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
|
||||
x = f1(SOURCE)
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
|
||||
|
||||
def wat_f1(arg):
|
||||
return arg
|
||||
|
||||
def wat_f2(arg):
|
||||
return wat_f1(arg)
|
||||
|
||||
def wat_f3(arg):
|
||||
return wat_f2(arg)
|
||||
|
||||
def wat_f4(arg):
|
||||
return wat_f3(arg)
|
||||
|
||||
def wat_f5(arg):
|
||||
return wat_f4(arg)
|
||||
|
||||
def wat_f6(arg):
|
||||
return wat_f5(arg)
|
||||
|
||||
@expects(6)
|
||||
def test_deep_callgraph_defined_in_module():
|
||||
x = wat_f6(SOURCE)
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
x = wat_f5(SOURCE)
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
x = wat_f4(SOURCE)
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
x = wat_f3(SOURCE)
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
x = wat_f2(SOURCE)
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
x = wat_f1(SOURCE)
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
|
||||
@expects(2)
|
||||
def test_dynamic_tuple_creation_1():
|
||||
tup = tuple()
|
||||
|
||||
@@ -1,24 +1,24 @@
|
||||
| file://:0:0:0:0 | Function generator_func | generator.py:1:20:1:21 | ControlFlowNode for xs |
|
||||
| file://:0:0:0:0 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
|
||||
| file://:0:0:0:0 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
|
||||
| file://:0:0:0:0 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for ListComp |
|
||||
| file://:0:0:0:0 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for Yield |
|
||||
| file://:0:0:0:0 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for x |
|
||||
| file://:0:0:0:0 | Function generator_func | generator.py:2:19:2:19 | ControlFlowNode for x |
|
||||
| file://:0:0:0:0 | Function generator_func | generator.py:2:24:2:25 | ControlFlowNode for xs |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:1:1:1:3 | ControlFlowNode for wat |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:1:7:1:7 | ControlFlowNode for IntegerLiteral |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:3:1:3:10 | ControlFlowNode for ClassExpr |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:3:7:3:9 | ControlFlowNode for Wat |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:4:5:4:7 | ControlFlowNode for wat |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:4:11:4:11 | ControlFlowNode for IntegerLiteral |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:5:5:5:9 | ControlFlowNode for print |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:5:5:5:26 | ControlFlowNode for print() |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:5:11:5:20 | ControlFlowNode for Str |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:5:23:5:25 | ControlFlowNode for wat |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:7:1:7:5 | ControlFlowNode for print |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:7:1:7:23 | ControlFlowNode for print() |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:7:7:7:17 | ControlFlowNode for Str |
|
||||
| file://:0:0:0:0 | Module class_example | class_example.py:7:20:7:22 | ControlFlowNode for wat |
|
||||
| file://:0:0:0:0 | Module generator | generator.py:1:1:1:23 | ControlFlowNode for FunctionExpr |
|
||||
| file://:0:0:0:0 | Module generator | generator.py:1:5:1:18 | ControlFlowNode for generator_func |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:1:1:1:3 | ControlFlowNode for wat |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:1:7:1:7 | ControlFlowNode for IntegerLiteral |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:3:1:3:10 | ControlFlowNode for ClassExpr |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:3:7:3:9 | ControlFlowNode for Wat |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:4:5:4:7 | ControlFlowNode for wat |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:4:11:4:11 | ControlFlowNode for IntegerLiteral |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:5:5:9 | ControlFlowNode for print |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:5:5:26 | ControlFlowNode for print() |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:11:5:20 | ControlFlowNode for Str |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:23:5:25 | ControlFlowNode for wat |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:1:7:5 | ControlFlowNode for print |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:1:7:23 | ControlFlowNode for print() |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:7:7:17 | ControlFlowNode for Str |
|
||||
| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:20:7:22 | ControlFlowNode for wat |
|
||||
| generator.py:0:0:0:0 | Module generator | generator.py:1:1:1:23 | ControlFlowNode for FunctionExpr |
|
||||
| generator.py:0:0:0:0 | Module generator | generator.py:1:5:1:18 | ControlFlowNode for generator_func |
|
||||
| generator.py:1:1:1:23 | Function generator_func | generator.py:1:20:1:21 | ControlFlowNode for xs |
|
||||
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
|
||||
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
|
||||
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for ListComp |
|
||||
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for Yield |
|
||||
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for x |
|
||||
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:19:2:19 | ControlFlowNode for x |
|
||||
| generator.py:1:1:1:23 | Function generator_func | generator.py:2:24:2:25 | ControlFlowNode for xs |
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.DataFlowConsistency
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.DataFlowConsistency
|
||||
|
||||
@@ -84,10 +84,10 @@ def test_indirect_assign_bound_method():
|
||||
sf = myobj.setFoo
|
||||
|
||||
sf(SOURCE)
|
||||
SINK(myobj.foo) # $ MISSING: flow="SOURCE, l:-1 -> myobj.foo"
|
||||
SINK(myobj.foo) # $ flow="SOURCE, l:-1 -> myobj.foo"
|
||||
|
||||
sf(NONSOURCE)
|
||||
SINK_F(myobj.foo)
|
||||
SINK_F(myobj.foo) # $ SPURIOUS: flow="SOURCE, l:-4 -> myobj.foo"
|
||||
|
||||
|
||||
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
|
||||
@@ -167,6 +167,17 @@ def fields_with_local_flow(x):
|
||||
def test_fields():
|
||||
SINK(fields_with_local_flow(SOURCE)) # $ flow="SOURCE -> fields_with_local_flow(..)"
|
||||
|
||||
|
||||
def call_with_source(func):
|
||||
func(SOURCE)
|
||||
|
||||
|
||||
def test_bound_method_passed_as_arg():
|
||||
myobj = MyObj(NONSOURCE)
|
||||
call_with_source(myobj.setFoo)
|
||||
SINK(myobj.foo) # $ MISSING: flow="SOURCE, l:-5 -> foo.x"
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Nested Object
|
||||
# ------------------------------------------------------------------------------
|
||||
@@ -244,6 +255,9 @@ class CrosstalkTestX:
|
||||
def setvalue(self, value):
|
||||
self.x = value
|
||||
|
||||
def do_nothing(self, value):
|
||||
pass
|
||||
|
||||
|
||||
class CrosstalkTestY:
|
||||
def __init__(self):
|
||||
@@ -295,10 +309,10 @@ def test_potential_crosstalk_different_name(cond=True):
|
||||
|
||||
func(SOURCE)
|
||||
|
||||
SINK(objx.x) # $ MISSING: flow="SOURCE, l:-2 -> objx.x"
|
||||
SINK(objx.x) # $ flow="SOURCE, l:-2 -> objx.x"
|
||||
SINK_F(objx.y)
|
||||
SINK_F(objy.x)
|
||||
SINK(objy.y, not_present_at_runtime=True) # $ MISSING: flow="SOURCE, l:-5 -> objy.y"
|
||||
SINK(objy.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-5 -> objy.y"
|
||||
|
||||
|
||||
@expects(8) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
|
||||
@@ -318,10 +332,10 @@ def test_potential_crosstalk_same_name(cond=True):
|
||||
|
||||
func(SOURCE)
|
||||
|
||||
SINK(objx.x) # $ MISSING: flow="SOURCE, l:-2 -> objx.x"
|
||||
SINK(objx.x) # $ flow="SOURCE, l:-2 -> objx.x"
|
||||
SINK_F(objx.y)
|
||||
SINK_F(objy.x)
|
||||
SINK(objy.y, not_present_at_runtime=True) # $ MISSING: flow="SOURCE, l:-5 -> objy.y"
|
||||
SINK(objy.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-5 -> objy.y"
|
||||
|
||||
|
||||
@expects(10) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
|
||||
@@ -350,6 +364,53 @@ def test_potential_crosstalk_same_name_object_reference(cond=True):
|
||||
SINK(obj.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-8 -> obj.y"
|
||||
|
||||
|
||||
@expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
|
||||
def test_potential_crosstalk_same_class(cond=True):
|
||||
objx1 = CrosstalkTestX()
|
||||
SINK_F(objx1.x)
|
||||
|
||||
objx2 = CrosstalkTestX()
|
||||
SINK_F(objx2.x)
|
||||
|
||||
if cond:
|
||||
func = objx1.setvalue
|
||||
else:
|
||||
func = objx2.do_nothing
|
||||
|
||||
# We want to ensure that objx2.x does not end up getting tainted, since that would
|
||||
# be cross-talk between the self arguments are their functions.
|
||||
func(SOURCE)
|
||||
|
||||
SINK(objx1.x) # $ flow="SOURCE, l:-2 -> objx1.x"
|
||||
SINK_F(objx2.x)
|
||||
|
||||
|
||||
class NewTest(object):
|
||||
def __new__(cls, arg):
|
||||
cls.foo = arg
|
||||
return super().__new__(cls) # $ unresolved_call=super().__new__(..)
|
||||
|
||||
@expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
|
||||
def test__new__():
|
||||
# we want to make sure that we DON'T pass the synthetic pre-update node for
|
||||
# the class instance to __new__, like we do for __init__.
|
||||
nt = NewTest(SOURCE)
|
||||
# the __new__ implementation sets the foo attribute on THE CLASS itself. The
|
||||
# attribute lookup on the class instance will go to the class itself when the
|
||||
# attribute isn't defined on the class instance, so we will actually see `nt.foo`
|
||||
# contain the source, but the point of this test is that we should see identical
|
||||
# behavior between NewTest.foo and nt.foo, which we dont!
|
||||
#
|
||||
# Also note that we currently (October 2022) dont' model writes to classes very
|
||||
# well.
|
||||
|
||||
SINK(NewTest.foo) # $ MISSING: flow="SOURCE, l:-10 -> NewTest.foo"
|
||||
SINK(nt.foo) # $ MISSING: flow="SOURCE, l:-11 -> nt.foo"
|
||||
|
||||
NewTest.foo = NONSOURCE
|
||||
SINK_F(NewTest.foo)
|
||||
SINK_F(nt.foo)
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Global scope
|
||||
# ------------------------------------------------------------------------------
|
||||
@@ -400,7 +461,7 @@ SINK(obj2.foo) # $ flow="SOURCE, l:-1 -> obj2.foo"
|
||||
|
||||
# apparently these if statements below makes a difference :O
|
||||
# but one is not enough
|
||||
cond = os.urandom(1)[0] > 128
|
||||
cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
|
||||
|
||||
if cond:
|
||||
pass
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.DataFlowConsistency
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.DataFlowConsistency
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.DataFlowConsistency
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.DataFlowConsistency
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.DataFlowConsistency
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
While working on the field-flow tests, I encountered some very strange behavior. By moving some tests into a new file, they suddenly started working :O
|
||||
|
||||
This folder contains the artifacts from investigating this problem, so we can recall the facts (but besides that, don't have much value in itself).
|
||||
|
||||
The test files can be found in `src/`, and I have set of a bunch of different tests with different extractor options in the `test-*` folders.
|
||||
|
||||
The core of the problem is that in _some_ configuration of extractor options, after seeing the code below, points-to gives up trying to resolve calls :flushed:
|
||||
|
||||
```py
|
||||
import os
|
||||
cond = os.urandom(1)[0] > 128
|
||||
|
||||
if cond:
|
||||
pass
|
||||
|
||||
if cond:
|
||||
pass
|
||||
```
|
||||
|
||||
This seems to have been caused by not allowing enough imports to be resolved. There is also some interaction with splitting, since turning that off also removes the problem.
|
||||
|
||||
But allowing our test to see more imports is more representative of what happens when analyzing real code, so that's the better approach :+1: (and going above 3 does not seem to change anything in this case).
|
||||
|
||||
I've thought about whether we can write a query to reliably cases such as this, but I don't see any solutions. However, we can easily try running all our tests with `--max-import-depth=100` and see if anything changes from this.
|
||||
|
||||
# Seeing the solutions work
|
||||
|
||||
Doing `diff -u -r test-1-normal/ test-5-max-import-depth-3/` shows that all the calls we should be able to resolve, are now resolved properly. and critically this line is added:
|
||||
|
||||
```diff
|
||||
+| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>full diff</summary>
|
||||
|
||||
```diff
|
||||
diff '--color=auto' -u -r test-1-normal/NormalDataflowTest.expected test-5-max-import-depth-3/NormalDataflowTest.expected
|
||||
--- test-1-normal/NormalDataflowTest.expected 2022-02-27 10:33:00.603882599 +0100
|
||||
+++ test-5-max-import-depth-3/NormalDataflowTest.expected 2022-02-28 10:10:08.930743800 +0100
|
||||
@@ -1,2 +1,3 @@
|
||||
missingAnnotationOnSink
|
||||
failures
|
||||
+| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
|
||||
diff '--color=auto' -u -r test-1-normal/options test-5-max-import-depth-3/options
|
||||
--- test-1-normal/options 2022-02-27 10:36:51.124793909 +0100
|
||||
+++ test-5-max-import-depth-3/options 2022-02-27 11:01:43.908098372 +0100
|
||||
@@ -1 +1 @@
|
||||
-semmle-extractor-options: --max-import-depth=1 -R ../src
|
||||
+semmle-extractor-options: --max-import-depth=3 -R ../src
|
||||
diff '--color=auto' -u -r test-1-normal/UnresolvedCalls.expected test-5-max-import-depth-3/UnresolvedCalls.expected
|
||||
--- test-1-normal/UnresolvedCalls.expected 2022-02-28 10:09:19.213742437 +0100
|
||||
+++ test-5-max-import-depth-3/UnresolvedCalls.expected 2022-02-28 10:10:08.638737921 +0100
|
||||
@@ -0,0 +1,5 @@
|
||||
+| ../src/isfile_no_problem.py:34:33:34:70 | Comment # $ unresolved_call=os.path.isfile(..) | Missing result:unresolved_call=os.path.isfile(..) |
|
||||
+| ../src/urandom_no_if_no_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
|
||||
+| ../src/urandom_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
|
||||
+| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
|
||||
+| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |
|
||||
diff '--color=auto' -u -r test-1-normal/UnresolvedPointsToCalls.expected test-5-max-import-depth-3/UnresolvedPointsToCalls.expected
|
||||
--- test-1-normal/UnresolvedPointsToCalls.expected 2022-02-28 10:09:19.033738812 +0100
|
||||
+++ test-5-max-import-depth-3/UnresolvedPointsToCalls.expected 2022-02-28 10:12:48.572752108 +0100
|
||||
@@ -1,5 +1 @@
|
||||
-| ../src/urandom_no_if_no_problem.py:34:8:34:20 | ../src/urandom_no_if_no_problem.py:34 | os.urandom(..) |
|
||||
| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
|
||||
-| ../src/urandom_problem.py:34:8:34:20 | ../src/urandom_problem.py:34 | os.urandom(..) |
|
||||
-| ../src/urandom_problem.py:42:7:42:16 | ../src/urandom_problem.py:42 | give_src() |
|
||||
-| ../src/urandom_problem.py:43:1:43:9 | ../src/urandom_problem.py:43 | SINK(..) |
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
There are no benefit in increasing import depth above 3 for this test-example:
|
||||
|
||||
```diff
|
||||
$ diff -u -r test-4-max-import-depth-100/ test-5-max-import-depth-3/
|
||||
--- test-4-max-import-depth-100/options 2022-02-28 10:02:09.269071781 +0100
|
||||
+++ test-5-max-import-depth-3/options 2022-02-27 11:01:43.908098372 +0100
|
||||
@@ -1 +1 @@
|
||||
-semmle-extractor-options: --max-import-depth=100 -R ../src
|
||||
+semmle-extractor-options: --max-import-depth=3 -R ../src
|
||||
```
|
||||
|
||||
Also notice that using import depth 2 actually makes things worse, as we no longer handle the `isfile_no_problem.py` file properly :facepalm: :sweat_smile: NOTE: This was only for Python 3, for Python 2 there was no change :flushed:
|
||||
|
||||
```diff
|
||||
diff '--color=auto' -u -r test-4-max-import-depth-100/NormalDataflowTest.expected test-6-max-import-depth-2/NormalDataflowTest.expected
|
||||
--- test-4-max-import-depth-100/NormalDataflowTest.expected 2022-02-28 10:10:02.206608379 +0100
|
||||
+++ test-6-max-import-depth-2/NormalDataflowTest.expected 2022-02-28 10:10:13.882716665 +0100
|
||||
@@ -1,3 +1,5 @@
|
||||
missingAnnotationOnSink
|
||||
+| ../src/isfile_no_problem.py:43:6:43:8 | ../src/isfile_no_problem.py:43 | ERROR, you should add `# $ MISSING: flow` annotation | foo |
|
||||
failures
|
||||
+| ../src/isfile_no_problem.py:43:11:43:41 | Comment # $ flow="SOURCE, l:-15 -> foo" | Missing result:flow="SOURCE, l:-15 -> foo" |
|
||||
| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
|
||||
```
|
||||
@@ -1,43 +0,0 @@
|
||||
# These are defined so that we can evaluate the test code.
|
||||
NONSOURCE = "not a source"
|
||||
SOURCE = "source"
|
||||
|
||||
|
||||
def is_source(x):
|
||||
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
|
||||
|
||||
|
||||
def SINK(x):
|
||||
if is_source(x):
|
||||
print("OK")
|
||||
else:
|
||||
print("Unexpected flow", x)
|
||||
|
||||
|
||||
def SINK_F(x):
|
||||
if is_source(x):
|
||||
print("Unexpected flow", x)
|
||||
else:
|
||||
print("OK")
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Actual tests
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
def give_src():
|
||||
return SOURCE
|
||||
|
||||
foo = give_src()
|
||||
SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
|
||||
|
||||
import os
|
||||
cond = eval("False")
|
||||
|
||||
if cond:
|
||||
pass
|
||||
|
||||
if cond:
|
||||
pass
|
||||
|
||||
foo = give_src()
|
||||
SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
|
||||
@@ -1,43 +0,0 @@
|
||||
# These are defined so that we can evaluate the test code.
|
||||
NONSOURCE = "not a source"
|
||||
SOURCE = "source"
|
||||
|
||||
|
||||
def is_source(x):
|
||||
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
|
||||
|
||||
|
||||
def SINK(x):
|
||||
if is_source(x):
|
||||
print("OK")
|
||||
else:
|
||||
print("Unexpected flow", x)
|
||||
|
||||
|
||||
def SINK_F(x):
|
||||
if is_source(x):
|
||||
print("Unexpected flow", x)
|
||||
else:
|
||||
print("OK")
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Actual tests
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
def give_src():
|
||||
return SOURCE
|
||||
|
||||
foo = give_src()
|
||||
SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
|
||||
|
||||
import os
|
||||
cond = os.path.isfile(__file__) # $ unresolved_call=os.path.isfile(..)
|
||||
|
||||
if cond:
|
||||
pass
|
||||
|
||||
if cond:
|
||||
pass
|
||||
|
||||
foo = give_src()
|
||||
SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
|
||||
@@ -1,43 +0,0 @@
|
||||
# These are defined so that we can evaluate the test code.
|
||||
NONSOURCE = "not a source"
|
||||
SOURCE = "source"
|
||||
|
||||
|
||||
def is_source(x):
|
||||
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
|
||||
|
||||
|
||||
def SINK(x):
|
||||
if is_source(x):
|
||||
print("OK")
|
||||
else:
|
||||
print("Unexpected flow", x)
|
||||
|
||||
|
||||
def SINK_F(x):
|
||||
if is_source(x):
|
||||
print("Unexpected flow", x)
|
||||
else:
|
||||
print("OK")
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Actual tests
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
def give_src():
|
||||
return SOURCE
|
||||
|
||||
foo = give_src()
|
||||
SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
|
||||
|
||||
import os
|
||||
cond = 1 + 1 == 2
|
||||
|
||||
if cond:
|
||||
pass
|
||||
|
||||
if cond:
|
||||
pass
|
||||
|
||||
foo = give_src()
|
||||
SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
|
||||
@@ -1,43 +0,0 @@
|
||||
# These are defined so that we can evaluate the test code.
|
||||
NONSOURCE = "not a source"
|
||||
SOURCE = "source"
|
||||
|
||||
|
||||
def is_source(x):
|
||||
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
|
||||
|
||||
|
||||
def SINK(x):
|
||||
if is_source(x):
|
||||
print("OK")
|
||||
else:
|
||||
print("Unexpected flow", x)
|
||||
|
||||
|
||||
def SINK_F(x):
|
||||
if is_source(x):
|
||||
print("Unexpected flow", x)
|
||||
else:
|
||||
print("OK")
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Actual tests
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
def give_src():
|
||||
return SOURCE
|
||||
|
||||
foo = give_src()
|
||||
SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
|
||||
|
||||
import os
|
||||
cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
|
||||
|
||||
# if cond:
|
||||
# pass
|
||||
#
|
||||
# if cond:
|
||||
# pass
|
||||
|
||||
foo = give_src()
|
||||
SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
|
||||
@@ -1,43 +0,0 @@
|
||||
# These are defined so that we can evaluate the test code.
|
||||
NONSOURCE = "not a source"
|
||||
SOURCE = "source"
|
||||
|
||||
|
||||
def is_source(x):
|
||||
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
|
||||
|
||||
|
||||
def SINK(x):
|
||||
if is_source(x):
|
||||
print("OK")
|
||||
else:
|
||||
print("Unexpected flow", x)
|
||||
|
||||
|
||||
def SINK_F(x):
|
||||
if is_source(x):
|
||||
print("Unexpected flow", x)
|
||||
else:
|
||||
print("OK")
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Actual tests
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
def give_src():
|
||||
return SOURCE
|
||||
|
||||
foo = give_src()
|
||||
SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
|
||||
|
||||
# import os
|
||||
cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
|
||||
|
||||
# if cond:
|
||||
# pass
|
||||
#
|
||||
# if cond:
|
||||
# pass
|
||||
|
||||
foo = give_src()
|
||||
SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
|
||||
@@ -1,43 +0,0 @@
|
||||
# These are defined so that we can evaluate the test code.
|
||||
NONSOURCE = "not a source"
|
||||
SOURCE = "source"
|
||||
|
||||
|
||||
def is_source(x):
|
||||
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
|
||||
|
||||
|
||||
def SINK(x):
|
||||
if is_source(x):
|
||||
print("OK")
|
||||
else:
|
||||
print("Unexpected flow", x)
|
||||
|
||||
|
||||
def SINK_F(x):
|
||||
if is_source(x):
|
||||
print("Unexpected flow", x)
|
||||
else:
|
||||
print("OK")
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Actual tests
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
def give_src():
|
||||
return SOURCE
|
||||
|
||||
foo = give_src()
|
||||
SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
|
||||
|
||||
import os
|
||||
cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
|
||||
|
||||
if cond:
|
||||
pass
|
||||
|
||||
if cond:
|
||||
pass
|
||||
|
||||
foo = give_src() # $ unresolved_call=give_src()
|
||||
SINK(foo) # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo"
|
||||
@@ -1,6 +0,0 @@
|
||||
| ../src/eval_no_problem.py | has splitting |
|
||||
| ../src/isfile_no_problem.py | has splitting |
|
||||
| ../src/simple_no_problem.py | has splitting |
|
||||
| ../src/urandom_no_if_no_problem.py | does not have splitting |
|
||||
| ../src/urandom_no_import_no_problem.py | does not have splitting |
|
||||
| ../src/urandom_problem.py | has splitting |
|
||||
@@ -1,16 +0,0 @@
|
||||
import python
|
||||
|
||||
// this can be quick-eval to see which ones have splitting. But that's basically just
|
||||
// anything from line 39 and further.
|
||||
predicate exprWithSplitting(Expr e) {
|
||||
exists(e.getLocation().getFile().getRelativePath()) and
|
||||
1 < count(ControlFlowNode cfn | cfn.getNode() = e)
|
||||
}
|
||||
|
||||
from File f, string msg
|
||||
where
|
||||
exists(f.getRelativePath()) and
|
||||
if exists(Expr e | e.getLocation().getFile() = f and exprWithSplitting(e))
|
||||
then msg = "has splitting"
|
||||
else msg = "does not have splitting"
|
||||
select f.toString(), msg
|
||||
@@ -1,2 +0,0 @@
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.UnresolvedCalls
|
||||
@@ -1,5 +0,0 @@
|
||||
| ../src/urandom_no_if_no_problem.py:34:8:34:20 | ../src/urandom_no_if_no_problem.py:34 | os.urandom(..) |
|
||||
| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
|
||||
| ../src/urandom_problem.py:34:8:34:20 | ../src/urandom_problem.py:34 | os.urandom(..) |
|
||||
| ../src/urandom_problem.py:42:7:42:16 | ../src/urandom_problem.py:42 | give_src() |
|
||||
| ../src/urandom_problem.py:43:1:43:9 | ../src/urandom_problem.py:43 | SINK(..) |
|
||||
@@ -1,10 +0,0 @@
|
||||
import python
|
||||
private import semmle.python.dataflow.new.internal.PrintNode
|
||||
|
||||
from CallNode call
|
||||
where
|
||||
exists(call.getLocation().getFile().getRelativePath()) and
|
||||
not exists(Value value | call = value.getACall()) and
|
||||
// somehow print is not resolved, but that is not the focus right now
|
||||
not call.getFunction().(NameNode).getId() = "print"
|
||||
select call.getLocation(), prettyExpr(call.getNode())
|
||||
@@ -1 +0,0 @@
|
||||
semmle-extractor-options: --lang=3 --max-import-depth=1 -R ../src
|
||||
@@ -1,3 +0,0 @@
|
||||
missingAnnotationOnSink
|
||||
failures
|
||||
| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
|
||||
@@ -1,2 +0,0 @@
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.NormalDataflowTest
|
||||
@@ -1,6 +0,0 @@
|
||||
| ../src/eval_no_problem.py | does not have splitting |
|
||||
| ../src/isfile_no_problem.py | does not have splitting |
|
||||
| ../src/simple_no_problem.py | does not have splitting |
|
||||
| ../src/urandom_no_if_no_problem.py | does not have splitting |
|
||||
| ../src/urandom_no_import_no_problem.py | does not have splitting |
|
||||
| ../src/urandom_problem.py | does not have splitting |
|
||||
@@ -1,16 +0,0 @@
|
||||
import python
|
||||
|
||||
// this can be quick-eval to see which ones have splitting. But that's basically just
|
||||
// anything from line 39 and further.
|
||||
predicate exprWithSplitting(Expr e) {
|
||||
exists(e.getLocation().getFile().getRelativePath()) and
|
||||
1 < count(ControlFlowNode cfn | cfn.getNode() = e)
|
||||
}
|
||||
|
||||
from File f, string msg
|
||||
where
|
||||
exists(f.getRelativePath()) and
|
||||
if exists(Expr e | e.getLocation().getFile() = f and exprWithSplitting(e))
|
||||
then msg = "has splitting"
|
||||
else msg = "does not have splitting"
|
||||
select f.toString(), msg
|
||||
@@ -1,2 +0,0 @@
|
||||
| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
|
||||
| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |
|
||||
@@ -1,2 +0,0 @@
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.UnresolvedCalls
|
||||
@@ -1,3 +0,0 @@
|
||||
| ../src/urandom_no_if_no_problem.py:34:8:34:20 | ../src/urandom_no_if_no_problem.py:34 | os.urandom(..) |
|
||||
| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
|
||||
| ../src/urandom_problem.py:34:8:34:20 | ../src/urandom_problem.py:34 | os.urandom(..) |
|
||||
@@ -1,10 +0,0 @@
|
||||
import python
|
||||
private import semmle.python.dataflow.new.internal.PrintNode
|
||||
|
||||
from CallNode call
|
||||
where
|
||||
exists(call.getLocation().getFile().getRelativePath()) and
|
||||
not exists(Value value | call = value.getACall()) and
|
||||
// somehow print is not resolved, but that is not the focus right now
|
||||
not call.getFunction().(NameNode).getId() = "print"
|
||||
select call.getLocation(), prettyExpr(call.getNode())
|
||||
@@ -1 +0,0 @@
|
||||
semmle-extractor-options: --lang=3 --dont-split-graph --max-import-depth=1 -R ../src
|
||||
@@ -1,3 +0,0 @@
|
||||
missingAnnotationOnSink
|
||||
failures
|
||||
| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
|
||||
@@ -1,2 +0,0 @@
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.NormalDataflowTest
|
||||
@@ -1,6 +0,0 @@
|
||||
| ../src/eval_no_problem.py | has splitting |
|
||||
| ../src/isfile_no_problem.py | has splitting |
|
||||
| ../src/simple_no_problem.py | has splitting |
|
||||
| ../src/urandom_no_if_no_problem.py | does not have splitting |
|
||||
| ../src/urandom_no_import_no_problem.py | does not have splitting |
|
||||
| ../src/urandom_problem.py | has splitting |
|
||||
@@ -1,16 +0,0 @@
|
||||
import python
|
||||
|
||||
// this can be quick-eval to see which ones have splitting. But that's basically just
|
||||
// anything from line 39 and further.
|
||||
predicate exprWithSplitting(Expr e) {
|
||||
exists(e.getLocation().getFile().getRelativePath()) and
|
||||
1 < count(ControlFlowNode cfn | cfn.getNode() = e)
|
||||
}
|
||||
|
||||
from File f, string msg
|
||||
where
|
||||
exists(f.getRelativePath()) and
|
||||
if exists(Expr e | e.getLocation().getFile() = f and exprWithSplitting(e))
|
||||
then msg = "has splitting"
|
||||
else msg = "does not have splitting"
|
||||
select f.toString(), msg
|
||||
@@ -1,2 +0,0 @@
|
||||
| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
|
||||
| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |
|
||||
@@ -1,2 +0,0 @@
|
||||
import python
|
||||
import experimental.dataflow.TestUtil.UnresolvedCalls
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user