Python: Support flow to *args param from positional arg

This commit is contained in:
Rasmus Wriedt Larsen
2022-09-19 16:50:48 +02:00
parent db921ac036
commit 035d083515
5 changed files with 116 additions and 12 deletions

View File

@@ -49,6 +49,7 @@ newtype TParameterPosition =
pos = 0 or
pos = any(Parameter p).getPosition() + 1
} or
TSynthStarArgsElementParameterPosition(int pos) { exists(TStarArgsParameterPosition(pos)) } or
TDictSplatParameterPosition()
/** A parameter position. */
@@ -65,6 +66,15 @@ class ParameterPosition extends TParameterPosition {
/** Holds if this position represents a `*args` parameter at (0-based) `index`. */
predicate isStarArgs(int index) { this = TStarArgsParameterPosition(index) }
/**
* Holds if this position represents a synthetic parameter at or after (0-based)
* position `index`, from which there will be made a store step to the real
* `*args` parameter.
*/
predicate isSynthStarArgsElement(int index) {
this = TSynthStarArgsElementParameterPosition(index)
}
/** Holds if this position represents a `**kwargs` parameter. */
predicate isDictSplat() { this = TDictSplatParameterPosition() }
@@ -78,6 +88,11 @@ class ParameterPosition extends TParameterPosition {
or
exists(int index | this.isStarArgs(index) and result = "*args at " + index)
or
exists(int index |
this.isSynthStarArgsElement(index) and
result = "synthetic *args element at (or after) " + index
)
or
this.isDictSplat() and result = "**"
}
}
@@ -132,6 +147,10 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
or
exists(int index | ppos.isStarArgs(index) and apos.isStarArgs(index))
or
exists(int paramIndex, int argIndex | argIndex >= paramIndex |
ppos.isSynthStarArgsElement(paramIndex) and apos.isPositional(argIndex)
)
or
ppos.isDictSplat() and apos.isDictSplat()
}
@@ -219,8 +238,13 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
or
exists(int index |
ppos.isStarArgs(index) and
result.getParameter() = func.getVararg()
(
ppos.isStarArgs(index) and
result.getParameter() = func.getVararg()
or
ppos.isSynthStarArgsElement(index) and
result = TSynthStarArgsElementParameterNode(this)
)
|
// a `*args` parameter comes after the last positional parameter. We need to take
// self parameter into account, so for

View File

@@ -57,6 +57,78 @@ class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
override Location getLocation() { result = node.getLocation() }
}
// =============================================================================
// *args (StarArgs) related
// =============================================================================
/**
* A (synthetic) data-flow parameter node to capture all positional arguments that
* should be passed to the `*args` parameter.
*
* To handle
* ```py
* def func(*args):
* for arg in args:
* sink(arg)
*
* func(source1, source2, ...)
* ```
*
* we add a synthetic parameter to `func` that accepts any positional argument at (or
* after) the index for the `*args` parameter. We add a store step (at any list index) to the real
* `*args` parameter. This means we can handle the code above, but if the code had done `sink(args[0])`
* we would (wrongly) add flow for `source2` as well.
*
* To solve this more precisely, we could add a synthetic argument with position `*args`
* that had store steps with the correct index (like we do for mapping keyword arguments to a
* `**kwargs` parameter). However, if a single call could go to 2 different
* targets with `*args` parameters at different positions, as in the example below, it's unclear what
* index to store `2` at. For the `foo` callable it should be 1, for the `bar` callable it should be 0.
* So this information would need to be encoded in the arguments of a `ArgumentPosition` branch, and
* one of the arguments would be which callable is the target. However, we cannot build `ArgumentPosition`
* branches based on the call-graph, so this strategy doesn't work.
*
* Another approach to solving it precisely is to add multiple synthetic parameters that have store steps
* to the real `*args` parameter. So for the example below, `foo` would need to have synthetic parameter
* nodes for indexes 1 and 2 (which would have store step for index 0 and 1 of the `*args` parameter),
* and `bar` would need it for indexes 1, 2, and 3. The question becomes how many synthetic parameters to
* create, which _must_ be `max(Call call, int i | exists(call.getArg(i)))`, since (again) we can't base
* this on the call-graph. And each function with a `*args` parameter would need this many extra synthetic
* nodes. My gut feeling at that this simple approach will be good enough, but if we need to get it more
* precise, it should be possible to do it like this.
*
* ```py
* def foo(one, *args): ...
* def bar(*args): ...
*
* func = foo if <cond> else bar
* func(1, 2, 3)
*/
class SynthStarArgsElementParameterNode extends ParameterNodeImpl,
TSynthStarArgsElementParameterNode {
DataFlowCallable callable;
SynthStarArgsElementParameterNode() { this = TSynthStarArgsElementParameterNode(callable) }
override string toString() { result = "SynthStarArgsElementParameterNode" }
override Scope getScope() { result = callable.getScope() }
override Location getLocation() { result = callable.getLocation() }
override Parameter getParameter() { none() }
}
predicate synthStarArgsElementParameterNodeStoreStep(
SynthStarArgsElementParameterNode nodeFrom, ListElementContent c, ParameterNode nodeTo
) {
c = c and // suppress warning about unused parameter
exists(DataFlowCallable callable, ParameterPosition ppos |
nodeFrom = TSynthStarArgsElementParameterNode(callable) and
nodeTo = callable.getParameter(ppos) and
ppos.isStarArgs(_)
)
}
// =============================================================================
// **kwargs (DictSplat) related
// =============================================================================
@@ -519,6 +591,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom, c, nodeTo)
or
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
or
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
}
@@ -849,6 +923,8 @@ predicate nodeIsHidden(Node n) {
or
n instanceof SummaryParameterNode
or
n instanceof SynthStarArgsElementParameterNode
or
n instanceof SynthDictSplatArgumentNode
or
n instanceof SynthDictSplatParameterNode

View File

@@ -113,6 +113,10 @@ newtype TNode =
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
} or
/** A synthetic node to capture positional arguments that are passed to a `*args` parameter. */
TSynthStarArgsElementParameterNode(DataFlowCallable callable) {
exists(ParameterPosition ppos | ppos.isStarArgs(_) | exists(callable.getParameter(ppos)))
} or
/** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
/** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */

View File

@@ -140,7 +140,7 @@ def test_pos_star():
if len(a) > 0:
SINK1(a[0])
with_star(arg1) #$ MISSING: arg1 func=test_pos_star.with_star
with_star(arg1) #$ arg1 func=test_pos_star.with_star
def test_pos_kw():
@@ -209,10 +209,10 @@ def starargs_only(*args):
@expects(3*3)
def test_only_starargs():
starargs_only(arg1, arg2, "safe") # $ MISSING: arg1 arg2
starargs_only(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad2,bad3="arg1" bad1,bad3="arg2"
args = (arg2, "safe")
starargs_only(arg1, *args) # $ MISSING: arg1 arg2
starargs_only(arg1, *args) # $ arg1 SPURIOUS: bad2,bad3="arg1" MISSING: arg2
args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
starargs_only(*args)
@@ -225,7 +225,7 @@ def starargs_mixed(a, *args):
@expects(3*8)
def test_stararg_mixed():
starargs_mixed(arg1, arg2, "safe") # $ arg1 MISSING: arg2
starargs_mixed(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad3="arg2"
args = (arg2, "safe") # $ arg2 func=starargs_mixed
starargs_mixed(arg1, *args) # $ arg1
@@ -240,11 +240,11 @@ def test_stararg_mixed():
empty_args = ()
# adding first/last
starargs_mixed(arg1, arg2, "safe", *empty_args) # $ arg1 MISSING: arg2
starargs_mixed(arg1, arg2, "safe", *empty_args) # $ arg1 arg2 SPURIOUS: bad3="arg2"
starargs_mixed(*empty_args, arg1, arg2, "safe") # $ MISSING: arg1 arg2
# adding before/after *args
args = (arg2, "safe")
starargs_mixed(arg1, *args, *empty_args) # $ arg1 MISSING: arg2
args = (arg2, "safe") # $ arg2 func=starargs_mixed
starargs_mixed(arg1, *args, *empty_args) # $ arg1
args = (arg2, "safe")
starargs_mixed(arg1, *empty_args, *args) # $ arg1 MISSING: arg2

View File

@@ -401,7 +401,7 @@ def f_extra_pos(a, *b):
def test_call_extra_pos():
SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ MISSING: flow="SOURCE -> f_extra_pos(..)"
SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ flow="SOURCE -> f_extra_pos(..)"
def f_extra_keyword(a, **b):
@@ -514,7 +514,7 @@ def test_lambda_unpack_mapping():
def test_lambda_extra_pos():
f_extra_pos = lambda a, *b: b[0]
SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ MISSING: flow="SOURCE -> f_extra_pos(..)"
SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ flow="SOURCE -> f_extra_pos(..)"
def test_lambda_extra_keyword():
@@ -689,7 +689,7 @@ def test_iterable_star_unpacking_in_for_2():
def iterate_star_args(first, second, *args):
for arg in args:
SINK(arg) #$ MISSING: flow="SOURCE, l:+5 -> arg" flow="SOURCE, l:+6 -> arg"
SINK(arg) #$ flow="SOURCE, l:+5 -> arg" flow="SOURCE, l:+6 -> arg"
# FP reported here: https://github.com/github/codeql-python-team/issues/49
@expects(2)