mirror of
https://github.com/github/codeql.git
synced 2025-12-20 18:56:32 +01:00
Python: Support flow to *args param from positional arg
This commit is contained in:
@@ -49,6 +49,7 @@ newtype TParameterPosition =
|
||||
pos = 0 or
|
||||
pos = any(Parameter p).getPosition() + 1
|
||||
} or
|
||||
TSynthStarArgsElementParameterPosition(int pos) { exists(TStarArgsParameterPosition(pos)) } or
|
||||
TDictSplatParameterPosition()
|
||||
|
||||
/** A parameter position. */
|
||||
@@ -65,6 +66,15 @@ class ParameterPosition extends TParameterPosition {
|
||||
/** Holds if this position represents a `*args` parameter at (0-based) `index`. */
|
||||
predicate isStarArgs(int index) { this = TStarArgsParameterPosition(index) }
|
||||
|
||||
/**
|
||||
* Holds if this position represents a synthetic parameter at or after (0-based)
|
||||
* position `index`, from which there will be made a store step to the real
|
||||
* `*args` parameter.
|
||||
*/
|
||||
predicate isSynthStarArgsElement(int index) {
|
||||
this = TSynthStarArgsElementParameterPosition(index)
|
||||
}
|
||||
|
||||
/** Holds if this position represents a `**kwargs` parameter. */
|
||||
predicate isDictSplat() { this = TDictSplatParameterPosition() }
|
||||
|
||||
@@ -78,6 +88,11 @@ class ParameterPosition extends TParameterPosition {
|
||||
or
|
||||
exists(int index | this.isStarArgs(index) and result = "*args at " + index)
|
||||
or
|
||||
exists(int index |
|
||||
this.isSynthStarArgsElement(index) and
|
||||
result = "synthetic *args element at (or after) " + index
|
||||
)
|
||||
or
|
||||
this.isDictSplat() and result = "**"
|
||||
}
|
||||
}
|
||||
@@ -132,6 +147,10 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
|
||||
or
|
||||
exists(int index | ppos.isStarArgs(index) and apos.isStarArgs(index))
|
||||
or
|
||||
exists(int paramIndex, int argIndex | argIndex >= paramIndex |
|
||||
ppos.isSynthStarArgsElement(paramIndex) and apos.isPositional(argIndex)
|
||||
)
|
||||
or
|
||||
ppos.isDictSplat() and apos.isDictSplat()
|
||||
}
|
||||
|
||||
@@ -219,8 +238,13 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction {
|
||||
exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
|
||||
or
|
||||
exists(int index |
|
||||
ppos.isStarArgs(index) and
|
||||
result.getParameter() = func.getVararg()
|
||||
(
|
||||
ppos.isStarArgs(index) and
|
||||
result.getParameter() = func.getVararg()
|
||||
or
|
||||
ppos.isSynthStarArgsElement(index) and
|
||||
result = TSynthStarArgsElementParameterNode(this)
|
||||
)
|
||||
|
|
||||
// a `*args` parameter comes after the last positional parameter. We need to take
|
||||
// self parameter into account, so for
|
||||
|
||||
@@ -57,6 +57,78 @@ class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
|
||||
override Location getLocation() { result = node.getLocation() }
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// *args (StarArgs) related
|
||||
// =============================================================================
|
||||
/**
|
||||
* A (synthetic) data-flow parameter node to capture all positional arguments that
|
||||
* should be passed to the `*args` parameter.
|
||||
*
|
||||
* To handle
|
||||
* ```py
|
||||
* def func(*args):
|
||||
* for arg in args:
|
||||
* sink(arg)
|
||||
*
|
||||
* func(source1, source2, ...)
|
||||
* ```
|
||||
*
|
||||
* we add a synthetic parameter to `func` that accepts any positional argument at (or
|
||||
* after) the index for the `*args` parameter. We add a store step (at any list index) to the real
|
||||
* `*args` parameter. This means we can handle the code above, but if the code had done `sink(args[0])`
|
||||
* we would (wrongly) add flow for `source2` as well.
|
||||
*
|
||||
* To solve this more precisely, we could add a synthetic argument with position `*args`
|
||||
* that had store steps with the correct index (like we do for mapping keyword arguments to a
|
||||
* `**kwargs` parameter). However, if a single call could go to 2 different
|
||||
* targets with `*args` parameters at different positions, as in the example below, it's unclear what
|
||||
* index to store `2` at. For the `foo` callable it should be 1, for the `bar` callable it should be 0.
|
||||
* So this information would need to be encoded in the arguments of a `ArgumentPosition` branch, and
|
||||
* one of the arguments would be which callable is the target. However, we cannot build `ArgumentPosition`
|
||||
* branches based on the call-graph, so this strategy doesn't work.
|
||||
*
|
||||
* Another approach to solving it precisely is to add multiple synthetic parameters that have store steps
|
||||
* to the real `*args` parameter. So for the example below, `foo` would need to have synthetic parameter
|
||||
* nodes for indexes 1 and 2 (which would have store step for index 0 and 1 of the `*args` parameter),
|
||||
* and `bar` would need it for indexes 1, 2, and 3. The question becomes how many synthetic parameters to
|
||||
* create, which _must_ be `max(Call call, int i | exists(call.getArg(i)))`, since (again) we can't base
|
||||
* this on the call-graph. And each function with a `*args` parameter would need this many extra synthetic
|
||||
* nodes. My gut feeling at that this simple approach will be good enough, but if we need to get it more
|
||||
* precise, it should be possible to do it like this.
|
||||
*
|
||||
* ```py
|
||||
* def foo(one, *args): ...
|
||||
* def bar(*args): ...
|
||||
*
|
||||
* func = foo if <cond> else bar
|
||||
* func(1, 2, 3)
|
||||
*/
|
||||
class SynthStarArgsElementParameterNode extends ParameterNodeImpl,
|
||||
TSynthStarArgsElementParameterNode {
|
||||
DataFlowCallable callable;
|
||||
|
||||
SynthStarArgsElementParameterNode() { this = TSynthStarArgsElementParameterNode(callable) }
|
||||
|
||||
override string toString() { result = "SynthStarArgsElementParameterNode" }
|
||||
|
||||
override Scope getScope() { result = callable.getScope() }
|
||||
|
||||
override Location getLocation() { result = callable.getLocation() }
|
||||
|
||||
override Parameter getParameter() { none() }
|
||||
}
|
||||
|
||||
predicate synthStarArgsElementParameterNodeStoreStep(
|
||||
SynthStarArgsElementParameterNode nodeFrom, ListElementContent c, ParameterNode nodeTo
|
||||
) {
|
||||
c = c and // suppress warning about unused parameter
|
||||
exists(DataFlowCallable callable, ParameterPosition ppos |
|
||||
nodeFrom = TSynthStarArgsElementParameterNode(callable) and
|
||||
nodeTo = callable.getParameter(ppos) and
|
||||
ppos.isStarArgs(_)
|
||||
)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// **kwargs (DictSplat) related
|
||||
// =============================================================================
|
||||
@@ -519,6 +591,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
or
|
||||
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
|
||||
}
|
||||
|
||||
@@ -849,6 +923,8 @@ predicate nodeIsHidden(Node n) {
|
||||
or
|
||||
n instanceof SummaryParameterNode
|
||||
or
|
||||
n instanceof SynthStarArgsElementParameterNode
|
||||
or
|
||||
n instanceof SynthDictSplatArgumentNode
|
||||
or
|
||||
n instanceof SynthDictSplatParameterNode
|
||||
|
||||
@@ -113,6 +113,10 @@ newtype TNode =
|
||||
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
|
||||
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
|
||||
} or
|
||||
/** A synthetic node to capture positional arguments that are passed to a `*args` parameter. */
|
||||
TSynthStarArgsElementParameterNode(DataFlowCallable callable) {
|
||||
exists(ParameterPosition ppos | ppos.isStarArgs(_) | exists(callable.getParameter(ppos)))
|
||||
} or
|
||||
/** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
|
||||
TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
|
||||
/** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
|
||||
|
||||
@@ -140,7 +140,7 @@ def test_pos_star():
|
||||
if len(a) > 0:
|
||||
SINK1(a[0])
|
||||
|
||||
with_star(arg1) #$ MISSING: arg1 func=test_pos_star.with_star
|
||||
with_star(arg1) #$ arg1 func=test_pos_star.with_star
|
||||
|
||||
|
||||
def test_pos_kw():
|
||||
@@ -209,10 +209,10 @@ def starargs_only(*args):
|
||||
|
||||
@expects(3*3)
|
||||
def test_only_starargs():
|
||||
starargs_only(arg1, arg2, "safe") # $ MISSING: arg1 arg2
|
||||
starargs_only(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad2,bad3="arg1" bad1,bad3="arg2"
|
||||
|
||||
args = (arg2, "safe")
|
||||
starargs_only(arg1, *args) # $ MISSING: arg1 arg2
|
||||
starargs_only(arg1, *args) # $ arg1 SPURIOUS: bad2,bad3="arg1" MISSING: arg2
|
||||
|
||||
args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
|
||||
starargs_only(*args)
|
||||
@@ -225,7 +225,7 @@ def starargs_mixed(a, *args):
|
||||
|
||||
@expects(3*8)
|
||||
def test_stararg_mixed():
|
||||
starargs_mixed(arg1, arg2, "safe") # $ arg1 MISSING: arg2
|
||||
starargs_mixed(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad3="arg2"
|
||||
|
||||
args = (arg2, "safe") # $ arg2 func=starargs_mixed
|
||||
starargs_mixed(arg1, *args) # $ arg1
|
||||
@@ -240,11 +240,11 @@ def test_stararg_mixed():
|
||||
empty_args = ()
|
||||
|
||||
# adding first/last
|
||||
starargs_mixed(arg1, arg2, "safe", *empty_args) # $ arg1 MISSING: arg2
|
||||
starargs_mixed(arg1, arg2, "safe", *empty_args) # $ arg1 arg2 SPURIOUS: bad3="arg2"
|
||||
starargs_mixed(*empty_args, arg1, arg2, "safe") # $ MISSING: arg1 arg2
|
||||
|
||||
# adding before/after *args
|
||||
args = (arg2, "safe")
|
||||
starargs_mixed(arg1, *args, *empty_args) # $ arg1 MISSING: arg2
|
||||
args = (arg2, "safe") # $ arg2 func=starargs_mixed
|
||||
starargs_mixed(arg1, *args, *empty_args) # $ arg1
|
||||
args = (arg2, "safe")
|
||||
starargs_mixed(arg1, *empty_args, *args) # $ arg1 MISSING: arg2
|
||||
|
||||
@@ -401,7 +401,7 @@ def f_extra_pos(a, *b):
|
||||
|
||||
|
||||
def test_call_extra_pos():
|
||||
SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ MISSING: flow="SOURCE -> f_extra_pos(..)"
|
||||
SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ flow="SOURCE -> f_extra_pos(..)"
|
||||
|
||||
|
||||
def f_extra_keyword(a, **b):
|
||||
@@ -514,7 +514,7 @@ def test_lambda_unpack_mapping():
|
||||
|
||||
def test_lambda_extra_pos():
|
||||
f_extra_pos = lambda a, *b: b[0]
|
||||
SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ MISSING: flow="SOURCE -> f_extra_pos(..)"
|
||||
SINK(f_extra_pos(NONSOURCE, SOURCE)) #$ flow="SOURCE -> f_extra_pos(..)"
|
||||
|
||||
|
||||
def test_lambda_extra_keyword():
|
||||
@@ -689,7 +689,7 @@ def test_iterable_star_unpacking_in_for_2():
|
||||
|
||||
def iterate_star_args(first, second, *args):
|
||||
for arg in args:
|
||||
SINK(arg) #$ MISSING: flow="SOURCE, l:+5 -> arg" flow="SOURCE, l:+6 -> arg"
|
||||
SINK(arg) #$ flow="SOURCE, l:+5 -> arg" flow="SOURCE, l:+6 -> arg"
|
||||
|
||||
# FP reported here: https://github.com/github/codeql-python-team/issues/49
|
||||
@expects(2)
|
||||
|
||||
Reference in New Issue
Block a user