diff --git a/python/ql/lib/change-notes/2023-07-07-parameter-default-value-definition-node.md b/python/ql/lib/change-notes/2023-07-07-parameter-default-value-definition-node.md new file mode 100644 index 00000000000..3691c602500 --- /dev/null +++ b/python/ql/lib/change-notes/2023-07-07-parameter-default-value-definition-node.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Parameters with a default value are now considered a `DefinitionNode`. This improvement was motivated by allowing type-tracking and API graphs to follow flow from such a default value to a use by a captured variable. diff --git a/python/ql/lib/semmle/python/Flow.qll b/python/ql/lib/semmle/python/Flow.qll index 8dfdc4a6341..17a408c6713 100644 --- a/python/ql/lib/semmle/python/Flow.qll +++ b/python/ql/lib/semmle/python/Flow.qll @@ -640,12 +640,23 @@ class DefinitionNode extends ControlFlowNode { exists(Assign a | list_or_tuple_nested_element(a.getATarget()).getAFlowNode() = this) or exists(For for | for.getTarget().getAFlowNode() = this) + or + exists(Parameter param | this = param.asName().getAFlowNode() and exists(param.getDefault())) } /** flow node corresponding to the value assigned for the definition corresponding to this flow node */ ControlFlowNode getValue() { result = assigned_value(this.getNode()).getAFlowNode() and - (result.getBasicBlock().dominates(this.getBasicBlock()) or result.isImport()) + ( + result.getBasicBlock().dominates(this.getBasicBlock()) + or + result.isImport() + or + // since the default value for a parameter is evaluated in the same basic block as + // the function definition, but the parameter belongs to the basic block of the function, + // there is no dominance relationship between the two. + exists(Parameter param | this = param.asName().getAFlowNode()) + ) } } @@ -795,6 +806,8 @@ private AstNode assigned_value(Expr lhs) { or /* for lhs in seq: => `result` is the `for` node, representing the `iter(next(seq))` operation. */ result.(For).getTarget() = lhs + or + exists(Parameter param | lhs = param.asName() and result = param.getDefault()) } predicate nested_sequence_assign( diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 85b083e701a..eaf65a4d503 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -473,6 +473,15 @@ predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) { or // Setting the possible values of the variable at the end of import time nodeFrom = nodeTo.(ModuleVariableNode).getADefiningWrite() + or + // a parameter with a default value, since the parameter will be in the scope of the + // function, while the default value itself will be in the scope that _defines_ the + // function. + exists(ParameterDefinition param | + // note: we go to the _control-flow node_ of the parameter, and not the ESSA node of the parameter, since for type-tracking, the ESSA node is not a LocalSourceNode, so we would get in trouble. + nodeFrom.asCfgNode() = param.getDefault() and + nodeTo.asCfgNode() = param.getDefiningNode() + ) } /** @@ -574,9 +583,6 @@ predicate jumpStepSharedWithTypeTracker(Node nodeFrom, Node nodeTo) { r.getAttributeName(), nodeFrom) and nodeTo = r ) - or - // Default value for parameter flows to that parameter - defaultValueFlowStep(nodeFrom, nodeTo) } /** @@ -797,19 +803,6 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, PostUpdateNode n ) } -predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) { - exists(Function f, Parameter p, ParameterDefinition def | - // `getArgByName` supports, unlike `getAnArg`, keyword-only parameters - p = f.getArgByName(_) and - nodeFrom.asExpr() = p.getDefault() and - // The following expresses - // nodeTo.(ParameterNode).getParameter() = p - // without non-monotonic recursion - def.getParameter() = p and - nodeTo.getNode() = def.getDefiningNode() - ) -} - /** * Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`. */ diff --git a/python/ql/lib/semmle/python/essa/SsaDefinitions.qll b/python/ql/lib/semmle/python/essa/SsaDefinitions.qll index 61007c7129b..8d9de611659 100644 --- a/python/ql/lib/semmle/python/essa/SsaDefinitions.qll +++ b/python/ql/lib/semmle/python/essa/SsaDefinitions.qll @@ -20,7 +20,12 @@ module SsaSource { /** Holds if `v` is defined by assignment at `defn` and given `value`. */ cached predicate assignment_definition(Variable v, ControlFlowNode defn, ControlFlowNode value) { - defn.(NameNode).defines(v) and defn.(DefinitionNode).getValue() = value + defn.(NameNode).defines(v) and + defn.(DefinitionNode).getValue() = value and + // since parameter will be considered a DefinitionNode, if it has a default value, + // we need to exclude it here since it is already covered by parameter_definition + // (and points-to was unhappy that it was included in both) + not parameter_definition(v, defn) } /** Holds if `v` is defined by assignment of the captured exception. */ diff --git a/python/ql/test/experimental/dataflow/typetracking/test.py b/python/ql/test/experimental/dataflow/typetracking/test.py index f0e93d79af2..a822ef683eb 100644 --- a/python/ql/test/experimental/dataflow/typetracking/test.py +++ b/python/ql/test/experimental/dataflow/typetracking/test.py @@ -65,6 +65,19 @@ def to_inner_scope(): also_x = foo() # $ tracked print(also_x) # $ tracked + +def from_parameter_default(): + x_alias = tracked # $tracked + def outer(x=tracked): # $tracked + print(x) # $tracked + def inner(): + print(x) # $ tracked + print(x_alias) # $tracked + return x # $tracked + also_x = outer() # $tracked + print(also_x) # $tracked + + # ------------------------------------------------------------------------------ # Function decorator # ------------------------------------------------------------------------------ diff --git a/python/ql/test/library-tests/variables/definitions/definition-values.expected b/python/ql/test/library-tests/variables/definitions/definition-values.expected new file mode 100644 index 00000000000..c81fafd6a64 --- /dev/null +++ b/python/ql/test/library-tests/variables/definitions/definition-values.expected @@ -0,0 +1,4 @@ +| test.py:3:5:3:9 | ControlFlowNode for fail5 | test.py:3:1:3:13 | ControlFlowNode for FunctionExpr | +| test.py:4:5:4:8 | ControlFlowNode for Tuple | test.py:4:12:4:12 | ControlFlowNode for t | +| test.py:7:5:7:26 | ControlFlowNode for default_value_in_param | test.py:7:1:7:33 | ControlFlowNode for FunctionExpr | +| test.py:7:28:7:28 | ControlFlowNode for x | test.py:7:30:7:31 | ControlFlowNode for IntegerLiteral | diff --git a/python/ql/test/library-tests/variables/definitions/definition-values.ql b/python/ql/test/library-tests/variables/definitions/definition-values.ql new file mode 100644 index 00000000000..7c864541746 --- /dev/null +++ b/python/ql/test/library-tests/variables/definitions/definition-values.ql @@ -0,0 +1,4 @@ +import python + +from DefinitionNode d +select d, d.getValue() diff --git a/python/ql/test/library-tests/variables/definitions/test.expected b/python/ql/test/library-tests/variables/definitions/test.expected index dc853ee7f2f..f5e119e6fb2 100644 --- a/python/ql/test/library-tests/variables/definitions/test.expected +++ b/python/ql/test/library-tests/variables/definitions/test.expected @@ -1,4 +1,6 @@ | 3 | 5 | ControlFlowNode for fail5 | | 4 | 5 | ControlFlowNode for Tuple | | 4 | 5 | ControlFlowNode for x | -| 4 | 8 | ControlFlowNode for y | \ No newline at end of file +| 4 | 8 | ControlFlowNode for y | +| 7 | 5 | ControlFlowNode for default_value_in_param | +| 7 | 28 | ControlFlowNode for x | diff --git a/python/ql/test/library-tests/variables/definitions/test.py b/python/ql/test/library-tests/variables/definitions/test.py index 0695a275b3c..c8cd2507399 100644 --- a/python/ql/test/library-tests/variables/definitions/test.py +++ b/python/ql/test/library-tests/variables/definitions/test.py @@ -3,3 +3,6 @@ def fail5(t): x, y = t return x + +def default_value_in_param(x=42): + print(x)