diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index efd8141efc6..eaa31ed3d6c 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -6,8 +6,9 @@ * directed and labeled; they specify how the components represented by nodes relate to each other. */ -// Importing python under the `py` namespace to avoid importing `CallNode` from `Flow.qll` and thereby having a naming conflict with `API::CallNode`. +// Importing python under the `py` namespace to avoid importing `Cfg::CallNode` from `Flow.qll` and thereby having a naming conflict with `API::CallNode`. private import python as PY +private import semmle.python.controlflow.internal.Cfg as Cfg import semmle.python.dataflow.new.DataFlow private import semmle.python.internal.CachedStages @@ -282,7 +283,7 @@ module API { index = this.getIndex() and ( // subscripting - exists(PY::SubscriptNode subscript | + exists(Cfg::SubscriptNode subscript | subscript.getObject() = this.getAValueReachableFromSource().asCfgNode() and subscript.getIndex() = index.asSink().asCfgNode() | @@ -290,7 +291,7 @@ module API { subscript = result.asSource().asCfgNode() or // writing - subscript.(PY::DefinitionNode).getValue() = result.asSink().asCfgNode() + subscript.(Cfg::DefinitionNode).getValue() = result.asSink().asCfgNode() ) or // dictionary literals @@ -684,7 +685,7 @@ module API { * Ignores relative imports, such as `from ..foo.bar import baz`. */ private predicate imports(DataFlow::CfgNode imp, string name) { - exists(PY::ImportExprNode iexpr | + exists(Cfg::ImportExprNode iexpr | imp.getNode() = iexpr and not iexpr.getNode().isRelative() and name = iexpr.getNode().getImportedModuleName() @@ -775,7 +776,7 @@ module API { // list literals, from `x` to `[x]` // TODO: once convenient, this should be done at a higher level than the AST, // at least at the CFG layer, to take splitting into account. - // Also consider `SequenceNode for generality. + // Also consider `Cfg::SequenceNode for generality. exists(PY::List list | list = pred.(DataFlow::ExprNode).getNode().getNode() | rhs.(DataFlow::ExprNode).getNode().getNode() = list.getAnElt() and lbl = Label::subscript() @@ -805,7 +806,7 @@ module API { subscript = trackUseNode(src).getSubscript(index) | // from `x` to a definition of `x[...]` - rhs.asCfgNode() = subscript.asCfgNode().(PY::DefinitionNode).getValue() and + rhs.asCfgNode() = subscript.asCfgNode().(Cfg::DefinitionNode).getValue() and lbl = Label::subscript() or // from `x` to `"key"` in `x["key"]` diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index 76e9f4bd13f..6eb74b52121 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -5,6 +5,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.internal.DataFlowImplSpecific private import semmle.python.dataflow.new.RemoteFlowSources @@ -214,7 +215,7 @@ module Path { SafeAccessCheck() { this = DataFlow::BarrierGuard::getABarrierNode() } } - private predicate safeAccessCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) { + private predicate safeAccessCheck(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) { g.(SafeAccessCheck::Range).checks(node, branch) } @@ -223,7 +224,7 @@ module Path { /** A data-flow node that checks that a path is safe to access in some way, for example by having a controlled prefix. */ abstract class Range extends DataFlow::GuardNode { /** Holds if this guard validates `node` upon evaluating to `branch`. */ - abstract predicate checks(ControlFlowNode node, boolean branch); + abstract predicate checks(Cfg::ControlFlowNode node, boolean branch); } } } diff --git a/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll b/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll index d7e54e64aa8..e41d82a9ad0 100644 --- a/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll +++ b/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll @@ -10,6 +10,9 @@ * - Intermediate nodes for multi-operand boolean expressions. */ +overlay[local?] +module; + private import python as Py private import codeql.controlflow.ControlFlowGraph private import codeql.controlflow.SuccessorType @@ -1193,6 +1196,30 @@ module Ast implements AstSig { override AstNode getChild(int index) { index = 0 and result = this.getObject() } } + /** + * An `import x.y` module expression. Modelled as a leaf — the dotted + * name is just a string. + */ + additional class ImportExpression extends Expr { + ImportExpression() { this.asExpr() instanceof Py::ImportExpr } + } + + /** + * A `from m import x` member access. The module sub-expression is a + * child so that the CFG visits both the module load and this + * attribute selection. + */ + additional class ImportMemberExpr extends Expr { + private Py::ImportMember im; + + ImportMemberExpr() { this = TPyExpr(im) } + + /** Gets the module expression `m` in `from m import x`. */ + Expr getModule() { result.asExpr() = im.getModule() } + + override AstNode getChild(int index) { index = 0 and result = this.getModule() } + } + /** A tuple literal. */ additional class TupleExpr extends Expr { private Py::Tuple tuple; @@ -1581,4 +1608,6 @@ Py::AstNode astNodeToPyNode(Ast::AstNode n) { result = n.asStmt() or result = n.asScope() + or + result = n.asPattern() } diff --git a/python/ql/lib/semmle/python/controlflow/internal/Cfg.qll b/python/ql/lib/semmle/python/controlflow/internal/Cfg.qll index 70dfc0b785d..66f36771f48 100644 --- a/python/ql/lib/semmle/python/controlflow/internal/Cfg.qll +++ b/python/ql/lib/semmle/python/controlflow/internal/Cfg.qll @@ -20,6 +20,24 @@ module; private import python as Py private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl private import codeql.controlflow.SuccessorType +private import codeql.controlflow.BasicBlock as BB + +/** + * A nested sub-module that explicitly implements `BB::CfgSig`, so this + * `Cfg` facade can be passed to parameterised shared modules such as + * `codeql.dataflow.VariableCapture::Flow`. The sub-module + * exposes the *raw* shared-CFG types from `AstNodeImpl.qll` (where the + * signature is satisfied natively), not the facade's wrapped types. + */ +module CfgSigImpl implements BB::CfgSig { + class ControlFlowNode = CfgImpl::ControlFlowNode; + + class BasicBlock = CfgImpl::BasicBlock; + + class EntryBasicBlock = CfgImpl::Cfg::EntryBasicBlock; + + predicate dominatingEdge = CfgImpl::Cfg::dominatingEdge/2; +} /** * Gets the Python AST node corresponding to CFG node `n`, if any. @@ -51,6 +69,11 @@ private predicate isCanonical(CfgImpl::ControlFlowNode n) { n instanceof CfgImpl::ControlFlow::EntryNode or n instanceof CfgImpl::ControlFlow::ExitNode + or + // Annotated exit nodes (normal + abnormal) — needed so that dataflow + // consumers can ask "is this the normal-exit of a scope?" and also + // so that scope-exit synthetic uses in SsaImpl can attach here. + n instanceof CfgImpl::ControlFlow::AnnotatedExitNode } /** @@ -369,6 +392,35 @@ class BasicBlock extends CfgImpl::BasicBlock { or forex(BasicBlock immsucc | immsucc = super.getASuccessor() | immsucc.alwaysReaches(succ)) } + + /** + * Holds if this basic block ends in a node that branches on a boolean + * outcome, and `other` is dominated by the corresponding successor + * for `branch` while not being reachable from the other branch + * without going through this BB. + * + * In other words: any execution that reaches `other` must have just + * evaluated the last node of this BB and taken the `branch` outcome. + * This mirrors the legacy `ConditionBlock.controls(BB, branch)`. + */ + predicate controls(BasicBlock other, boolean branch) { + exists(BasicBlock succ | + branch = true and succ = this.getATrueSuccessor() + or + branch = false and succ = this.getAFalseSuccessor() + | + succ.dominates(other) and + // The other branch must not also reach `other` — otherwise + // `other` is not actually controlled by `branch`. + not exists(BasicBlock otherSucc | + branch = true and otherSucc = this.getAFalseSuccessor() + or + branch = false and otherSucc = this.getATrueSuccessor() + | + otherSucc.reaches(other) + ) + ) + } } // =========================================================================== @@ -734,8 +786,7 @@ class BoolExprNode extends ControlFlowNode { ControlFlowNode getAnOperand() { exists(Py::BoolExpr be | be = toAst(this) and - be.getAValue() = toAst(result) and - result.getBasicBlock().dominates(this.getBasicBlock()) + be.getAValue() = toAst(result) ) } } @@ -766,20 +817,79 @@ class DefinitionNode extends ControlFlowNode { /** Gets the value assigned, if any. */ ControlFlowNode getValue() { - exists(Py::Expr target, Py::Expr value | - target = toAst(this) and - value = toAst(result) and - result.getBasicBlock().dominates(this.getBasicBlock()) - | - // x = value - exists(Py::Assign a | a.getATarget() = target and a.getValue() = value) - or - // x = y = value (nested chained-assign target) - exists(Py::Assign a | a.getATarget().(Py::Tuple).getElt(_) = target and a.getValue() = value) + // For-target: the value is the for-loop's iter expression (which + // is also where `Cfg::ForNode` lives — its `getNode()` returns the + // enclosing `Py::For` statement). Treated specially because there + // is no AST node holding the result of `iter(next(seq))`; we use + // the iter expression's CFG node as the stand-in. + exists(Py::For f | + f.getTarget() = toAst(this) and + toAst(result) = f.getIter() + ) + or + exists(Py::AstNode value | value = assignedValue(toAst(this)) | + toAst(result) = value and + ( + result.getBasicBlock().dominates(this.getBasicBlock()) + or + result.isImport() + or + // The default value for a parameter is evaluated in the same basic block as + // the function definition, but the parameter belongs to the basic block of the + // function, so there is no dominance relationship between the two. + exists(Py::Parameter param | toAst(this) = param.asName()) + ) ) } } +/** + * Gets the AST node that holds the value assigned to `lhs` in a binding + * context. Mirrors `Flow.qll::assigned_value`. + */ +private Py::AstNode assignedValue(Py::Expr lhs) { + // lhs = result + exists(Py::Assign a | a.getATarget() = lhs and result = a.getValue()) + or + // lhs := result + exists(Py::AssignExpr a | a.getTarget() = lhs and result = a.getValue()) + or + // lhs: annotation = result + exists(Py::AnnAssign a | a.getTarget() = lhs and result = a.getValue()) + or + // import result as lhs (also covers plain `import lhs`, where alias.getAsname() = lhs) + exists(Py::Alias a | a.getAsname() = lhs and result = a.getValue()) + or + // lhs += x -> result is the (lhs + x) binary expression + exists(Py::AugAssign a, Py::BinaryExpr b | + b = a.getOperation() and result = b and lhs = b.getLeft() + ) + or + // Nested sequence assign: ..., lhs, ... = ..., result, ... + exists(Py::Assign a | nestedSequenceAssign(a.getATarget(), a.getValue(), lhs, result)) + or + // Parameter default + exists(Py::Parameter param | lhs = param.asName() and result = param.getDefault()) +} + +/** + * Helper for nested sequence assignments such as `(a, b), c = (1, 2), 3`. + */ +private predicate nestedSequenceAssign( + Py::Expr leftParent, Py::Expr rightParent, Py::Expr left, Py::Expr right +) { + exists(int i | + leftParent.(Py::Tuple).getElt(i) = left and rightParent.(Py::Tuple).getElt(i) = right + or + leftParent.(Py::List).getElt(i) = left and rightParent.(Py::List).getElt(i) = right + ) + or + exists(Py::Expr leftMid, Py::Expr rightMid | + nestedSequenceAssign(leftParent, rightParent, leftMid, rightMid) and + nestedSequenceAssign(leftMid, rightMid, left, right) + ) +} + /** A control flow node corresponding to a deletion (`del x`). */ class DeletionNode extends ControlFlowNode { DeletionNode() { this.isDelete() } @@ -789,8 +899,6 @@ class DeletionNode extends ControlFlowNode { class ForNode extends ControlFlowNode { ForNode() { exists(Py::For f | toAst(this) = f.getIter()) } - override Py::For getNode() { exists(Py::For f | toAst(this) = f.getIter() | result = f) } - /** Gets the iterable expression. */ ControlFlowNode getIter() { result = this and result = result // canonical "after" of the iterable @@ -943,8 +1051,15 @@ class DictNode extends ControlFlowNode { /** A control flow node corresponding to an iterable in a `for` loop. */ class IterableNode extends ControlFlowNode { IterableNode() { - exists(Py::For f | toAst(this) = f.getIter()) + this instanceof SequenceNode or - exists(Py::Comp c | toAst(this) = c.getIterable()) + this instanceof SetNode + } + + /** Gets the control flow node for an element of this iterable. */ + ControlFlowNode getAnElement() { + result = this.(SequenceNode).getAnElement() + or + result = this.(SetNode).getAnElement() } } diff --git a/python/ql/lib/semmle/python/dataflow/new/BarrierGuards.qll b/python/ql/lib/semmle/python/dataflow/new/BarrierGuards.qll index 072098991bb..39e8d40fd17 100644 --- a/python/ql/lib/semmle/python/dataflow/new/BarrierGuards.qll +++ b/python/ql/lib/semmle/python/dataflow/new/BarrierGuards.qll @@ -1,11 +1,12 @@ /** Provides commonly used BarrierGuards. */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow -private predicate constCompare(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) { - exists(CompareNode cn | cn = g | - exists(ImmutableLiteral const, Cmpop op, ControlFlowNode c | +private predicate constCompare(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) { + exists(Cfg::CompareNode cn | cn = g | + exists(ImmutableLiteral const, Cmpop op, Cfg::ControlFlowNode c | c.getNode() = const and ( op = any(Eq eq) and branch = true @@ -18,7 +19,7 @@ private predicate constCompare(DataFlow::GuardNode g, ControlFlowNode node, bool cn.operands(node, op, c) ) or - exists(NameConstant const, Cmpop op, ControlFlowNode c | + exists(NameConstant const, Cmpop op, Cfg::ControlFlowNode c | c.getNode() = const and ( op = any(Is is_) and branch = true @@ -31,12 +32,12 @@ private predicate constCompare(DataFlow::GuardNode g, ControlFlowNode node, bool cn.operands(node, op, c) ) or - exists(IterableNode const_iterable, Cmpop op | + exists(Cfg::IterableNode const_iterable, Cmpop op | op = any(In in_) and branch = true or op = any(NotIn ni) and branch = false | - forall(ControlFlowNode elem | elem = const_iterable.getAnElement() | + forall(Cfg::ControlFlowNode elem | elem = const_iterable.getAnElement() | elem.getNode() instanceof ImmutableLiteral ) and cn.operands(node, op, const_iterable) diff --git a/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll b/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll index 1a32965d08d..644f88e311d 100644 --- a/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll @@ -4,6 +4,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow // Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource::Range` private import semmle.python.Frameworks @@ -105,7 +106,7 @@ private module SensitiveDataModeling { or // to cover functions that we don't have the definition for, and where the // reference to the function has not already been marked as being sensitive - this.getFunction().asCfgNode().(NameNode).getId() = sensitiveString(classification) + this.getFunction().asCfgNode().(Cfg::NameNode).getId() = sensitiveString(classification) } override SensitiveDataClassification getClassification() { result = classification } @@ -251,12 +252,12 @@ private module SensitiveDataModeling { SensitiveDataClassification classification; SensitiveVariableAssignment() { - exists(DefinitionNode def | - def.(NameNode).getId() = sensitiveString(classification) and + exists(Cfg::DefinitionNode def | + def.(Cfg::NameNode).getId() = sensitiveString(classification) and ( this.asCfgNode() = def.getValue() or - this.asCfgNode() = def.getValue().(ForNode).getSequence() + this.asCfgNode() = def.getValue().(Cfg::ForNode).getSequence() ) and not this.asExpr() instanceof FunctionExpr and not this.asExpr() instanceof ClassExpr @@ -293,7 +294,7 @@ private module SensitiveDataModeling { SensitiveDataClassification classification; SensitiveSubscript() { - this.asCfgNode().(SubscriptNode).getIndex() = + this.asCfgNode().(Cfg::SubscriptNode).getIndex() = sensitiveLookupStringConst(classification).asCfgNode() } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll b/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll index 76d2cb11e14..cfdef2a1a90 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll @@ -3,6 +3,7 @@ overlay[local] module; private import python +private import semmle.python.controlflow.internal.Cfg as Cfg import DataFlowUtil import DataFlowPublic private import DataFlowPrivate @@ -83,9 +84,9 @@ abstract class AttrWrite extends AttrRef { * ```python * object.attr = value * ``` - * Also gives access to the `value` being written, by extending `DefinitionNode`. + * Also gives access to the `value` being written, by extending `Cfg::DefinitionNode`. */ -private class AttributeAssignmentNode extends DefinitionNode, AttrNode { } +private class AttributeAssignmentNode extends Cfg::DefinitionNode, Cfg::AttrNode { } /** A simple attribute assignment: `object.attr = value`. */ private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode { @@ -131,13 +132,13 @@ private class GlobalAttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode { override string getAttributeName() { result = node.getName() } } -/** Represents `CallNode`s that may refer to calls to built-in functions or classes. */ -private class BuiltInCallNode extends CallNode { +/** Represents `Cfg::CallNode`s that may refer to calls to built-in functions or classes. */ +private class BuiltInCallNode extends Cfg::CallNode { string name; BuiltInCallNode() { // TODO disallow instances where the name of the built-in may refer to an in-scope variable of that name. - exists(NameNode id | + exists(Cfg::NameNode id | name = Builtins::getBuiltinName() and this.getFunction() = id and id.getId() = name and @@ -145,7 +146,7 @@ private class BuiltInCallNode extends CallNode { ) } - /** Gets the name of the built-in function that is called at this `CallNode` */ + /** Gets the name of the built-in function that is called at this `Cfg::CallNode` */ string getBuiltinName() { result = name } } @@ -157,20 +158,20 @@ private class BuiltinAttrCallNode extends BuiltInCallNode { BuiltinAttrCallNode() { name in ["setattr", "getattr", "hasattr", "delattr"] } /** Gets the control flow node for object on which the attribute is accessed. */ - ControlFlowNode getObject() { result in [this.getArg(0), this.getArgByName("object")] } + Cfg::ControlFlowNode getObject() { result in [this.getArg(0), this.getArgByName("object")] } /** * Gets the control flow node for the value that is being written to the attribute. * Only relevant for `setattr` calls. */ - ControlFlowNode getValue() { + Cfg::ControlFlowNode getValue() { // only valid for `setattr` name = "setattr" and result in [this.getArg(2), this.getArgByName("value")] } /** Gets the control flow node that defines the name of the attribute being accessed. */ - ControlFlowNode getName() { result in [this.getArg(1), this.getArgByName("name")] } + Cfg::ControlFlowNode getName() { result in [this.getArg(1), this.getArgByName("name")] } } /** Represents calls to the built-in `setattr`. */ @@ -205,10 +206,10 @@ private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode { * attr = value * ... * ``` - * Instances of this class correspond to the `NameNode` for `attr`, and also gives access to `value` by - * virtue of being a `DefinitionNode`. + * Instances of this class correspond to the `Cfg::NameNode` for `attr`, and also gives access to `value` by + * virtue of being a `Cfg::DefinitionNode`. */ -private class ClassAttributeAssignmentNode extends DefinitionNode, NameNode { +private class ClassAttributeAssignmentNode extends Cfg::DefinitionNode, Cfg::NameNode { ClassAttributeAssignmentNode() { this.getScope() = any(ClassExpr c).getInnerScope() } } @@ -248,7 +249,7 @@ abstract class AttrRead extends AttrRef, Node, LocalSourceNode { /** A simple attribute read, e.g. `object.attr` */ private class AttributeReadAsAttrRead extends AttrRead, CfgNode { - override AttrNode node; + override Cfg::AttrNode node; AttributeReadAsAttrRead() { node.isLoad() } @@ -285,7 +286,7 @@ private class GetAttrCallAsAttrRead extends AttrRead, CfgNode { * is treated as if it is a read of the attribute `module.attr`, even if `module` is not imported directly. */ private class ModuleAttributeImportAsAttrRead extends AttrRead, CfgNode { - override ImportMemberNode node; + override Cfg::ImportMemberNode node; override Node getObject() { result.asCfgNode() = node.getModule(_) } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll b/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll index 764af5d9dc5..94c9b486448 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll @@ -3,6 +3,7 @@ overlay[local] module; private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.internal.ImportStar @@ -67,7 +68,7 @@ module Builtins { DataFlow::CfgNode likelyBuiltin(string name) { exists(Module m | result.getNode() = - any(NameNode n | + any(Cfg::NameNode n | possible_builtin_accessed_in_module(n, name, m) and not possible_builtin_defined_in_module(name, m) ) @@ -87,7 +88,7 @@ module Builtins { * Holds if `n` is an access of a global variable called `name` (which is also the name of a * built-in) inside the module `m`. */ - private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) { + private predicate possible_builtin_accessed_in_module(Cfg::NameNode n, string name, Module m) { n.isGlobal() and n.isLoad() and name = n.getId() and diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index fc0bba6b135..ba6d134f83b 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -25,7 +25,7 @@ * what callable this call might end up targeting. * * Specifically this means that we cannot use type-backtrackers from the function of a - * `CallNode`, since there is no `CallNode` to backtrack from for `func` in the example + * `Cfg::CallNode`, since there is no `Cfg::CallNode` to backtrack from for `func` in the example * above. * * Note: This hasn't been 100% realized yet, so we don't currently expose a predicate to @@ -35,6 +35,7 @@ overlay[local?] module; private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import DataFlowPublic private import DataFlowPrivate private import FlowSummaryImpl as FlowSummaryImpl @@ -162,7 +163,7 @@ newtype TArgumentPosition = */ TLambdaSelfArgumentPosition() or TPositionalArgumentPosition(int index) { - exists(any(CallNode c).getArg(index)) + exists(any(Cfg::CallNode c).getArg(index)) or // since synthetic calls within a summarized callable could use a unique argument // position, we need to ensure we make these available (these are specified as @@ -174,7 +175,7 @@ newtype TArgumentPosition = index = 0 } or TKeywordArgumentPosition(string name) { - exists(any(CallNode c).getArgByName(name)) + exists(any(Cfg::CallNode c).getArgByName(name)) or // see comment for TPositionalArgumentPosition FlowSummaryImpl::ParsePositions::isParsedKeywordParameterPosition(_, name) @@ -256,9 +257,13 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { */ overlay[local] predicate isStaticmethod(Function func) { - exists(NameNode id | id.getId() = "staticmethod" and id.isGlobal() | - func.getADecorator() = id.getNode() - ) + // The decorator is *syntactically* a Name "staticmethod" — we don't + // care which variable it resolves to. Even if a class redefines + // `staticmethod`, the binding hasn't happened yet at the decorator + // position, so Python's runtime semantics is "use the builtin". + // Matches legacy ESSA semantics which used `isGlobal()` (i.e. "no + // SSA def reaches this load") at the decorator's `NameNode`. + func.getADecorator().(Name).getId() = "staticmethod" } /** @@ -268,9 +273,7 @@ predicate isStaticmethod(Function func) { */ overlay[local] predicate isClassmethod(Function func) { - exists(NameNode id | id.getId() = "classmethod" and id.isGlobal() | - func.getADecorator() = id.getNode() - ) + func.getADecorator().(Name).getId() = "classmethod" or exists(Class cls | cls.getAMethod() = func and @@ -285,9 +288,7 @@ predicate isClassmethod(Function func) { /** Holds if the function `func` has a `property` decorator. */ overlay[local] predicate hasPropertyDecorator(Function func) { - exists(NameNode id | id.getId() = "property" and id.isGlobal() | - func.getADecorator() = id.getNode() - ) + func.getADecorator().(Name).getId() = "property" } /** @@ -295,10 +296,10 @@ predicate hasPropertyDecorator(Function func) { */ overlay[local] predicate hasContextmanagerDecorator(Function func) { - exists(ControlFlowNode contextmanager | - contextmanager.(NameNode).getId() = "contextmanager" and contextmanager.(NameNode).isGlobal() + exists(Cfg::ControlFlowNode contextmanager | + contextmanager.(Cfg::NameNode).getId() = "contextmanager" and contextmanager.(Cfg::NameNode).isGlobal() or - contextmanager.(AttrNode).getObject("contextmanager").(NameNode).getId() = "contextlib" + contextmanager.(Cfg::AttrNode).getObject("contextmanager").(Cfg::NameNode).getId() = "contextlib" | func.getADecorator() = contextmanager.getNode() ) @@ -314,10 +315,10 @@ predicate hasContextmanagerDecorator(Function func) { */ overlay[local] private predicate hasOverloadDecorator(Function func) { - exists(ControlFlowNode overload | - overload.(NameNode).getId() = "overload" and overload.(NameNode).isGlobal() + exists(Cfg::ControlFlowNode overload | + overload.(Cfg::NameNode).getId() = "overload" and overload.(Cfg::NameNode).isGlobal() or - overload.(AttrNode).getObject("overload").(NameNode).isGlobal() + overload.(Cfg::AttrNode).getObject("overload").(Cfg::NameNode).isGlobal() | func.getADecorator() = overload.getNode() ) @@ -536,7 +537,7 @@ class LibraryCallableValue extends DataFlowCallable, TLibraryCallable { // ============================================================================= /** Gets a call to `type`. */ private CallCfgNode getTypeCall() { - exists(NameNode id | id.getId() = "type" and id.isGlobal() | + exists(Cfg::NameNode id | id.getId() = "type" and id.isGlobal() | result.getFunction().asCfgNode() = id ) } @@ -548,7 +549,7 @@ private CallCfgNode getSuperCall() { // link below), but otherwise only 2 edgecases. Overall it seems ok to ignore this complexity. // // https://github.com/python/cpython/blob/18b1782192f85bd26db89f5bc850f8bee4247c1a/Lib/unittest/mock.py#L48-L50 - exists(NameNode id | id.getId() = "super" and id.isGlobal() | + exists(Cfg::NameNode id | id.getId() = "super" and id.isGlobal() | result.getFunction().asCfgNode() = id ) } @@ -1034,7 +1035,7 @@ private module MethodCalls { */ pragma[nomagic] private predicate directCall( - CallNode call, Function target, string functionName, Class cls, AttrRead attr, Node self + Cfg::CallNode call, Function target, string functionName, Class cls, AttrRead attr, Node self ) { target = findFunctionAccordingToMroKnownStartingClass(cls, functionName) and directCall_join(call, functionName, cls, attr, self) @@ -1043,7 +1044,7 @@ private module MethodCalls { /** Extracted to give good join order */ pragma[nomagic] private predicate directCall_join( - CallNode call, string functionName, Class cls, AttrRead attr, Node self + Cfg::CallNode call, string functionName, Class cls, AttrRead attr, Node self ) { call.getFunction() = attrReadTracker(attr).asCfgNode() and attr.accesses(self, functionName) and @@ -1060,7 +1061,7 @@ private module MethodCalls { */ pragma[nomagic] private predicate callWithinMethodImplicitSelfOrCls( - CallNode call, Function target, string functionName, Class classWithMethod, AttrRead attr, + Cfg::CallNode call, Function target, string functionName, Class classWithMethod, AttrRead attr, Node self ) { target = findFunctionAccordingToMro(getADirectSubclass*(classWithMethod), functionName) and @@ -1070,7 +1071,7 @@ private module MethodCalls { /** Extracted to give good join order */ pragma[nomagic] private predicate callWithinMethodImplicitSelfOrCls_join( - CallNode call, string functionName, Class classWithMethod, AttrRead attr, Node self + Cfg::CallNode call, string functionName, Class classWithMethod, AttrRead attr, Node self ) { call.getFunction() = attrReadTracker(attr).asCfgNode() and attr.accesses(self, functionName) and @@ -1082,7 +1083,7 @@ private module MethodCalls { * resolve the call to a known target (since the only super class might be the * builtin `object`, so we never have the implementation of `__new__` in the DB). */ - predicate fromSuperNewCall(CallNode call, Class classUsedInSuper, AttrRead attr, Node self) { + predicate fromSuperNewCall(Cfg::CallNode call, Class classUsedInSuper, AttrRead attr, Node self) { fromSuper_join(call, "__new__", classUsedInSuper, attr, self) and self in [classTracker(_), clsArgumentTracker(_)] } @@ -1104,7 +1105,7 @@ private module MethodCalls { */ pragma[nomagic] predicate fromSuper( - CallNode call, Function target, string functionName, Class classUsedInSuper, AttrRead attr, + Cfg::CallNode call, Function target, string functionName, Class classUsedInSuper, AttrRead attr, Node self ) { target = findFunctionAccordingToMro(getNextClassInMro(classUsedInSuper), functionName) and @@ -1114,7 +1115,7 @@ private module MethodCalls { /** Extracted to give good join order */ pragma[nomagic] private predicate fromSuper_join( - CallNode call, string functionName, Class classUsedInSuper, AttrRead attr, Node self + Cfg::CallNode call, string functionName, Class classUsedInSuper, AttrRead attr, Node self ) { call.getFunction() = attrReadTracker(attr).asCfgNode() and ( @@ -1133,7 +1134,7 @@ private module MethodCalls { ) } - predicate resolveMethodCall(CallNode call, Function target, CallType type, Node self) { + predicate resolveMethodCall(Cfg::CallNode call, Function target, CallType type, Node self) { ( directCall(call, target, _, _, _, self) or @@ -1180,7 +1181,7 @@ import MethodCalls * NOTE: We have this predicate mostly to be able to compare with old point-to * call-graph resolution. So it could be removed in the future. */ -predicate resolveClassCall(CallNode call, Class cls) { +predicate resolveClassCall(Cfg::CallNode call, Class cls) { call.getFunction() = classTracker(cls).asCfgNode() or // `cls()` inside a classmethod (which also contains `type(self)()` inside a method) @@ -1210,7 +1211,7 @@ Function invokedFunctionFromClassConstruction(Class cls, string funcName) { * * See https://docs.python.org/3/reference/datamodel.html#object.__call__ */ -predicate resolveClassInstanceCall(CallNode call, Function target, Node self) { +predicate resolveClassInstanceCall(Cfg::CallNode call, Function target, Node self) { exists(Class cls | call.getFunction() = classInstanceTracker(cls).asCfgNode() and target = findFunctionAccordingToMroKnownStartingClass(cls, "__call__") @@ -1229,7 +1230,7 @@ predicate resolveClassInstanceCall(CallNode call, Function target, Node self) { * Holds if `call` is a call to the `target`, with call-type `type`. */ cached -predicate resolveCall(CallNode call, Function target, CallType type) { +predicate resolveCall(Cfg::CallNode call, Function target, CallType type) { Stages::DataFlow::ref() and ( type instanceof CallTypePlainFunction and @@ -1254,11 +1255,11 @@ predicate resolveCall(CallNode call, Function target, CallType type) { // ============================================================================= /** * Holds if the argument of `call` at position `apos` is `arg`. This is just a helper - * predicate that maps ArgumentPositions to the arguments of the underlying `CallNode`. + * predicate that maps ArgumentPositions to the arguments of the underlying `Cfg::CallNode`. */ overlay[local] cached -predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) { +predicate normalCallArg(Cfg::CallNode call, Node arg, ArgumentPosition apos) { exists(int index | apos.isPositional(index) and arg.asCfgNode() = call.getArg(index) @@ -1273,7 +1274,7 @@ predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) { exists(int index | apos.isStarArgs(index) and arg.asCfgNode() = call.getStarArg() and - // since `CallNode.getArg` doesn't include `*args`, we need to drop to the AST level + // since `Cfg::CallNode.getArg` doesn't include `*args`, we need to drop to the AST level // to get the index. Notice that we only use the AST for getting the index, so we // don't need to check for dominance in regards to splitting. call.getStarArg().getNode() = call.getNode().getPositionalArg(index).(Starred).getValue() @@ -1347,7 +1348,7 @@ predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) { * translated into `l.clear()`, and we can still have use-use flow. */ cached -predicate getCallArg(CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) { +predicate getCallArg(Cfg::CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) { Stages::DataFlow::ref() and resolveCall(call, target, type) and ( @@ -1440,10 +1441,10 @@ private predicate sameEnclosingCallable(Node node1, Node node2) { // DataFlowCall // ============================================================================= newtype TDataFlowCall = - TNormalCall(CallNode call, Function target, CallType type) { resolveCall(call, target, type) } or + TNormalCall(Cfg::CallNode call, Function target, CallType type) { resolveCall(call, target, type) } or /** A call to the generated function inside a comprehension */ TComprehensionCall(Comp c) or - TPotentialLibraryCall(CallNode call) or + TPotentialLibraryCall(Cfg::CallNode call) or /** A synthesized call inside a summarized callable */ TSummaryCall( FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver @@ -1463,7 +1464,7 @@ abstract class DataFlowCall extends TDataFlowCall { abstract ArgumentNode getArgument(ArgumentPosition apos); /** Get the control flow node representing this call, if any. */ - abstract ControlFlowNode getNode(); + abstract Cfg::ControlFlowNode getNode(); /** Gets the enclosing callable of this call. */ DataFlowCallable getEnclosingCallable() { result = getCallableScope(this.getScope()) } @@ -1494,28 +1495,28 @@ abstract class ExtractedDataFlowCall extends DataFlowCall { } /** - * A resolved call in source code with an underlying `CallNode`. + * A resolved call in source code with an underlying `Cfg::CallNode`. * * This is considered normal, compared with special calls such as `obj[0]` calling the * `__getitem__` method on the object. However, this also includes calls that go to the * `__call__` special method. */ class NormalCall extends ExtractedDataFlowCall, TNormalCall { - CallNode call; + Cfg::CallNode call; Function target; CallType type; NormalCall() { this = TNormalCall(call, target, type) } override string toString() { - // note: if we used toString directly on the CallNode we would get - // `ControlFlowNode for func()` - // but the `ControlFlowNode` part is just clutter, so we go directly to the AST node + // note: if we used toString directly on the Cfg::CallNode we would get + // `Cfg::ControlFlowNode for func()` + // but the `Cfg::ControlFlowNode` part is just clutter, so we go directly to the AST node // instead. result = call.getNode().toString() } - override ControlFlowNode getNode() { result = call } + override Cfg::ControlFlowNode getNode() { result = call } override Scope getScope() { result = call.getScope() } @@ -1543,7 +1544,7 @@ class ComprehensionCall extends ExtractedDataFlowCall, TComprehensionCall { override string toString() { result = "comprehension call" } - override ControlFlowNode getNode() { result.getNode() = c } + override Cfg::ControlFlowNode getNode() { result.getNode() = c } override Scope getScope() { result = c.getScope() } @@ -1566,14 +1567,14 @@ class ComprehensionCall extends ExtractedDataFlowCall, TComprehensionCall { * in this class. */ class PotentialLibraryCall extends ExtractedDataFlowCall, TPotentialLibraryCall { - CallNode call; + Cfg::CallNode call; PotentialLibraryCall() { this = TPotentialLibraryCall(call) } override string toString() { - // note: if we used toString directly on the CallNode we would get - // `ControlFlowNode for func()` - // but the `ControlFlowNode` part is just clutter, so we go directly to the AST node + // note: if we used toString directly on the Cfg::CallNode we would get + // `Cfg::ControlFlowNode for func()` + // but the `Cfg::ControlFlowNode` part is just clutter, so we go directly to the AST node // instead. result = call.getNode().toString() } @@ -1590,10 +1591,10 @@ class PotentialLibraryCall extends ExtractedDataFlowCall, TPotentialLibraryCall // potential self argument, from `foo.bar()` -- note that this could also just be a // module reference, but we really don't have a good way of knowing :| apos.isSelf() and - result.asCfgNode() = call.getFunction().(AttrNode).getObject() + result.asCfgNode() = call.getFunction().(Cfg::AttrNode).getObject() } - override ControlFlowNode getNode() { result = call } + override Cfg::ControlFlowNode getNode() { result = call } override Scope getScope() { result = call.getScope() } } @@ -1625,7 +1626,7 @@ class SummaryCall extends DataFlowCall, TSummaryCall { override ArgumentNode getArgument(ArgumentPosition apos) { none() } - override ControlFlowNode getNode() { none() } + override Cfg::ControlFlowNode getNode() { none() } override string toString() { result = "[summary] call to " + receiver + " in " + c } @@ -1767,12 +1768,12 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl * This is used for tracking flow through captured variables. */ class SynthCapturedVariablesArgumentNode extends Node, TSynthCapturedVariablesArgumentNode { - ControlFlowNode callable; + Cfg::ControlFlowNode callable; SynthCapturedVariablesArgumentNode() { this = TSynthCapturedVariablesArgumentNode(callable) } - /** Gets the `CallNode` corresponding to this captured variables argument node. */ - CallNode getCallNode() { result.getFunction() = callable } + /** Gets the `Cfg::CallNode` corresponding to this captured variables argument node. */ + Cfg::CallNode getCallNode() { result.getFunction() = callable } /** Gets the `CfgNode` that corresponds to this synthetic node. */ CfgNode getUnderlyingNode() { result.asCfgNode() = callable } @@ -1790,7 +1791,7 @@ class CapturedVariablesArgumentNodeAsArgumentNode extends ArgumentNode, { overlay[global] override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { - exists(CallNode callNode | callNode = this.getCallNode() | + exists(Cfg::CallNode callNode | callNode = this.getCallNode() | callNode = call.getNode() and exists(Function target | resolveCall(callNode, target, _) | target = any(VariableCapture::CapturedVariable v).getACapturingScope() @@ -1804,7 +1805,7 @@ class CapturedVariablesArgumentNodeAsArgumentNode extends ArgumentNode, class SynthCapturedVariablesArgumentPostUpdateNode extends PostUpdateNodeImpl, TSynthCapturedVariablesArgumentPostUpdateNode { - ControlFlowNode callable; + Cfg::ControlFlowNode callable; SynthCapturedVariablesArgumentPostUpdateNode() { this = TSynthCapturedVariablesArgumentPostUpdateNode(callable) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 67963e7cd38..5d1f6417255 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -2,8 +2,9 @@ overlay[local?] module; private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import DataFlowPublic -private import semmle.python.essa.SsaCompute +private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl private import semmle.python.dataflow.new.internal.ImportResolution private import FlowSummaryImpl as FlowSummaryImpl private import semmle.python.frameworks.data.ModelsAsData @@ -43,13 +44,23 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos) // Nodes //-------- overlay[local] -predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr } +predicate isExpressionNode(Cfg::ControlFlowNode node) { + node.getNode() instanceof Expr + or + // `Cfg::ForNode` wraps a `For` statement's iter position, but + // overrides `.getNode()` to return the `Py::For` statement (for + // legacy parity). The underlying AST is still an `Expr` (the iter + // expression); we want a dataflow node here so that for-loop + // content reads (`for y in l`) have a source expression node to + // read content from. + node instanceof Cfg::ForNode +} // ============================================================================= // SyntheticPreUpdateNode // ============================================================================= class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode { - CallNode node; + Cfg::CallNode node; SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(node) } @@ -151,7 +162,7 @@ predicate synthStarArgsElementParameterNodeStoreStep( * been passed in a `**kwargs` argument. */ class SynthDictSplatArgumentNode extends Node, TSynthDictSplatArgumentNode { - CallNode node; + Cfg::CallNode node; SynthDictSplatArgumentNode() { this = TSynthDictSplatArgumentNode(node) } @@ -165,7 +176,7 @@ class SynthDictSplatArgumentNode extends Node, TSynthDictSplatArgumentNode { private predicate synthDictSplatArgumentNodeStoreStep( ArgumentNode nodeFrom, DictionaryElementContent c, SynthDictSplatArgumentNode nodeTo ) { - exists(string name, CallNode call, ArgumentPosition keywordPos | + exists(string name, Cfg::CallNode call, ArgumentPosition keywordPos | nodeTo = TSynthDictSplatArgumentNode(call) and getCallArg(call, _, _, nodeFrom, keywordPos) and keywordPos.isKeyword(name) and @@ -289,7 +300,7 @@ abstract class PostUpdateNodeImpl extends Node { * Synthetic post-update nodes for synthetic nodes need to be listed one by one. */ class SyntheticPostUpdateNode extends PostUpdateNodeImpl, TSyntheticPostUpdateNode { - ControlFlowNode node; + Cfg::ControlFlowNode node; SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(node) } @@ -333,16 +344,22 @@ module LocalFlow { // `x = f(42)` // nodeFrom is `f(42)` // nodeTo is `x` - exists(AssignmentDefinition def | + // + // We use the CFG-level `DefinitionNode.getValue()` directly rather + // than going through SSA, because the new SSA library prunes write + // definitions that have no subsequent read in the same scope (e.g. + // a module-level `def f():` whose `f` is only read inside other + // functions). The CFG-level link is unconditional. + exists(Cfg::DefinitionNode def | nodeFrom.(CfgNode).getNode() = def.getValue() and - nodeTo.(CfgNode).getNode() = def.getDefiningNode() + nodeTo.(CfgNode).getNode() = def ) or // With definition // `with f(42) as x:` // nodeFrom is `f(42)` // nodeTo is `x` - exists(With with, ControlFlowNode contextManager, WithDefinition withDef, ControlFlowNode var | + exists(With with, Cfg::ControlFlowNode contextManager, SsaImpl::WithDefinition withDef, Cfg::ControlFlowNode var | var = withDef.getDefiningNode() | nodeFrom.(CfgNode).getNode() = contextManager and @@ -361,13 +378,13 @@ module LocalFlow { predicate expressionFlowStep(Node nodeFrom, Node nodeTo) { // If expressions - nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand() + nodeFrom.asCfgNode() = nodeTo.asCfgNode().(Cfg::IfExprNode).getAnOperand() or // Assignment expressions - nodeFrom.asCfgNode() = nodeTo.asCfgNode().(AssignmentExprNode).getValue() + nodeFrom.asCfgNode() = nodeTo.asCfgNode().(Cfg::AssignmentExprNode).getValue() or // boolean inline expressions such as `x or y` or `x and y` - nodeFrom.asCfgNode() = nodeTo.asCfgNode().(BoolExprNode).getAnOperand() + nodeFrom.asCfgNode() = nodeTo.asCfgNode().(Cfg::BoolExprNode).getAnOperand() or // Flow inside an unpacking assignment iterableUnpackingFlowStep(nodeFrom, nodeTo) @@ -376,12 +393,12 @@ module LocalFlow { matchFlowStep(nodeFrom, nodeTo) } - predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) { - AdjacentUses::adjacentUseUse(nodeFrom, nodeTo) + predicate useToNextUse(Cfg::NameNode nodeFrom, Cfg::NameNode nodeTo) { + SsaImpl::AdjacentUses::adjacentUseUse(nodeFrom, nodeTo) } - predicate defToFirstUse(EssaVariable var, NameNode nodeTo) { - AdjacentUses::firstUse(var.getDefinition(), nodeTo) + predicate defToFirstUse(SsaImpl::EssaVariable var, Cfg::NameNode nodeTo) { + SsaImpl::AdjacentUses::firstUse(var.getDefinition(), nodeTo) } predicate useUseFlowStep(Node nodeFrom, Node nodeTo) { @@ -390,12 +407,12 @@ module LocalFlow { // `x = f(y)` // nodeFrom is `y` on first line // nodeTo is `y` on second line - exists(EssaDefinition def | - nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode() + exists(SsaImpl::EssaDefinition def | + nodeFrom.(CfgNode).getNode() = def.(SsaImpl::EssaNodeDefinition).getDefiningNode() or nodeFrom.(ScopeEntryDefinitionNode).getDefinition() = def | - AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode()) + SsaImpl::AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode()) ) or // Next use after use @@ -557,9 +574,9 @@ predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) { // a parameter with a default value, since the parameter will be in the scope of the // function, while the default value itself will be in the scope that _defines_ the // function. - exists(ParameterDefinition param | + exists(SsaImpl::ParameterDefinition param | // note: we go to the _control-flow node_ of the parameter, and not the ESSA node of the parameter, since for type-tracking, the ESSA node is not a LocalSourceNode, so we would get in trouble. - nodeFrom.asCfgNode() = param.getDefault() and + nodeFrom.asCfgNode().getNode() = param.getParameter().(Parameter).getDefault() and nodeTo.asCfgNode() = param.getDefiningNode() ) or @@ -663,7 +680,7 @@ predicate neverSkipInPathGraph(Node n) { // ``` // we would end up saying that the path MUST not skip the x in `y = x`, which is just // annoying and doesn't help the path explanation become clearer. - n.asCfgNode() = any(EssaNodeDefinition def).getDefiningNode() + n.asCfgNode() = any(SsaImpl::EssaNodeDefinition def).getDefiningNode() } /** @@ -872,7 +889,7 @@ predicate listStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo) // nodeFrom is `42`, cfg node // nodeTo is the list, `[..., 42, ...]`, cfg node // c denotes element of list - nodeTo.getNode().(ListNode).getAnElement() = nodeFrom.getNode() and + nodeTo.getNode().(Cfg::ListNode).getAnElement() = nodeFrom.getNode() and not nodeTo.getNode() instanceof UnpackingAssignmentSequenceTarget and // Suppress unused variable warning c = c @@ -885,7 +902,7 @@ predicate setStoreStep(CfgNode nodeFrom, SetElementContent c, CfgNode nodeTo) { // nodeFrom is `42`, cfg node // nodeTo is the set, `{..., 42, ...}`, cfg node // c denotes element of list - nodeTo.getNode().(SetNode).getAnElement() = nodeFrom.getNode() and + nodeTo.getNode().(Cfg::SetNode).getAnElement() = nodeFrom.getNode() and // Suppress unused variable warning c = c } @@ -898,7 +915,7 @@ predicate tupleStoreStep(CfgNode nodeFrom, TupleElementContent c, CfgNode nodeTo // nodeTo is the tuple, `(..., 42, ...)`, cfg node // c denotes element of tuple and index of nodeFrom exists(int n | - nodeTo.getNode().(TupleNode).getElement(n) = nodeFrom.getNode() and + nodeTo.getNode().(Cfg::TupleNode).getElement(n) = nodeFrom.getNode() and not nodeTo.getNode() instanceof UnpackingAssignmentSequenceTarget and c.getIndex() = n ) @@ -912,7 +929,7 @@ predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeT // nodeTo is the dict, `{..., "key" = 42, ...}`, cfg node // c denotes element of dictionary and the key `"key"` exists(KeyValuePair item | - item = nodeTo.asCfgNode().(DictNode).getNode().(Dict).getAnItem() and + item = nodeTo.asCfgNode().(Cfg::DictNode).getNode().(Dict).getAnItem() and nodeFrom.getNode().getNode() = item.getValue() and c.getKey() = item.getKey().(StringLiteral).getS() ) @@ -927,9 +944,9 @@ predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeT private predicate moreDictStoreSteps(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) { // NOTE: It's important to add logic to the newtype definition of // DictionaryElementContent if you add new cases here. - exists(SubscriptNode subscript | + exists(Cfg::SubscriptNode subscript | nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() = subscript.getObject() and - nodeFrom.asCfgNode() = subscript.(DefinitionNode).getValue() and + nodeFrom.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and c.getKey() = subscript.getIndex().getNode().(StringLiteral).getText() ) or @@ -942,8 +959,8 @@ private predicate moreDictStoreSteps(CfgNode nodeFrom, DictionaryElementContent } predicate dictClearStep(Node node, DictionaryElementContent c) { - exists(SubscriptNode subscript | - subscript instanceof DefinitionNode and + exists(Cfg::SubscriptNode subscript | + subscript instanceof Cfg::DefinitionNode and node.asCfgNode() = subscript.getObject() and c.getKey() = subscript.getIndex().getNode().(StringLiteral).getText() ) @@ -1018,7 +1035,7 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) { // nodeFrom is `l`, cfg node // nodeTo is `l[3]`, cfg node // c is compatible with 3 - nodeFrom.getNode() = nodeTo.getNode().(SubscriptNode).getObject() and + nodeFrom.getNode() = nodeTo.getNode().(Cfg::SubscriptNode).getObject() and ( c instanceof ListElementContent or @@ -1027,10 +1044,10 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) { c instanceof DictionaryElementAnyContent or c.(TupleElementContent).getIndex() = - nodeTo.getNode().(SubscriptNode).getIndex().getNode().(IntegerLiteral).getValue() + nodeTo.getNode().(Cfg::SubscriptNode).getIndex().getNode().(IntegerLiteral).getValue() or c.(DictionaryElementContent).getKey() = - nodeTo.getNode().(SubscriptNode).getIndex().getNode().(StringLiteral).getS() + nodeTo.getNode().(Cfg::SubscriptNode).getIndex().getNode().(StringLiteral).getS() ) } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index a9d73fe0527..8ed76588004 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -5,11 +5,12 @@ overlay[local] module; private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import DataFlowPrivate import semmle.python.dataflow.new.TypeTracking import Attributes import LocalSources -private import semmle.python.essa.SsaCompute +private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl private import semmle.python.dataflow.new.internal.ImportStar private import semmle.python.frameworks.data.ModelsAsData private import FlowSummaryImpl as FlowSummaryImpl @@ -27,7 +28,7 @@ private import semmle.python.frameworks.data.ModelsAsData overlay[local] newtype TNode = /** A node corresponding to a control flow node. */ - TCfgNode(ControlFlowNode node) { + TCfgNode(Cfg::ControlFlowNode node) { isExpressionNode(node) or node.getNode() instanceof Pattern @@ -36,7 +37,7 @@ newtype TNode = * A node corresponding to a scope entry definition. That is, the value of a variable * as it enters a scope. */ - TScopeEntryDefinitionNode(ScopeEntryDefinition def) { not def.getScope() instanceof Module } or + TScopeEntryDefinitionNode(SsaImpl::ScopeEntryDefinition def) { not def.getScope() instanceof Module } or /** * A synthetic node representing the value of an object before a state change. * @@ -47,13 +48,13 @@ newtype TNode = // NOTE: since we can't rely on the call graph, but we want to have synthetic // pre-update nodes for class calls, we end up getting synthetic pre-update nodes for // ALL calls :| - TSyntheticPreUpdateNode(CallNode call) or + TSyntheticPreUpdateNode(Cfg::CallNode call) or /** * A synthetic node representing the value of an object after a state change. * See QLDoc for `PostUpdateNode`. */ - TSyntheticPostUpdateNode(ControlFlowNode node) { - exists(CallNode call | + TSyntheticPostUpdateNode(Cfg::ControlFlowNode node) { + exists(Cfg::CallNode call | node = call.getArg(_) or node = call.getArgByName(_) @@ -62,12 +63,12 @@ newtype TNode = node = call.getFunction() ) or - node = any(AttrNode a).getObject() + node = any(Cfg::AttrNode a).getObject() or - node = any(SubscriptNode s).getObject() + node = any(Cfg::SubscriptNode s).getObject() or // self parameter when used implicitly in `super()` - exists(Class cls, Function func, ParameterDefinition def | + exists(Class cls, Function func, SsaImpl::ParameterDefinition def | func = cls.getAMethod() and not isStaticmethod(func) and // this matches what we do in ExtractedParameterNode @@ -112,7 +113,7 @@ newtype TNode = exists(ParameterPosition ppos | ppos.isStarArgs(_) | exists(callable.getParameter(ppos))) } or /** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */ - TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or + TSynthDictSplatArgumentNode(Cfg::CallNode call) { exists(call.getArgByName(_)) } or /** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */ TSynthDictSplatParameterNode(DataFlowCallable callable) { exists(ParameterPosition ppos | ppos.isKeyword(_) | exists(callable.getParameter(ppos))) @@ -128,15 +129,15 @@ newtype TNode = * A synthetic node representing the values of the variables captured * by the callable being called. */ - TSynthCapturedVariablesArgumentNode(ControlFlowNode callable) { - callable = any(CallNode c).getFunction() + TSynthCapturedVariablesArgumentNode(Cfg::ControlFlowNode callable) { + callable = any(Cfg::CallNode c).getFunction() } or /** * A synthetic node representing the values of the variables captured * by the callable being called, after the output has been computed. */ - TSynthCapturedVariablesArgumentPostUpdateNode(ControlFlowNode callable) { - callable = any(CallNode c).getFunction() + TSynthCapturedVariablesArgumentPostUpdateNode(Cfg::ControlFlowNode callable) { + callable = any(Cfg::CallNode c).getFunction() } or /** A synthetic node representing the values of variables captured by a comprehension. */ TSynthCompCapturedVariablesArgumentNode(Comp comp) { @@ -194,7 +195,7 @@ class Node extends TNode { } /** Gets the control-flow node corresponding to this node, if any. */ - ControlFlowNode asCfgNode() { none() } + Cfg::ControlFlowNode asCfgNode() { none() } /** Gets the expression corresponding to this node, if any. */ Expr asExpr() { none() } @@ -207,14 +208,14 @@ class Node extends TNode { /** A data-flow node corresponding to a control-flow node. */ class CfgNode extends Node, TCfgNode { - ControlFlowNode node; + Cfg::ControlFlowNode node; CfgNode() { this = TCfgNode(node) } - /** Gets the `ControlFlowNode` represented by this data-flow node. */ - ControlFlowNode getNode() { result = node } + /** Gets the `Cfg::ControlFlowNode` represented by this data-flow node. */ + Cfg::ControlFlowNode getNode() { result = node } - override ControlFlowNode asCfgNode() { result = node } + override Cfg::ControlFlowNode asCfgNode() { result = node } /** Gets a textual representation of this element. */ override string toString() { result = node.toString() } @@ -224,9 +225,9 @@ class CfgNode extends Node, TCfgNode { override Location getLocation() { result = node.getLocation() } } -/** A data-flow node corresponding to a `CallNode` in the control-flow graph. */ +/** A data-flow node corresponding to a `Cfg::CallNode` in the control-flow graph. */ class CallCfgNode extends CfgNode, LocalSourceNode { - override CallNode node; + override Cfg::CallNode node; /** * Gets the data-flow node for the function component of the call corresponding to this data-flow @@ -307,15 +308,15 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e } * as it enters a scope. */ class ScopeEntryDefinitionNode extends Node, TScopeEntryDefinitionNode { - ScopeEntryDefinition def; + SsaImpl::ScopeEntryDefinition def; ScopeEntryDefinitionNode() { this = TScopeEntryDefinitionNode(def) } - /** Gets the `ScopeEntryDefinition` associated with this node. */ - ScopeEntryDefinition getDefinition() { result = def } + /** Gets the `SsaImpl::ScopeEntryDefinition` associated with this node. */ + SsaImpl::ScopeEntryDefinition getDefinition() { result = def } /** Gets the source variable represented by this node. */ - SsaSourceVariable getVariable() { result = def.getSourceVariable() } + SsaImpl::SsaSourceVariable getVariable() { result = def.getSourceVariable() } override Location getLocation() { result = def.getLocation() } @@ -337,7 +338,7 @@ class ParameterNode extends Node instanceof ParameterNodeImpl { /** A parameter node found in the source code (not in a summary). */ class ExtractedParameterNode extends ParameterNodeImpl, CfgNode { //, LocalSourceNode { - ParameterDefinition def; + SsaImpl::ParameterDefinition def; ExtractedParameterNode() { node = def.getDefiningNode() } @@ -368,10 +369,10 @@ Node getCallArgApproximation() { exists(Class c | result.asExpr() = c.getAMethod().getArg(0)) or // the object part of an attribute expression (which might be a bound method) - result.asCfgNode() = any(AttrNode a).getObject() + result.asCfgNode() = any(Cfg::AttrNode a).getObject() or // the function part of any call - result.asCfgNode() = any(CallNode c).getFunction() + result.asCfgNode() = any(Cfg::CallNode c).getFunction() } /** Gets the extracted argument nodes that do not rely on `getCallArg`. */ @@ -380,7 +381,7 @@ private Node implicitArgumentNode() { normalCallArg(_, result, _) or // and self arguments - result.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject() + result.asCfgNode() = any(Cfg::CallNode c).getFunction().(Cfg::AttrNode).getObject() or // for comprehensions, we allow the synthetic `iterable` argument result.asExpr() = any(Comp c).getIterable() @@ -489,17 +490,20 @@ class ModuleVariableNode extends Node, TModuleVariableNode { not result.getScope() = mod } - /** Gets an `EssaNode` that corresponds to an assignment of this global variable. */ + /** Gets a CFG node that corresponds to an assignment of this global variable. */ Node getAWrite() { - any(EssaNodeDefinition def).definedBy(var, result.asCfgNode().(DefinitionNode)) + exists(Cfg::NameNode n | + n.defines(var) and + result.asCfgNode() = n + ) } /** Gets the possible values of the variable at the end of import time */ CfgNode getADefiningWrite() { - exists(SsaVariable def | - def = any(SsaVariable ssa_var).getAnUltimateDefinition() and - def.getDefinition() = result.asCfgNode() and - def.getVariable() = var + exists(SsaImpl::EssaVariable def | + def = any(SsaImpl::EssaVariable ssa_var).getAnUltimateDefinition() and + def.getDefinition().(SsaImpl::EssaNodeDefinition).getDefiningNode() = result.asCfgNode() and + def.getSourceVariable().getVariable() = var ) } @@ -516,7 +520,7 @@ private ModuleVariableNode import_star_read(Node n) { overlay[global] pragma[nomagic] private predicate resolved_import_star_module(Module m, string name, Node n) { - exists(NameNode nn | nn = n.asCfgNode() | + exists(Cfg::NameNode nn | nn = n.asCfgNode() | ImportStar::importStarResolvesTo(pragma[only_bind_into](nn), m) and nn.getId() = name ) @@ -574,88 +578,106 @@ class StarPatternElementNode extends Node, TStarPatternElementNode { } /** - * Gets a node that controls whether other nodes are evaluated. + * A node that participates in a conditional split: a CFG node whose + * evaluation outcome (true/false) is used to choose between two + * successor basic blocks. In the new shared CFG, such nodes appear in + * pairs of `isAfterTrue`/`isAfterFalse` annotated CFG nodes. * - * In the base case, this is the last node of `conditionBlock`, and `flipped` is `false`. - * This definition accounts for (short circuting) `and`- and `or`-expressions, as the structure - * of basic blocks will reflect their semantics. + * Users typically obtain a `GuardNode` by casting from a more specific + * Cfg type: `g.(Cfg::CallNode)` for a call-based check, etc. * - * However, in the program - * ```python - * if not is_safe(path): - * return - * ``` - * the last node in the `ConditionBlock` is `not is_safe(path)`. - * - * We would like to consider also `is_safe(path)` a guard node, albeit with `flipped` being `true`. - * Thus we recurse through `not`-expressions. + * This replaces the legacy (pre-shared-CFG) `GuardNode`/`flipped` + * machinery: the shared CFG carries outcome information structurally + * (via `isAfterTrue`/`isAfterFalse`), so no separate polarity field + * is required. */ -ControlFlowNode guardNode(ConditionBlock conditionBlock, boolean flipped) { - // Base case: the last node truly does determine which successor is chosen - result = conditionBlock.getLastNode() and - flipped = false - or - // Recursive cases: - // if a guard node is a `not`-expression, - // the operand is also a guard node, but with inverted polarity. - exists(UnaryExprNode notNode | - result = notNode.getOperand() and - notNode.getNode().getOp() instanceof Not - | - notNode = guardNode(conditionBlock, flipped.booleanNot()) - ) - or - // if a guard node is compared to a boolean literal, - // the other operand is also a guard node, - // but with polarity depending on the literal (and on the comparison). - exists(CompareNode cmpNode, Cmpop op, ControlFlowNode b, boolean should_flip | - ( - cmpNode.operands(result, op, b) or - cmpNode.operands(b, op, result) - ) and - not result.getNode() instanceof BooleanLiteral and - ( - // comparing to the boolean - (op instanceof Eq or op instanceof Is) and - // we should flip if the value compared against, here the value of `b`, is false - should_flip = b.getNode().(BooleanLiteral).booleanValue().booleanNot() - or - // comparing to the negation of the boolean - (op instanceof NotEq or op instanceof IsNot) and - // again, we should flip if the value compared against, here the value of `not b`, is false. - // That is, if the value of `b` is true. - should_flip = b.getNode().(BooleanLiteral).booleanValue() +class GuardNode extends Cfg::ControlFlowNode { + GuardNode() { + // This is the canonical (post-order) version of an AST node, and + // some `[true]`/`[false]` variant of the same AST exists. We + // include the canonical node because users identify guards by + // their AST (`g.(Cfg::CallNode)` etc.), and the outcome-tagged + // variants are accessed by `outcomeOfGuard` below. + exists(Cfg::ControlFlowNode outcome | + outcome.getNode() = this.getNode() and + (outcome.isAfterTrue(_) or outcome.isAfterFalse(_)) ) - | - // we flip `flipped` according to `should_flip` via the formula `flipped xor should_flip`. - flipped in [true, false] and - cmpNode = guardNode(conditionBlock, flipped.booleanXor(should_flip)) - ) + or + // Or: this IS one of the outcome-tagged variants, supporting + // users who want to query the split point directly. + this.isAfterTrue(_) + or + this.isAfterFalse(_) + } + + /** Holds if this guard controls block `b` upon evaluating to `branch`. */ + predicate controlsBlock(Cfg::BasicBlock b, boolean branch) { + branch in [true, false] and + exists(Cfg::ControlFlowNode outcomeNode | + outcomeOfGuard(this, outcomeNode, branch) and + outcomeNode.getBasicBlock().dominates(b) + ) + } } /** - * A node that controls whether other nodes are evaluated. + * Holds if some execution that arrives at `outcomeNode` corresponds + * to `guard` having evaluated to `branch`. * - * The field `flipped` allows us to match `GuardNode`s underneath - * `not`-expressions and still choose the appropriate branch. + * For a direct guard `if g:`, the outcome node is `g` itself with + * `isAfterTrue`/`isAfterFalse`. For wrapped guards like `not g` or + * `g == True`, the outcome is on the wrapping expression with an + * appropriate polarity transform — we follow those wrappers up the + * AST to find the outermost expression that carries an actual + * `isAfterTrue`/`isAfterFalse` outcome. */ -class GuardNode extends ControlFlowNode { - ConditionBlock conditionBlock; - boolean flipped; - - GuardNode() { this = guardNode(conditionBlock, flipped) } - - /** Holds if this guard controls block `b` upon evaluating to `branch`. */ - predicate controlsBlock(BasicBlock b, boolean branch) { - branch in [true, false] and - conditionBlock.controls(b, branch.booleanXor(flipped)) - } +private predicate outcomeOfGuard( + Cfg::ControlFlowNode guard, Cfg::ControlFlowNode outcomeNode, boolean branch +) { + // Base case: the guard itself splits — the outcome node is the + // first node of an outcome BB, with matching outcome label. + // (The shared CFG also marks inner expressions with outcome flags + // for analysis purposes, but only "splitting" nodes — those that + // actually start an outcome BB — are valid guards on their own.) + outcomeNode.getNode() = guard.getNode() and + outcomeNode = outcomeNode.getBasicBlock().firstNode() and + ( + outcomeNode.isAfterTrue(_) and branch = true + or + outcomeNode.isAfterFalse(_) and branch = false + ) + or + // Recursive: `not guard` — same outcome split as `guard`, flipped. + exists(Cfg::UnaryExprNode notNode, boolean notBranch | + notNode.getOperand().getNode() = guard.getNode() and + notNode.getNode().getOp() instanceof Not and + outcomeOfGuard(notNode, outcomeNode, notBranch) and + branch = notBranch.booleanNot() + ) + or + // Recursive: comparisons against a boolean literal. + exists(Cfg::CompareNode cmpNode, Cmpop op, Cfg::ControlFlowNode otherOperand, + Cfg::ControlFlowNode guardOperand, boolean polarity, boolean cmpBranch + | + guardOperand.getNode() = guard.getNode() and + (cmpNode.operands(guardOperand, op, otherOperand) or cmpNode.operands(otherOperand, op, guardOperand)) and + not guard.getNode() instanceof BooleanLiteral and + ( + (op instanceof Eq or op instanceof Is) and + polarity = otherOperand.getNode().(BooleanLiteral).booleanValue() + or + (op instanceof NotEq or op instanceof IsNot) and + polarity = otherOperand.getNode().(BooleanLiteral).booleanValue().booleanNot() + ) and + outcomeOfGuard(cmpNode, outcomeNode, cmpBranch) and + branch = cmpBranch.booleanXor(polarity.booleanNot()) + ) } /** * Holds if the guard `g` validates `node` upon evaluating to `branch`. */ -signature predicate guardChecksSig(GuardNode g, ControlFlowNode node, boolean branch); +signature predicate guardChecksSig(GuardNode g, Cfg::ControlFlowNode node, boolean branch); /** * Provides a set of barrier nodes for a guard that validates a node. @@ -670,7 +692,7 @@ module BarrierGuard { result = ParameterizedBarrierGuard::getABarrierNode(_) } - private predicate extendedGuardChecks(GuardNode g, ControlFlowNode node, boolean branch, Unit u) { + private predicate extendedGuardChecks(GuardNode g, Cfg::ControlFlowNode node, boolean branch, Unit u) { guardChecks(g, node, branch) and u = u } @@ -680,7 +702,7 @@ bindingset[this] private signature class ParamSig; private module WithParam { - signature predicate guardChecksSig(GuardNode g, ControlFlowNode node, boolean branch, P param); + signature predicate guardChecksSig(GuardNode g, Cfg::ControlFlowNode node, boolean branch, P param); } /** @@ -693,10 +715,10 @@ module ParameterizedBarrierGuard::guardChecksSig/4 guar /** Gets a node that is safely guarded by the given guard check with parameter `param`. */ overlay[global] ExprNode getABarrierNode(P param) { - exists(GuardNode g, EssaDefinition def, ControlFlowNode node, boolean branch | - AdjacentUses::useOfDef(def, node) and + exists(GuardNode g, SsaImpl::EssaDefinition def, Cfg::ControlFlowNode node, boolean branch | + SsaImpl::AdjacentUses::useOfDef(def, node) and guardChecks(g, node, branch, param) and - AdjacentUses::useOfDef(def, result.asCfgNode()) and + SsaImpl::AdjacentUses::useOfDef(def, result.asCfgNode()) and g.controlsBlock(result.asCfgNode().getBasicBlock(), branch) ) } @@ -712,7 +734,7 @@ module ExternalBarrierGuard { private import semmle.python.ApiGraphs overlay[global] - private predicate guardCheck(GuardNode g, ControlFlowNode node, boolean branch, string kind) { + private predicate guardCheck(GuardNode g, Cfg::ControlFlowNode node, boolean branch, string kind) { exists(API::CallNode call, API::Node parameter | parameter = call.getAParameter() and parameter = ModelOutput::getABarrierGuardNode(kind, branch) @@ -748,10 +770,10 @@ newtype TContent = TSetElementContent() or /** An element of a tuple at a specific index. */ TTupleElementContent(int index) { - exists(any(TupleNode tn).getElement(index)) + exists(any(Cfg::TupleNode tn).getElement(index)) or // Arguments can overflow and end up in the starred parameter tuple. - exists(any(CallNode cn).getArg(index)) + exists(any(Cfg::CallNode cn).getArg(index)) or // since flow summaries might use tuples, we ensure that we at least have valid // TTupleElementContent for the 0..7 (7 was picked to match `small_tuple` in @@ -768,10 +790,10 @@ newtype TContent = or // d["key"] = ... key = - any(SubscriptNode sub | sub.isStore() | sub.getIndex().getNode().(StringLiteral).getText()) + any(Cfg::SubscriptNode sub | sub.isStore() | sub.getIndex().getNode().(StringLiteral).getText()) or // d.setdefault("key", ...) - exists(CallNode call | call.getFunction().(AttrNode).getName() = "setdefault" | + exists(Cfg::CallNode call | call.getFunction().(Cfg::AttrNode).getName() = "setdefault" | key = call.getArg(0).getNode().(StringLiteral).getText() ) } or diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll b/python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll index f3943f53f86..c9f2c79aebc 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll @@ -5,6 +5,8 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg +private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.internal.ImportStar private import semmle.python.dataflow.new.TypeTracking @@ -69,13 +71,11 @@ module ImportResolution { * Holds if there is an ESSA step from `defFrom` to `defTo`, which should be allowed * for import resolution. */ - private predicate allowedEssaImportStep(EssaDefinition defFrom, EssaDefinition defTo) { + private predicate allowedEssaImportStep(SsaImpl::EssaDefinition defFrom, SsaImpl::EssaDefinition defTo) { // to handle definitions guarded by if-then-else - defFrom = defTo.(PhiFunction).getAnInput() - or - // refined variable - // example: https://github.com/nvbn/thefuck/blob/ceeaeab94b5df5a4fe9d94d61e4f6b0bbea96378/thefuck/utils.py#L25-L45 - defFrom = defTo.(EssaNodeRefinement).getInput().getDefinition() + defFrom = defTo.(SsaImpl::PhiFunction).getAnInput() + // Note: legacy ESSA refinement-step (e.g. for `foo.bar = X`) is + // not modelled in the new SSA. We rely on phi steps only. } /** @@ -92,30 +92,32 @@ module ImportResolution { // Definitions made inside `m` itself // // for code such as `foo = ...; foo.bar = ...` there will be TWO - // EssaDefinition/EssaVariable. One for `foo = ...` (AssignmentDefinition) and one + // SsaImpl::EssaDefinition/SsaImpl::EssaVariable. One for `foo = ...` (SsaImpl::AssignmentDefinition) and one // for `foo.bar = ...`. The one for `foo.bar = ...` (EssaNodeRefinement). The // EssaNodeRefinement is the one that will reach the end of the module (normal // exit). // // However, we cannot just use the EssaNodeRefinement as the `val`, because the // normal data-flow depends on use-use flow, and use-use flow targets CFG nodes not - // EssaNodes. So we need to go back from the EssaDefinition/EssaVariable that + // EssaNodes. So we need to go back from the SsaImpl::EssaDefinition/SsaImpl::EssaVariable that // reaches the end of the module, to the first definition of the variable, and then // track forwards using use-use flow to find a suitable CFG node that has flow into // it from use-use flow. - exists(EssaVariable lastUseVar, EssaVariable firstDef | + exists(SsaImpl::EssaVariable lastUseVar, SsaImpl::EssaVariable firstDef | lastUseVar.getName() = name and // we ignore special variable $ introduced by our analysis (not used for anything) // we ignore special variable * introduced by `from import *` -- TODO: understand why we even have this? not name in ["$", "*"] and - lastUseVar.getAUse() = m.getANormalExit() and + exists(Cfg::ControlFlowNode exit | + exit.isNormalExit() and exit.getScope() = m and lastUseVar.getAUse() = exit + ) and allowedEssaImportStep*(firstDef, lastUseVar) and not allowedEssaImportStep(_, firstDef) | not LocalFlow::defToFirstUse(firstDef, _) and - val.asCfgNode() = firstDef.getDefinition().(EssaNodeDefinition).getDefiningNode() + val.asCfgNode() = firstDef.getDefinition().(SsaImpl::EssaNodeDefinition).getDefiningNode() or - exists(ControlFlowNode mid, ControlFlowNode end | + exists(Cfg::ControlFlowNode mid, Cfg::ControlFlowNode end | LocalFlow::defToFirstUse(firstDef, mid) and LocalFlow::useToNextUse*(mid, end) and not LocalFlow::useToNextUse(end, _) and @@ -143,9 +145,9 @@ module ImportResolution { * handles simple cases where we can statically tell that this is the case. */ private predicate all_mentions_name(Module m, string name) { - exists(DefinitionNode def, SequenceNode n | + exists(Cfg::DefinitionNode def, Cfg::SequenceNode n | def.getValue() = n and - def.(NameNode).getId() = "__all__" and + def.(Cfg::NameNode).getId() = "__all__" and def.getScope() = m and any(StringLiteral s | s.getText() = name) = n.getAnElement().getNode() ) @@ -158,18 +160,18 @@ module ImportResolution { */ private predicate no_or_complicated_all(Module m) { // No mention of `__all__` in the module - not exists(DefinitionNode def | def.getScope() = m and def.(NameNode).getId() = "__all__") + not exists(Cfg::DefinitionNode def | def.getScope() = m and def.(Cfg::NameNode).getId() = "__all__") or // `__all__` is set to a non-sequence value - exists(DefinitionNode def | - def.(NameNode).getId() = "__all__" and + exists(Cfg::DefinitionNode def | + def.(Cfg::NameNode).getId() = "__all__" and def.getScope() = m and - not def.getValue() instanceof SequenceNode + not def.getValue() instanceof Cfg::SequenceNode ) or // `__all__` is used in some way that doesn't involve storing a value in it. This usually means // it is being mutated through `append` or `extend`, which we don't handle. - exists(NameNode n | n.getId() = "__all__" and n.getScope() = m and n.isLoad()) + exists(Cfg::NameNode n | n.getId() = "__all__" and n.getScope() = m and n.isLoad()) } private predicate potential_module_export(Module m, string name) { @@ -177,7 +179,7 @@ module ImportResolution { or no_or_complicated_all(m) and ( - exists(NameNode n | n.getId() = name and n.getScope() = m and name.charAt(0) != "_") + exists(Cfg::NameNode n | n.getId() = name and n.getScope() = m and name.charAt(0) != "_") or exists(Alias a | a.getAsname().(Name).getId() = name and a.getValue().getScope() = m) ) @@ -207,12 +209,12 @@ module ImportResolution { /** Gets a module that may have been added to `sys.modules`. */ private Module sys_modules_module_with_name(string name) { - exists(ControlFlowNode n, DataFlow::Node mod | - exists(SubscriptNode sub | + exists(Cfg::ControlFlowNode n, DataFlow::Node mod | + exists(Cfg::SubscriptNode sub | sub.getObject() = sys_modules_reference().asCfgNode() and sub.getIndex() = n and n.getNode().(StringLiteral).getText() = name and - sub.(DefinitionNode).getValue() = mod.asCfgNode() and + sub.(Cfg::DefinitionNode).getValue() = mod.asCfgNode() and mod = getModuleReference(result) ) ) @@ -324,11 +326,11 @@ module ImportResolution { // name as a submodule, we always consider that this attribute _could_ be a // reference to the submodule, even if we don't know that the submodule has been // imported yet. - exists(string submodule, Module package, EssaVariable var | + exists(string submodule, Module package, SsaImpl::EssaVariable var | submodule = var.getName() and - SsaSource::init_module_submodule_defn(var.getSourceVariable(), package.getEntryNode()) and + SsaSource::init_module_submodule_defn(var.getSourceVariable().getVariable(), package.getEntryNode()) and m = getModuleFromName(package.getPackageName() + "." + submodule) and - result.asCfgNode() = var.getDefinition().(EssaNodeDefinition).getDefiningNode() + result.asCfgNode() = var.getDefinition().(SsaImpl::EssaNodeDefinition).getDefiningNode() ) } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/ImportStar.qll b/python/ql/lib/semmle/python/dataflow/new/internal/ImportStar.qll index 83f8ee862c3..8c906696be7 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/ImportStar.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/ImportStar.qll @@ -3,6 +3,7 @@ overlay[local] module; private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.internal.Builtins private import semmle.python.dataflow.new.internal.ImportResolution private import semmle.python.dataflow.new.DataFlow @@ -15,7 +16,7 @@ module ImportStar { */ overlay[local] cached - predicate namePossiblyDefinedInImportStar(NameNode n, string name, Scope s) { + predicate namePossiblyDefinedInImportStar(Cfg::NameNode n, string name, Scope s) { n.isLoad() and name = n.getId() and s = n.getScope().getEnclosingScope*() and @@ -52,7 +53,7 @@ module ImportStar { /** Holds if a global variable called `name` is assigned a value in the module `m`. */ cached predicate globalNameDefinedInModule(string name, Module m) { - exists(NameNode n | + exists(Cfg::NameNode n | not exists(LocalVariable v | n.defines(v)) and n.isStore() and name = n.getId() and @@ -66,7 +67,7 @@ module ImportStar { */ overlay[global] cached - predicate importStarResolvesTo(NameNode n, Module m) { + predicate importStarResolvesTo(Cfg::NameNode n, Module m) { m = getStarImported+(n.getEnclosingModule()) and globalNameDefinedInModule(n.getId(), m) and not isDefinedLocally(n.getNode()) @@ -99,7 +100,7 @@ module ImportStar { */ overlay[local] cached - ControlFlowNode potentialImportStarBase(Scope s) { - result = any(ImportStarNode n | n.getScope() = s).getModule() + Cfg::ControlFlowNode potentialImportStarBase(Scope s) { + result = any(Cfg::ImportStarNode n | n.getScope() = s).getModule() } } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll b/python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll index 5def15fa3c8..0704763bd89 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll @@ -170,6 +170,8 @@ overlay[local] module; private import python +private import semmle.python.controlflow.internal.Cfg as Cfg +private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl private import DataFlowPublic /** @@ -178,7 +180,7 @@ private import DataFlowPublic * This class abstracts away the differing representations of comprehensions and * for statements. */ -class ForTarget extends ControlFlowNode { +class ForTarget extends Cfg::ControlFlowNode { Expr source; ForTarget() { @@ -198,7 +200,7 @@ class ForTarget extends ControlFlowNode { } /** The LHS of an assignment, it also records the assigned value. */ -class AssignmentTarget extends ControlFlowNode { +class AssignmentTarget extends Cfg::ControlFlowNode { Expr value; AssignmentTarget() { @@ -209,7 +211,7 @@ class AssignmentTarget extends ControlFlowNode { } /** A direct (or top-level) target of an unpacking assignment. */ -class UnpackingAssignmentDirectTarget extends ControlFlowNode instanceof SequenceNode { +class UnpackingAssignmentDirectTarget extends Cfg::ControlFlowNode instanceof Cfg::SequenceNode { Expr value; UnpackingAssignmentDirectTarget() { @@ -222,7 +224,7 @@ class UnpackingAssignmentDirectTarget extends ControlFlowNode instanceof Sequenc } /** A (possibly recursive) target of an unpacking assignment. */ -class UnpackingAssignmentTarget extends ControlFlowNode { +class UnpackingAssignmentTarget extends Cfg::ControlFlowNode { UnpackingAssignmentTarget() { this instanceof UnpackingAssignmentDirectTarget or @@ -231,10 +233,10 @@ class UnpackingAssignmentTarget extends ControlFlowNode { } /** A (possibly recursive) target of an unpacking assignment which is also a sequence. */ -class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget instanceof SequenceNode { - ControlFlowNode getElement(int i) { result = super.getElement(i) } +class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget instanceof Cfg::SequenceNode { + Cfg::ControlFlowNode getElement(int i) { result = super.getElement(i) } - ControlFlowNode getAnElement() { result = this.getElement(_) } + Cfg::ControlFlowNode getAnElement() { result = this.getElement(_) } } /** @@ -255,7 +257,7 @@ predicate iterableUnpackingAssignmentFlowStep(Node nodeFrom, Node nodeTo) { predicate iterableUnpackingForReadStep(CfgNode nodeFrom, Content c, Node nodeTo) { exists(ForTarget target | nodeFrom.getNode().getNode() = target.getSource() and - target instanceof SequenceNode and + target instanceof Cfg::SequenceNode and nodeTo = TIterableSequenceNode(target) ) and ( @@ -323,11 +325,11 @@ predicate iterableUnpackingConvertingStoreStep(Node nodeFrom, Content c, Node no */ predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo) { exists( - UnpackingAssignmentSequenceTarget target, int index, ControlFlowNode element, int starIndex + UnpackingAssignmentSequenceTarget target, int index, Cfg::ControlFlowNode element, int starIndex | - target.getElement(starIndex) instanceof StarredNode + target.getElement(starIndex) instanceof Cfg::StarredNode or - not exists(target.getAnElement().(StarredNode)) and + not exists(target.getAnElement().(Cfg::StarredNode)) and starIndex = -1 | nodeFrom.(CfgNode).getNode() = target and @@ -342,18 +344,18 @@ predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo else c.(TupleElementContent).getIndex() >= index - 1 ) and ( - if element instanceof SequenceNode + if element instanceof Cfg::SequenceNode then // Step 5b nodeTo = TIterableSequenceNode(element) else - if element instanceof StarredNode + if element instanceof Cfg::StarredNode then // Step 5c nodeTo = TIterableElementNode(element) else // Step 5a - exists(MultiAssignmentDefinition mad | element = mad.getDefiningNode() | + exists(SsaImpl::MultiAssignmentDefinition mad | element = mad.getDefiningNode() | nodeTo.(CfgNode).getNode() = element ) ) @@ -366,7 +368,7 @@ predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo * content type `ListElementContent`. */ predicate iterableUnpackingStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) { - exists(ControlFlowNode starred, MultiAssignmentDefinition mad | + exists(Cfg::ControlFlowNode starred, SsaImpl::MultiAssignmentDefinition mad | starred.getNode() instanceof Starred and starred = mad.getDefiningNode() | diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll index 5cbe7b44ab3..9f63e2160ed 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll @@ -9,6 +9,7 @@ overlay[local] module; private import python +private import semmle.python.controlflow.internal.Cfg as Cfg import DataFlowPublic private import DataFlowPrivate private import semmle.python.internal.CachedStages @@ -314,7 +315,7 @@ private module Cached { */ cached predicate subscript(LocalSourceNode node, CfgNode subscript, CfgNode index) { - exists(CfgNode seq, SubscriptNode subscriptNode | subscriptNode = subscript.getNode() | + exists(CfgNode seq, Cfg::SubscriptNode subscriptNode | subscriptNode = subscript.getNode() | node.flowsTo(seq) and seq.getNode() = subscriptNode.getObject() and index.getNode() = subscriptNode.getIndex() diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/MatchUnpacking.qll b/python/ql/lib/semmle/python/dataflow/new/internal/MatchUnpacking.qll index e72e378da52..f931c460603 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/MatchUnpacking.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/MatchUnpacking.qll @@ -55,6 +55,7 @@ module; private import python private import DataFlowPublic +private import semmle.python.controlflow.internal.Cfg as Cfg /** * Holds when there is flow from the subject `nodeFrom` to the (top-level) pattern `nodeTo` of a `match` statement. @@ -91,8 +92,8 @@ predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) { or // the interior pattern flows to the alias nodeFrom.(CfgNode).getNode().getNode() = subject.getPattern() and - exists(PatternAliasDefinition pad | pad.getDefiningNode().getNode() = alias | - nodeTo.(CfgNode).getNode() = pad.getDefiningNode() + exists(Cfg::ControlFlowNode aliasCfg | aliasCfg.getNode() = alias | + nodeTo.(CfgNode).getNode() = aliasCfg ) ) } @@ -126,8 +127,8 @@ predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) { predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) { exists(MatchCapturePattern capture, Name var | capture.getVariable() = var | nodeFrom.(CfgNode).getNode().getNode() = capture and - exists(PatternCaptureDefinition pcd | pcd.getDefiningNode().getNode() = var | - nodeTo.(CfgNode).getNode() = pcd.getDefiningNode() + exists(Cfg::ControlFlowNode varCfg | varCfg.getNode() = var | + nodeTo.(CfgNode).getNode() = varCfg ) ) } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll index 59433a96aae..d406d30f148 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll @@ -56,6 +56,16 @@ private module CfgForSsa implements BB::CfgSig { * We only track variables that are read at least once in their scope — * tracking write-only variables is unnecessary work. */ +/** + * A source variable for SSA, wrapping a Python AST `Variable`. + * + * We only track variables that are read at least once in their scope — + * tracking write-only variables would be unnecessary work — *except* + * for module-scope globals, where the "read" can be external (e.g. + * `import mymodule; mymodule.x`). Such globals are tracked + * unconditionally so that import-resolution can find their defining + * write. + */ private newtype TSsaSourceVariable = TPyVar(Py::Variable v) { // Has a use somewhere — read-relevant for SSA. @@ -63,6 +73,19 @@ private newtype TSsaSourceVariable = or // Or has a deletion (treated as a write that destroys the value). exists(Cfg::NameNode n | n.deletes(v)) + or + // Or is a module-scope global written in this module — must be + // tracked even if never read locally, because importers may read + // it as an attribute on the module object. + v.getScope() instanceof Py::Module and + exists(Cfg::NameNode n | n.defines(v)) + or + // Or is a parameter — parameters must always have a + // `ParameterDefinition` for dataflow argument-routing to work, + // even if the parameter is never read in its scope. Mirrors + // legacy ESSA's `ParameterDefinition` (which fired for every + // parameter binding regardless of liveness). + exists(Py::Parameter p | p.asName() = v.getAStore()) } /** @@ -72,11 +95,44 @@ class SsaSourceVariable extends TSsaSourceVariable { /** Gets the underlying Python AST variable. */ Py::Variable getVariable() { this = TPyVar(result) } + /** Gets the (textual) name of this variable. */ + string getName() { result = this.getVariable().getId() } + /** Gets a textual representation of this source variable. */ string toString() { result = this.getVariable().toString() } /** Gets the location of this source variable. */ Py::Location getLocation() { result = this.getVariable().getScope().getLocation() } + + /** Gets the scope in which this variable lives. */ + Py::Scope getScope() { result = this.getVariable().getScope() } + + /** + * Gets a use of this variable as it appears in the source — a `NameNode` + * that loads or deletes the variable. Mirrors legacy + * `SsaSourceVariable.getASourceUse()`. + */ + Cfg::ControlFlowNode getASourceUse() { + exists(Cfg::NameNode n | result = n | n.uses(this.getVariable()) or n.deletes(this.getVariable())) + } + + /** + * Gets an implicit use of this variable. The new SSA does not have + * implicit-use refinements, but we keep this for API parity — every + * normal-exit of the variable's scope counts as a sink, ensuring + * variables stay live to scope exit for taint-tracking. + */ + Cfg::ControlFlowNode getAnImplicitUse() { + result.isNormalExit() and result.getScope() = this.getScope() + } + + /** + * Gets a use of this variable — either an explicit source use or an + * implicit use at scope exit. Mirrors legacy `SsaSourceVariable.getAUse()`. + */ + Cfg::ControlFlowNode getAUse() { + result = this.getASourceUse() or result = this.getAnImplicitUse() + } } /** @@ -93,7 +149,13 @@ private predicate nonLocalReadIn(Py::Variable v, Py::Scope s) { n.uses(v) and n.getScope() = s and not exists(Cfg::NameNode def | def.defines(v) and def.getScope() = s) - ) + ) and + // Match legacy ESSA: only create entry defs for variables that have + // at least one defining store somewhere — otherwise the entry def + // represents "nothing reaches here", which is the default anyway and + // introduces no useful flow. (Legacy's `ModuleVariable` required a + // store; this is the closure-aware generalisation.) + exists(Cfg::NameNode store | store.defines(v)) } /** @@ -164,11 +226,25 @@ private module SsaImplInput implements SsaImplCommon::InputSig { // ignore the flow steps from the synthetic sequence node to the real sequence node, // since we only support one level of content in type-trackers, and the nested // structure requires two levels at least to be useful. - not exists(SequenceNode outer | + not exists(Cfg::SequenceNode outer | outer.getAnElement() = nodeTo.asCfgNode() and IterableUnpacking::iterableUnpackingTupleFlowStep(nodeFrom, nodeTo) ) @@ -259,7 +264,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { // Since we only support one level of content in type-trackers we don't actually // support `(aa, ab), (ba, bb) = ...`. Therefore we exclude the read-step from `(aa, // ab)` to `aa` (since it is not needed). - not exists(SequenceNode outer | + not exists(Cfg::SequenceNode outer | outer.getAnElement() = nodeFrom.asCfgNode() and IterableUnpacking::iterableUnpackingTupleFlowStep(_, nodeFrom) ) and @@ -269,7 +274,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { IterableUnpacking::iterableUnpackingForReadStep(_, _, seq) and IterableUnpacking::iterableUnpackingConvertingReadStep(seq, _, elem) and IterableUnpacking::iterableUnpackingConvertingStoreStep(elem, _, nodeFrom) and - nodeFrom.asCfgNode() instanceof SequenceNode + nodeFrom.asCfgNode() instanceof Cfg::SequenceNode ) or TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, content) @@ -306,13 +311,13 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { // // nodeFrom is `expr` // nodeTo is entry node for `f` - exists(ScopeEntryDefinition e, SsaSourceVariable var, DefinitionNode def | + exists(SsaImpl::ScopeEntryDefinition e, SsaImpl::SsaSourceVariable var, Cfg::DefinitionNode def | e.getSourceVariable() = var and - var.hasDefiningNode(def) + def.getNode() = var.getVariable().getAStore() | nodeTo.(DataFlowPublic::ScopeEntryDefinitionNode).getDefinition() = e and nodeFrom.asCfgNode() = def and - var.getScope().getScope*() = nodeFrom.getScope() + var.getVariable().getScope().getScope*() = nodeFrom.getScope() ) } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll b/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll index 5d647af09bc..2cd2fedb1d7 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll @@ -3,6 +3,9 @@ overlay[local] module; private import python +private import semmle.python.controlflow.internal.Cfg as Cfg +private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl +private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl private import DataFlowPublic private import semmle.python.dataflow.new.internal.DataFlowPrivate private import codeql.dataflow.VariableCapture as Shared @@ -14,10 +17,10 @@ private import codeql.dataflow.VariableCapture as Shared // The first is the main implementation, the second is a performance motivated restriction. // The restriction is to clear any `CapturedVariableContent` before writing a new one // to avoid long access paths (see the link for a nice explanation). -private module CaptureInput implements Shared::InputSig { +private module CaptureInput implements Shared::InputSig { private import python as PY - additional class ExprCfgNode extends ControlFlowNode { + additional class ExprCfgNode extends Cfg::ControlFlowNode { ExprCfgNode() { isExpressionNode(this) } } @@ -25,7 +28,9 @@ private module CaptureInput implements Shared::InputSig; +module Flow = Shared::Flow; private Flow::ClosureNode asClosureNode(Node n) { result = n.(SynthCaptureNode).getSynthesizedCaptureNode() diff --git a/python/ql/lib/semmle/python/frameworks/Bottle.qll b/python/ql/lib/semmle/python/frameworks/Bottle.qll index aa2c906948d..9714f196777 100644 --- a/python/ql/lib/semmle/python/frameworks/Bottle.qll +++ b/python/ql/lib/semmle/python/frameworks/Bottle.qll @@ -4,6 +4,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.Concepts private import semmle.python.ApiGraphs private import semmle.python.dataflow.new.RemoteFlowSources @@ -73,7 +74,9 @@ module Bottle { /** A response returned by a view callable. */ class BottleReturnResponse extends Http::Server::HttpResponse::Range { BottleReturnResponse() { - this.asCfgNode() = any(View::ViewCallable vc).getAReturnValueFlowNode() + exists(View::ViewCallable vc, Return ret | + ret.getScope() = vc and this.asCfgNode().getNode() = ret.getValue() + ) } override DataFlow::Node getBody() { result = this } @@ -154,9 +157,9 @@ module Bottle { DataFlow::Node value; HeaderWriteSubscript() { - exists(SubscriptNode subscript | + exists(Cfg::SubscriptNode subscript | this.asCfgNode() = subscript and - value.asCfgNode() = subscript.(DefinitionNode).getValue() and + value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and name.asCfgNode() = subscript.getIndex() and subscript.getObject() = headers().asSource().asCfgNode() ) diff --git a/python/ql/lib/semmle/python/frameworks/Django.qll b/python/ql/lib/semmle/python/frameworks/Django.qll index ee0ed4a84dd..37d5b335725 100644 --- a/python/ql/lib/semmle/python/frameworks/Django.qll +++ b/python/ql/lib/semmle/python/frameworks/Django.qll @@ -4,6 +4,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking @@ -1305,7 +1306,7 @@ module PrivateDjango { dict.(DataFlow::MethodCallNode).calls(files, "dict") ) | - this.asCfgNode().(SubscriptNode).getObject() = dict.asCfgNode() + this.asCfgNode().(Cfg::SubscriptNode).getObject() = dict.asCfgNode() or this.(DataFlow::MethodCallNode).calls(dict, "get") ) @@ -1314,7 +1315,7 @@ module PrivateDjango { exists(DataFlow::AttrRead files, DataFlow::MethodCallNode getlistCall | files.accesses(instance(), "FILES") and getlistCall.calls(files, "getlist") and - this.asCfgNode().(SubscriptNode).getObject() = getlistCall.asCfgNode() + this.asCfgNode().(Cfg::SubscriptNode).getObject() = getlistCall.asCfgNode() ) } } @@ -2216,7 +2217,7 @@ module PrivateDjango { DataFlow::Node value; DjangoResponseCookieSubscriptWrite() { - exists(SubscriptNode subscript, DataFlow::AttrRead cookieLookup | + exists(Cfg::SubscriptNode subscript, DataFlow::AttrRead cookieLookup | // To give `this` a value, we need to choose between either LHS or RHS, // and just go with the LHS this.asCfgNode() = subscript @@ -2228,7 +2229,7 @@ module PrivateDjango { | cookieLookup.flowsTo(subscriptObj) ) and - value.asCfgNode() = subscript.(DefinitionNode).getValue() and + value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and index.asCfgNode() = subscript.getIndex() ) } @@ -2249,7 +2250,7 @@ module PrivateDjango { DataFlow::Node value; DjangoResponseHeaderSubscriptWrite() { - exists(SubscriptNode subscript, DataFlow::AttrRead headerLookup | + exists(Cfg::SubscriptNode subscript, DataFlow::AttrRead headerLookup | // To give `this` a value, we need to choose between either LHS or RHS, // and just go with the LHS this.asCfgNode() = subscript @@ -2261,7 +2262,7 @@ module PrivateDjango { | headerLookup.flowsTo(subscriptObj) ) and - value.asCfgNode() = subscript.(DefinitionNode).getValue() and + value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and index.asCfgNode() = subscript.getIndex() ) } @@ -2284,14 +2285,14 @@ module PrivateDjango { DataFlow::Node value; DjangoResponseSubscriptWrite() { - exists(SubscriptNode subscript | + exists(Cfg::SubscriptNode subscript | // To give `this` a value, we need to choose between either LHS or RHS, // and just go with the LHS this.asCfgNode() = subscript | subscript.getObject() = DjangoImpl::DjangoHttp::Response::HttpResponse::instance().asCfgNode() and - value.asCfgNode() = subscript.(DefinitionNode).getValue() and + value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and index.asCfgNode() = subscript.getIndex() ) } @@ -2426,7 +2427,7 @@ module PrivateDjango { /** Gets a reference to the result of calling the `as_view` classmethod of this class. */ private DataFlow::TypeTrackingNode asViewResult(DataFlow::TypeTracker t) { t.start() and - result.asCfgNode().(CallNode).getFunction() = this.asViewRef().asCfgNode() + result.asCfgNode().(Cfg::CallNode).getFunction() = this.asViewRef().asCfgNode() or exists(DataFlow::TypeTracker t2 | result = this.asViewResult(t2).track(t2, t)) } @@ -2872,7 +2873,9 @@ module PrivateDjango { DataFlow::CfgNode { DjangoRedirectViewGetRedirectUrlReturn() { - node = any(GetRedirectUrlFunction f).getAReturnValueFlowNode() + exists(GetRedirectUrlFunction f, Return ret | + ret.getScope() = f and node.getNode() = ret.getValue() + ) } override DataFlow::Node getRedirectLocation() { result = this } diff --git a/python/ql/lib/semmle/python/frameworks/FastApi.qll b/python/ql/lib/semmle/python/frameworks/FastApi.qll index 9d52e9b4f57..a8a35f48509 100644 --- a/python/ql/lib/semmle/python/frameworks/FastApi.qll +++ b/python/ql/lib/semmle/python/frameworks/FastApi.qll @@ -4,6 +4,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking @@ -309,7 +310,11 @@ module FastApi { FastApiRouteSetup routeSetup; FastApiRequestHandlerReturn() { - node = routeSetup.getARequestHandler().getAReturnValueFlowNode() + exists(Function h, Return ret | + h = routeSetup.getARequestHandler() and + ret.getScope() = h and + node.getNode() = ret.getValue() + ) } override DataFlow::Node getBody() { result = this } @@ -438,7 +443,7 @@ module FastApi { DataFlow::Node value; HeaderSubscriptWrite() { - exists(SubscriptNode subscript, DataFlow::AttrRead headerLookup | + exists(Cfg::SubscriptNode subscript, DataFlow::AttrRead headerLookup | // To give `this` a value, we need to choose between either LHS or RHS, // and just go with the LHS this.asCfgNode() = subscript @@ -447,7 +452,7 @@ module FastApi { exists(DataFlow::Node subscriptObj | subscriptObj.asCfgNode() = subscript.getObject() | headerLookup.flowsTo(subscriptObj) ) and - value.asCfgNode() = subscript.(DefinitionNode).getValue() and + value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and index.asCfgNode() = subscript.getIndex() ) } diff --git a/python/ql/lib/semmle/python/frameworks/Flask.qll b/python/ql/lib/semmle/python/frameworks/Flask.qll index 67a50b1ce76..e2bb161eafe 100644 --- a/python/ql/lib/semmle/python/frameworks/Flask.qll +++ b/python/ql/lib/semmle/python/frameworks/Flask.qll @@ -536,7 +536,7 @@ module Flask { FlaskRouteHandlerReturn() { exists(Function routeHandler | routeHandler = any(FlaskRouteSetup rs).getARequestHandler() and - node = routeHandler.getAReturnValueFlowNode() and + exists(Return ret | ret.getScope() = routeHandler and node.getNode() = ret.getValue()) and not this instanceof Flask::Response::InstanceSource ) } diff --git a/python/ql/lib/semmle/python/frameworks/Gradio.qll b/python/ql/lib/semmle/python/frameworks/Gradio.qll index 11109e150bf..92aec4bd773 100644 --- a/python/ql/lib/semmle/python/frameworks/Gradio.qll +++ b/python/ql/lib/semmle/python/frameworks/Gradio.qll @@ -4,6 +4,7 @@ */ import python +private import semmle.python.controlflow.internal.Cfg as Cfg import semmle.python.dataflow.new.RemoteFlowSources import semmle.python.dataflow.new.TaintTracking import semmle.python.ApiGraphs @@ -51,9 +52,9 @@ module Gradio { // limit only to lists of parameters given to `inputs`. ( ( - call.getKeywordParameter("inputs").asSink().asCfgNode() instanceof ListNode + call.getKeywordParameter("inputs").asSink().asCfgNode() instanceof Cfg::ListNode or - call.getParameter(1).asSink().asCfgNode() instanceof ListNode + call.getParameter(1).asSink().asCfgNode() instanceof Cfg::ListNode ) and ( this = call.getKeywordParameter("inputs").getASubscript().getAValueReachingSink() @@ -75,8 +76,8 @@ module Gradio { exists(GradioInput call | this = call.getParameter(0, "fn").getParameter(_).asSource() and // exclude lists of parameters given to `inputs` - not call.getKeywordParameter("inputs").asSink().asCfgNode() instanceof ListNode and - not call.getParameter(1).asSink().asCfgNode() instanceof ListNode + not call.getKeywordParameter("inputs").asSink().asCfgNode() instanceof Cfg::ListNode and + not call.getParameter(1).asSink().asCfgNode() instanceof Cfg::ListNode ) } @@ -105,16 +106,16 @@ module Gradio { // handle cases where there are multiple arguments passed as a list to `inputs` ( ( - node.getKeywordParameter("inputs").asSink().asCfgNode() instanceof ListNode + node.getKeywordParameter("inputs").asSink().asCfgNode() instanceof Cfg::ListNode or - node.getParameter(1).asSink().asCfgNode() instanceof ListNode + node.getParameter(1).asSink().asCfgNode() instanceof Cfg::ListNode ) and exists(int i | nodeTo = node.getParameter(0, "fn").getParameter(i).asSource() | nodeFrom.asCfgNode() = - node.getKeywordParameter("inputs").asSink().asCfgNode().(ListNode).getElement(i) + node.getKeywordParameter("inputs").asSink().asCfgNode().(Cfg::ListNode).getElement(i) or nodeFrom.asCfgNode() = - node.getParameter(1).asSink().asCfgNode().(ListNode).getElement(i) + node.getParameter(1).asSink().asCfgNode().(Cfg::ListNode).getElement(i) ) ) ) diff --git a/python/ql/lib/semmle/python/frameworks/MarkupSafe.qll b/python/ql/lib/semmle/python/frameworks/MarkupSafe.qll index 6e3b630ffa5..a4832b27ba4 100644 --- a/python/ql/lib/semmle/python/frameworks/MarkupSafe.qll +++ b/python/ql/lib/semmle/python/frameworks/MarkupSafe.qll @@ -4,6 +4,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Concepts @@ -46,7 +47,7 @@ module MarkupSafeModel { /** A direct instantiation of `markupsafe.Markup`. */ private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { - override CallNode node; + override Cfg::CallNode node; ClassInstantiation() { this = classRef().getACall() } } @@ -64,7 +65,7 @@ module MarkupSafeModel { /** A string concatenation with a `markupsafe.Markup` involved. */ class StringConcat extends Markup::InstanceSource, DataFlow::CfgNode { - override BinaryExprNode node; + override Cfg::BinaryExprNode node; StringConcat() { node.getOp() instanceof Add and @@ -79,7 +80,7 @@ module MarkupSafeModel { /** A %-style string format with `markupsafe.Markup` as the format string. */ class PercentStringFormat extends Markup::InstanceSource, DataFlow::CfgNode { - override BinaryExprNode node; + override Cfg::BinaryExprNode node; PercentStringFormat() { node.getOp() instanceof Mod and diff --git a/python/ql/lib/semmle/python/frameworks/Pycurl.qll b/python/ql/lib/semmle/python/frameworks/Pycurl.qll index 7280eec5f61..030e6c66f8d 100644 --- a/python/ql/lib/semmle/python/frameworks/Pycurl.qll +++ b/python/ql/lib/semmle/python/frameworks/Pycurl.qll @@ -7,6 +7,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.Concepts private import semmle.python.ApiGraphs private import semmle.python.frameworks.data.ModelsAsData @@ -56,7 +57,7 @@ module Pycurl { { OutgoingRequestCall() { this = setopt().getACall() and - this.getArg(0).asCfgNode().(AttrNode).getName() = "URL" + this.getArg(0).asCfgNode().(Cfg::AttrNode).getName() = "URL" } override DataFlow::Node getAUrlPart() { @@ -81,7 +82,7 @@ module Pycurl { private class CurlSslCall extends Http::Client::Request::Range instanceof DataFlow::CallCfgNode { CurlSslCall() { this = setopt().getACall() and - this.getArg(0).asCfgNode().(AttrNode).getName() = ["SSL_VERIFYPEER", "SSL_VERIFYHOST"] + this.getArg(0).asCfgNode().(Cfg::AttrNode).getName() = ["SSL_VERIFYPEER", "SSL_VERIFYHOST"] } override DataFlow::Node getAUrlPart() { none() } diff --git a/python/ql/lib/semmle/python/frameworks/Pydantic.qll b/python/ql/lib/semmle/python/frameworks/Pydantic.qll index c3d76835b42..1aa5c9142e6 100644 --- a/python/ql/lib/semmle/python/frameworks/Pydantic.qll +++ b/python/ql/lib/semmle/python/frameworks/Pydantic.qll @@ -7,6 +7,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Concepts @@ -93,7 +94,7 @@ module Pydantic { // be a Pydantic model. So `model[0]` will be an overapproximation, but should not // really cause problems (since we don't expect real code to contain such accesses) nodeFrom = instance() and - nodeTo.asCfgNode().(SubscriptNode).getObject() = nodeFrom.asCfgNode() + nodeTo.asCfgNode().(Cfg::SubscriptNode).getObject() = nodeFrom.asCfgNode() } /** diff --git a/python/ql/lib/semmle/python/frameworks/Pyramid.qll b/python/ql/lib/semmle/python/frameworks/Pyramid.qll index 63e19363fe8..2bd72a85253 100644 --- a/python/ql/lib/semmle/python/frameworks/Pyramid.qll +++ b/python/ql/lib/semmle/python/frameworks/Pyramid.qll @@ -166,7 +166,9 @@ module Pyramid { /** A response returned by a view callable. */ private class PyramidReturnResponse extends Http::Server::HttpResponse::Range { PyramidReturnResponse() { - this.asCfgNode() = any(View::ViewCallable vc).getAReturnValueFlowNode() and + exists(View::ViewCallable vc, Return ret | + ret.getScope() = vc and this.asCfgNode().getNode() = ret.getValue() + ) and not this = instance() } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 5d3b994880a..d059c59e9af 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -6,6 +6,7 @@ overlay[local?] module; private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.TaintTracking private import semmle.python.dataflow.new.RemoteFlowSources @@ -1246,7 +1247,7 @@ module StdlibPrivate { /** An additional taint step for calls to `os.path.join` */ private class OsPathJoinCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - exists(CallNode call | + exists(Cfg::CallNode call | nodeTo.asCfgNode() = call and call = OS::OsPath::join().getACall().asCfgNode() and call.getAnArg() = nodeFrom.asCfgNode() @@ -1317,13 +1318,13 @@ module StdlibPrivate { // run, so if we're able to, we only mark the first element as the command // (and not the arguments to the command). // - result.asCfgNode() = arg_args.asCfgNode().(SequenceNode).getElement(0) + result.asCfgNode() = arg_args.asCfgNode().(Cfg::SequenceNode).getElement(0) or // Either the "args" argument is not a sequence (which is valid) or we where // just not able to figure it out. Simply mark the "args" argument as the // command. // - not arg_args.asCfgNode() instanceof SequenceNode and + not arg_args.asCfgNode() instanceof Cfg::SequenceNode and result = arg_args ) ) @@ -1542,7 +1543,7 @@ module StdlibPrivate { * See https://docs.python.org/3/library/functions.html#eval */ private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CallCfgNode { - override CallNode node; + override Cfg::CallNode node; BuiltinsEvalCall() { this = API::builtin("eval").getACall() } @@ -1923,7 +1924,7 @@ module StdlibPrivate { nodeFrom = instance().getAValueReachableFromSource() and nodeTo = [getvalueRef(), getfirstRef(), getlistRef()].getAValueReachableFromSource() or - nodeFrom.asCfgNode() = nodeTo.asCfgNode().(CallNode).getFunction() and + nodeFrom.asCfgNode() = nodeTo.asCfgNode().(Cfg::CallNode).getFunction() and ( nodeFrom = getvalueRef().getAValueReachableFromSource() and nodeTo = getvalueResult().asSource() @@ -1939,7 +1940,7 @@ module StdlibPrivate { nodeFrom in [ instance().getAValueReachableFromSource(), fieldList().getAValueReachableFromSource() ] and - nodeTo.asCfgNode().(SubscriptNode).getObject() = nodeFrom.asCfgNode() + nodeTo.asCfgNode().(Cfg::SubscriptNode).getObject() = nodeFrom.asCfgNode() or // Attributes on Field nodeFrom = field().getAValueReachableFromSource() and @@ -2254,8 +2255,8 @@ module StdlibPrivate { DataFlow::CfgNode { WsgirefSimpleServerApplicationReturn() { - exists(WsgirefSimpleServerApplication requestHandler | - node = requestHandler.getAReturnValueFlowNode() + exists(WsgirefSimpleServerApplication requestHandler, Return ret | + ret.getScope() = requestHandler and node.getNode() = ret.getValue() ) } @@ -2337,9 +2338,9 @@ module StdlibPrivate { DataFlow::Node value; HeaderWriteSubscript() { - exists(SubscriptNode subscript | + exists(Cfg::SubscriptNode subscript | this.asCfgNode() = subscript and - value.asCfgNode() = subscript.(DefinitionNode).getValue() and + value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and name.asCfgNode() = subscript.getIndex() and subscript.getObject() = instance().asCfgNode() ) @@ -2681,7 +2682,7 @@ module StdlibPrivate { or // Data injection // Special handling of the `/` operator - exists(BinaryExprNode slash, DataFlow::Node pathOperand, DataFlow::TypeTracker t2 | + exists(Cfg::BinaryExprNode slash, DataFlow::Node pathOperand, DataFlow::TypeTracker t2 | slash.getOp() instanceof Div and pathOperand.asCfgNode() = slash.getAnOperand() and pathlibPath(t2).flowsTo(pathOperand) and @@ -2806,7 +2807,7 @@ module StdlibPrivate { pathlibPath().flowsTo(nodeTo) and ( // Special handling of the `/` operator - exists(BinaryExprNode slash, DataFlow::Node pathOperand | + exists(Cfg::BinaryExprNode slash, DataFlow::Node pathOperand | slash.getOp() instanceof Div and pathOperand.asCfgNode() = slash.getAnOperand() and pathlibPath().flowsTo(pathOperand) @@ -4605,9 +4606,9 @@ module StdlibPrivate { } override predicate propagatesFlow(string input, string output, boolean preservesValue) { - exists(CallNode c, string name, ControlFlowNode n, DataFlow::AttributeContent ac | - c.getFunction().(NameNode).getId() = "replace" or - c.getFunction().(AttrNode).getName() = "replace" + exists(Cfg::CallNode c, string name, Cfg::ControlFlowNode n, DataFlow::AttributeContent ac | + c.getFunction().(Cfg::NameNode).getId() = "replace" or + c.getFunction().(Cfg::AttrNode).getName() = "replace" | n = c.getArgByName(name) and ac.getAttribute() = name and @@ -5151,10 +5152,10 @@ module StdlibPrivate { * See https://docs.python.org/3.9/library/stdtypes.html#str.startswith */ private class StartswithCall extends Path::SafeAccessCheck::Range { - StartswithCall() { this.(CallNode).getFunction().(AttrNode).getName() = "startswith" } + StartswithCall() { this.(Cfg::CallNode).getFunction().(Cfg::AttrNode).getName() = "startswith" } - override predicate checks(ControlFlowNode node, boolean branch) { - node = this.(CallNode).getFunction().(AttrNode).getObject() and + override predicate checks(Cfg::ControlFlowNode node, boolean branch) { + node = this.(Cfg::CallNode).getFunction().(Cfg::AttrNode).getObject() and branch = true } } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib/Urllib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib/Urllib.qll index 6b5764e5592..af670c009b6 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib/Urllib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib/Urllib.qll @@ -8,6 +8,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.Concepts private import semmle.python.ApiGraphs private import semmle.python.security.dataflow.UrlRedirectCustomizations @@ -91,7 +92,7 @@ private module Urllib { * A read of the `netloc` attribute of a parsed URL as returned by `urllib.parse.urlparse`, * which is being checked in a way that is relevant for URL redirection vulnerabilities. */ - private predicate netlocCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) { + private predicate netlocCheck(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) { exists(DataFlow::CallCfgNode urlParseCall, DataFlow::AttrRead netlocRead | urlParseCall = getUrlParseCall() and netlocRead = urlParseCall.getAnAttributeRead("netloc") and diff --git a/python/ql/lib/semmle/python/frameworks/Tornado.qll b/python/ql/lib/semmle/python/frameworks/Tornado.qll index 61cf7df316e..3e57cdd3108 100644 --- a/python/ql/lib/semmle/python/frameworks/Tornado.qll +++ b/python/ql/lib/semmle/python/frameworks/Tornado.qll @@ -4,6 +4,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking @@ -72,9 +73,9 @@ module Tornado { DataFlow::Node value; TornadoHeaderSubscriptWrite() { - exists(SubscriptNode subscript | + exists(Cfg::SubscriptNode subscript | subscript.getObject() = instance().asCfgNode() and - value.asCfgNode() = subscript.(DefinitionNode).getValue() and + value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and index.asCfgNode() = subscript.getIndex() and this.asCfgNode() = subscript ) @@ -422,7 +423,7 @@ module Tornado { // be able to do something more structured for providing modeling of the members // of a container-object. exists(DataFlow::AttrRead files | files.accesses(instance(), "cookies") | - this.asCfgNode().(SubscriptNode).getObject() = files.asCfgNode() + this.asCfgNode().(Cfg::SubscriptNode).getObject() = files.asCfgNode() or this.(DataFlow::MethodCallNode).calls(files, "get") ) @@ -479,20 +480,20 @@ module Tornado { // routing // --------------------------------------------------------------------------- /** Gets a sequence that defines a number of route rules */ - SequenceNode routeSetupRuleList() { - exists(CallNode call | + Cfg::SequenceNode routeSetupRuleList() { + exists(Cfg::CallNode call | call = any(TornadoModule::Web::Application::ClassInstantiation c).asCfgNode() | result in [call.getArg(0), call.getArgByName("handlers")] ) or - exists(CallNode call | + exists(Cfg::CallNode call | call.getFunction() = TornadoModule::Web::Application::add_handlers().asCfgNode() | result in [call.getArg(1), call.getArgByName("host_handlers")] ) or - result = routeSetupRuleList().getElement(_).(TupleNode).getElement(1) + result = routeSetupRuleList().getElement(_).(Cfg::TupleNode).getElement(1) } /** A tornado route setup. */ @@ -515,12 +516,12 @@ module Tornado { /** A route setup using a tuple. */ private class TornadoTupleRouteSetup extends TornadoRouteSetup, DataFlow::CfgNode { - override TupleNode node; + override Cfg::TupleNode node; TornadoTupleRouteSetup() { node = routeSetupRuleList().getElement(_) and count(node.getElement(_)) = 2 and - not node.getElement(1) instanceof SequenceNode + not node.getElement(1) instanceof Cfg::SequenceNode } override DataFlow::Node getUrlPatternArg() { result.asCfgNode() = node.getElement(0) } diff --git a/python/ql/lib/semmle/python/frameworks/Twisted.qll b/python/ql/lib/semmle/python/frameworks/Twisted.qll index 60aedd8fb58..6d32bf42ef1 100644 --- a/python/ql/lib/semmle/python/frameworks/Twisted.qll +++ b/python/ql/lib/semmle/python/frameworks/Twisted.qll @@ -182,7 +182,9 @@ private module Twisted { DataFlow::CfgNode { TwistedResourceRenderMethodReturn() { - this.asCfgNode() = any(TwistedResourceRenderMethod meth).getAReturnValueFlowNode() + exists(TwistedResourceRenderMethod meth, Return ret | + ret.getScope() = meth and this.asCfgNode().getNode() = ret.getValue() + ) } override DataFlow::Node getBody() { result = this } diff --git a/python/ql/lib/semmle/python/frameworks/Werkzeug.qll b/python/ql/lib/semmle/python/frameworks/Werkzeug.qll index d9150c8cfec..d88171275da 100644 --- a/python/ql/lib/semmle/python/frameworks/Werkzeug.qll +++ b/python/ql/lib/semmle/python/frameworks/Werkzeug.qll @@ -6,6 +6,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.TaintTracking private import semmle.python.ApiGraphs @@ -221,9 +222,9 @@ module Werkzeug { DataFlow::Node value; HeaderWriteSubscript() { - exists(SubscriptNode subscript | + exists(Cfg::SubscriptNode subscript | this.asCfgNode() = subscript and - value.asCfgNode() = subscript.(DefinitionNode).getValue() and + value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and name.asCfgNode() = subscript.getIndex() and subscript.getObject() = instance().asCfgNode() ) diff --git a/python/ql/lib/semmle/python/frameworks/Yaml.qll b/python/ql/lib/semmle/python/frameworks/Yaml.qll index 670fad75e6e..8c5601f5a71 100644 --- a/python/ql/lib/semmle/python/frameworks/Yaml.qll +++ b/python/ql/lib/semmle/python/frameworks/Yaml.qll @@ -8,6 +8,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts private import semmle.python.ApiGraphs @@ -28,7 +29,7 @@ private module Yaml { * See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down). */ private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode { - override CallNode node; + override Cfg::CallNode node; string func_name; YamlLoadCall() { diff --git a/python/ql/lib/semmle/python/frameworks/Yarl.qll b/python/ql/lib/semmle/python/frameworks/Yarl.qll index a1c602e6016..67007576433 100644 --- a/python/ql/lib/semmle/python/frameworks/Yarl.qll +++ b/python/ql/lib/semmle/python/frameworks/Yarl.qll @@ -4,6 +4,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Concepts @@ -111,7 +112,7 @@ module Yarl { } private predicate yarlUrlIsAbsoluteCall( - DataFlow::GuardNode g, ControlFlowNode node, boolean branch + DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch ) { exists(ClassInstantiation instance, DataFlow::MethodCallNode call | call.calls(instance, "is_absolute") and diff --git a/python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll b/python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll index d91c4bbd78c..23031c30772 100644 --- a/python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll +++ b/python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll @@ -3,6 +3,7 @@ */ import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts as Concepts private import semmle.python.regex @@ -78,7 +79,7 @@ private module FindRegexMode { t.start() and exists(API::Node flag | flag_name = canonical_name(flag) and result = flag.asSource()) or - exists(BinaryExprNode binop, DataFlow::Node operand | + exists(Cfg::BinaryExprNode binop, DataFlow::Node operand | operand.getALocalSource() = re_flag_tracker(flag_name, t.continue()) and operand.asCfgNode() = binop.getAnOperand() and (binop.getOp() instanceof BitOr or binop.getOp() instanceof Add) and diff --git a/python/ql/lib/semmle/python/security/dataflow/UrlRedirectCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/UrlRedirectCustomizations.qll index 75a638fc3a4..38b05ecf05d 100644 --- a/python/ql/lib/semmle/python/security/dataflow/UrlRedirectCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/UrlRedirectCustomizations.qll @@ -5,6 +5,7 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts private import semmle.python.ApiGraphs @@ -111,7 +112,7 @@ module UrlRedirect { // Url redirection is a problem only if the user controls the prefix of the URL. // TODO: This is a copy of the taint-sanitizer from the old points-to query, which doesn't // cover formatting. - exists(BinaryExprNode string_concat | string_concat.getOp() instanceof Add | + exists(Cfg::BinaryExprNode string_concat | string_concat.getOp() instanceof Add | string_concat.getRight() = this.asCfgNode() ) } diff --git a/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll b/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll index cbd3b4c6aa5..f9b1b1c676d 100644 --- a/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll +++ b/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll @@ -1,4 +1,5 @@ import python +private import semmle.python.controlflow.internal.Cfg as Cfg import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.internal.DataFlowPrivate import FlowTest @@ -23,7 +24,7 @@ import MakeTest> module MaximalFlowsConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node node) { exists(node.getLocation().getFile().getRelativePath()) and - not node.asCfgNode() instanceof CallNode and + not node.asCfgNode() instanceof Cfg::CallNode and not node.asCfgNode().getNode() instanceof Return and not node instanceof DataFlow::ParameterNode and not node instanceof DataFlow::PostUpdateNode and @@ -34,9 +35,9 @@ module MaximalFlowsConfig implements DataFlow::ConfigSig { predicate isSink(DataFlow::Node node) { exists(node.getLocation().getFile().getRelativePath()) and - not any(CallNode c).getArg(_) = node.asCfgNode() and + not any(Cfg::CallNode c).getArg(_) = node.asCfgNode() and not isArgumentNode(node, _, _) and - not node.asCfgNode().(NameNode).getId().matches("SINK%") and + not node.asCfgNode().(Cfg::NameNode).getId().matches("SINK%") and not DataFlow::localFlowStep(node, _) } } diff --git a/python/ql/lib/utils/test/dataflow/NormalDataflowTest.qll b/python/ql/lib/utils/test/dataflow/NormalDataflowTest.qll index 696b43d5f03..6c27cdc2523 100644 --- a/python/ql/lib/utils/test/dataflow/NormalDataflowTest.qll +++ b/python/ql/lib/utils/test/dataflow/NormalDataflowTest.qll @@ -1,4 +1,5 @@ import python +private import semmle.python.controlflow.internal.Cfg as Cfg import utils.test.dataflow.FlowTest import utils.test.dataflow.testConfig private import semmle.python.dataflow.new.internal.PrintNode @@ -19,7 +20,7 @@ query predicate missingAnnotationOnSink(Location location, string error, string TestConfig::isSink(sink) and // note: we only care about `SINK` and not `SINK_F`, so we have to reconstruct manually. exists(DataFlow::CallCfgNode call | - call.getFunction().asCfgNode().(NameNode).getId() = "SINK" and + call.getFunction().asCfgNode().(Cfg::NameNode).getId() = "SINK" and (sink = call.getArg(_) or sink = call.getArgByName(_)) ) and location = sink.getLocation() and diff --git a/python/ql/lib/utils/test/dataflow/NormalTaintTrackingTest.qll b/python/ql/lib/utils/test/dataflow/NormalTaintTrackingTest.qll index 753f8f61e13..df2f35c48fe 100644 --- a/python/ql/lib/utils/test/dataflow/NormalTaintTrackingTest.qll +++ b/python/ql/lib/utils/test/dataflow/NormalTaintTrackingTest.qll @@ -1,4 +1,5 @@ import python +private import semmle.python.controlflow.internal.Cfg as Cfg import utils.test.dataflow.FlowTest import utils.test.dataflow.testTaintConfig private import semmle.python.dataflow.new.internal.PrintNode @@ -18,7 +19,7 @@ query predicate missingAnnotationOnSink(Location location, string error, string exists(DataFlow::Node sink | exists(DataFlow::CallCfgNode call | // note: we only care about `SINK` and not `SINK_F`, so we have to reconstruct manually. - call.getFunction().asCfgNode().(NameNode).getId() = "SINK" and + call.getFunction().asCfgNode().(Cfg::NameNode).getId() = "SINK" and (sink = call.getArg(_) or sink = call.getArgByName(_)) ) and location = sink.getLocation() and diff --git a/python/ql/lib/utils/test/dataflow/RoutingTest.qll b/python/ql/lib/utils/test/dataflow/RoutingTest.qll index e7ac4e87247..ffa52dbd550 100644 --- a/python/ql/lib/utils/test/dataflow/RoutingTest.qll +++ b/python/ql/lib/utils/test/dataflow/RoutingTest.qll @@ -1,4 +1,5 @@ import python +private import semmle.python.controlflow.internal.Cfg as Cfg import semmle.python.dataflow.new.DataFlow import utils.test.InlineExpectationsTest private import semmle.python.dataflow.new.internal.PrintNode @@ -49,7 +50,7 @@ private string fromValue(DataFlow::Node fromNode) { pragma[inline] private string fromFunc(DataFlow::ArgumentNode fromNode) { - result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId() + result = fromNode.getCall().getNode().(Cfg::CallNode).getFunction().getNode().(Name).getId() } pragma[inline] diff --git a/python/ql/lib/utils/test/dataflow/UnresolvedCalls.qll b/python/ql/lib/utils/test/dataflow/UnresolvedCalls.qll index a4dfb07ee90..d48f9844938 100644 --- a/python/ql/lib/utils/test/dataflow/UnresolvedCalls.qll +++ b/python/ql/lib/utils/test/dataflow/UnresolvedCalls.qll @@ -1,15 +1,16 @@ import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.internal.PrintNode private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate private import semmle.python.ApiGraphs import utils.test.InlineExpectationsTest signature module UnresolvedCallExpectationsSig { - predicate unresolvedCall(CallNode call); + predicate unresolvedCall(Cfg::CallNode call); } module DefaultUnresolvedCallExpectations implements UnresolvedCallExpectationsSig { - predicate unresolvedCall(CallNode call) { + predicate unresolvedCall(Cfg::CallNode call) { not exists(DataFlowPrivate::DataFlowCall dfc | exists(dfc.getCallable()) and dfc.getNode() = call ) and @@ -24,7 +25,7 @@ module MakeUnresolvedCallExpectations { predicate hasActualResult(Location location, string element, string tag, string value) { exists(location.getFile().getRelativePath()) and - exists(CallNode call | Impl::unresolvedCall(call) | + exists(Cfg::CallNode call | Impl::unresolvedCall(call) | location = call.getLocation() and tag = "unresolved_call" and value = prettyExpr(call.getNode()) and diff --git a/python/ql/lib/utils/test/dataflow/testConfig.qll b/python/ql/lib/utils/test/dataflow/testConfig.qll index 552180eeaaf..cfdf1e1a7c9 100644 --- a/python/ql/lib/utils/test/dataflow/testConfig.qll +++ b/python/ql/lib/utils/test/dataflow/testConfig.qll @@ -21,11 +21,12 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg import semmle.python.dataflow.new.DataFlow module TestConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node node) { - node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "SOURCE" + node.(DataFlow::CfgNode).getNode().(Cfg::NameNode).getId() = "SOURCE" or node.(DataFlow::CfgNode).getNode().getNode().(StringLiteral).getS() = "source" or @@ -37,7 +38,7 @@ module TestConfig implements DataFlow::ConfigSig { predicate isSink(DataFlow::Node node) { exists(DataFlow::CallCfgNode call | - call.getFunction().asCfgNode().(NameNode).getId() in ["SINK", "SINK_F"] and + call.getFunction().asCfgNode().(Cfg::NameNode).getId() in ["SINK", "SINK_F"] and (node = call.getArg(_) or node = call.getArgByName(_)) and not node = call.getArgByName("not_present_at_runtime") ) diff --git a/python/ql/lib/utils/test/dataflow/testTaintConfig.qll b/python/ql/lib/utils/test/dataflow/testTaintConfig.qll index c9770600eeb..b784042901e 100644 --- a/python/ql/lib/utils/test/dataflow/testTaintConfig.qll +++ b/python/ql/lib/utils/test/dataflow/testTaintConfig.qll @@ -21,12 +21,13 @@ */ private import python +private import semmle.python.controlflow.internal.Cfg as Cfg import semmle.python.dataflow.new.DataFlow import semmle.python.dataflow.new.TaintTracking module TestConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node node) { - node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "SOURCE" + node.(DataFlow::CfgNode).getNode().(Cfg::NameNode).getId() = "SOURCE" or node.(DataFlow::CfgNode).getNode().getNode().(StringLiteral).getS() = "source" or @@ -37,8 +38,8 @@ module TestConfig implements DataFlow::ConfigSig { } predicate isSink(DataFlow::Node node) { - exists(CallNode call | - call.getFunction().(NameNode).getId() in ["SINK", "SINK_F"] and + exists(Cfg::CallNode call | + call.getFunction().(Cfg::NameNode).getId() in ["SINK", "SINK_F"] and node.(DataFlow::CfgNode).getNode() = call.getAnArg() ) } diff --git a/python/ql/test/experimental/meta/InlineTaintTest.qll b/python/ql/test/experimental/meta/InlineTaintTest.qll index 525775d5106..5c20a9913be 100644 --- a/python/ql/test/experimental/meta/InlineTaintTest.qll +++ b/python/ql/test/experimental/meta/InlineTaintTest.qll @@ -10,6 +10,7 @@ */ import python +private import semmle.python.controlflow.internal.Cfg as Cfg import semmle.python.dataflow.new.DataFlow import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.RemoteFlowSources @@ -19,14 +20,14 @@ private import semmle.python.Concepts DataFlow::Node shouldBeTainted() { exists(DataFlow::CallCfgNode call | - call.getFunction().asCfgNode().(NameNode).getId() = "ensure_tainted" and + call.getFunction().asCfgNode().(Cfg::NameNode).getId() = "ensure_tainted" and result in [call.getArg(_), call.getArgByName(_)] ) } DataFlow::Node shouldNotBeTainted() { exists(DataFlow::CallCfgNode call | - call.getFunction().asCfgNode().(NameNode).getId() = "ensure_not_tainted" and + call.getFunction().asCfgNode().(Cfg::NameNode).getId() = "ensure_not_tainted" and result in [call.getArg(_), call.getArgByName(_)] ) } @@ -36,13 +37,13 @@ DataFlow::Node shouldNotBeTainted() { module Conf { module TestTaintTrackingConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node source) { - source.asCfgNode().(NameNode).getId() in [ + source.asCfgNode().(Cfg::NameNode).getId() in [ "TAINTED_STRING", "TAINTED_BYTES", "TAINTED_LIST", "TAINTED_DICT" ] or // User defined sources - exists(CallNode call | - call.getFunction().(NameNode).getId() = "taint" and + exists(Cfg::CallNode call | + call.getFunction().(Cfg::NameNode).getId() = "taint" and source.(DataFlow::CfgNode).getNode() = call.getAnArg() ) or diff --git a/python/ql/test/library-tests/dataflow/summaries/TestSummaries.qll b/python/ql/test/library-tests/dataflow/summaries/TestSummaries.qll index 14d68455d62..87e1d787101 100644 --- a/python/ql/test/library-tests/dataflow/summaries/TestSummaries.qll +++ b/python/ql/test/library-tests/dataflow/summaries/TestSummaries.qll @@ -2,6 +2,7 @@ overlay[local?] module; private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.FlowSummary private import semmle.python.ApiGraphs @@ -17,7 +18,7 @@ module RecursionGuard { RecursionGuard() { this = "RecursionGuard" } override DataFlow::CallCfgNode getACall() { - result.getFunction().asCfgNode().(NameNode).getId() = this and + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this and (TT::callStep(_, _) implies any()) } @@ -33,7 +34,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable::Range { SummarizedCallableIdentity() { this = "identity" } override DataFlow::CallCfgNode getACall() { - result.getFunction().asCfgNode().(NameNode).getId() = this + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } @@ -50,7 +51,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable::Range { SummarizedCallableApplyLambda() { this = "apply_lambda" } override DataFlow::CallCfgNode getACall() { - result.getFunction().asCfgNode().(NameNode).getId() = this + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } @@ -70,7 +71,7 @@ private class SummarizedCallableReversed extends SummarizedCallable::Range { SummarizedCallableReversed() { this = "list_reversed" } override DataFlow::CallCfgNode getACall() { - result.getFunction().asCfgNode().(NameNode).getId() = this + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } @@ -86,7 +87,7 @@ private class SummarizedCallableMap extends SummarizedCallable::Range { SummarizedCallableMap() { this = "list_map" } override DataFlow::CallCfgNode getACall() { - result.getFunction().asCfgNode().(NameNode).getId() = this + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } @@ -106,7 +107,7 @@ private class SummarizedCallableAppend extends SummarizedCallable::Range { SummarizedCallableAppend() { this = "append_to_list" } override DataFlow::CallCfgNode getACall() { - result.getFunction().asCfgNode().(NameNode).getId() = this + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } diff --git a/python/ql/test/library-tests/dataflow/tainttracking/TestTaintLib.qll b/python/ql/test/library-tests/dataflow/tainttracking/TestTaintLib.qll index 67a9f576cc7..f46f08aa509 100644 --- a/python/ql/test/library-tests/dataflow/tainttracking/TestTaintLib.qll +++ b/python/ql/test/library-tests/dataflow/tainttracking/TestTaintLib.qll @@ -1,4 +1,5 @@ import python +private import semmle.python.controlflow.internal.Cfg as Cfg import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.internal.PrintNode @@ -6,20 +7,20 @@ private import semmle.python.dataflow.new.internal.PrintNode module TestTaintTrackingConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node source) { // Standard sources - source.(DataFlow::CfgNode).getNode().(NameNode).getId() in [ + source.(DataFlow::CfgNode).getNode().(Cfg::NameNode).getId() in [ "TAINTED_STRING", "TAINTED_BYTES", "TAINTED_LIST", "TAINTED_DICT" ] or // User defined sources - exists(CallNode call | - call.getFunction().(NameNode).getId() = "taint" and + exists(Cfg::CallNode call | + call.getFunction().(Cfg::NameNode).getId() = "taint" and source.(DataFlow::CfgNode).getNode() = call.getAnArg() ) } predicate isSink(DataFlow::Node sink) { - exists(CallNode call | - call.getFunction().(NameNode).getId() in ["ensure_tainted", "ensure_not_tainted"] and + exists(Cfg::CallNode call | + call.getFunction().(Cfg::NameNode).getId() in ["ensure_tainted", "ensure_not_tainted"] and sink.(DataFlow::CfgNode).getNode() = call.getAnArg() ) } diff --git a/python/ql/test/library-tests/dataflow/typetracking-summaries/TestSummaries.qll b/python/ql/test/library-tests/dataflow/typetracking-summaries/TestSummaries.qll index 57e0013b6e0..4651d5c6180 100644 --- a/python/ql/test/library-tests/dataflow/typetracking-summaries/TestSummaries.qll +++ b/python/ql/test/library-tests/dataflow/typetracking-summaries/TestSummaries.qll @@ -2,6 +2,7 @@ overlay[local?] module; private import python +private import semmle.python.controlflow.internal.Cfg as Cfg private import semmle.python.dataflow.new.FlowSummary private import semmle.python.ApiGraphs @@ -17,7 +18,7 @@ module RecursionGuard { RecursionGuard() { this = "TypeTrackingSummariesRecursionGuard" } override DataFlow::CallCfgNode getACall() { - result.getFunction().asCfgNode().(NameNode).getId() = this and + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this and (TT::callStep(_, _) implies any()) } @@ -41,7 +42,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable::Range { override DataFlow::CallCfgNode getACall() { none() } override DataFlow::CallCfgNode getACallSimple() { - result.getFunction().asCfgNode().(NameNode).getId() = this + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } @@ -60,7 +61,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable::Range { override DataFlow::CallCfgNode getACall() { none() } override DataFlow::CallCfgNode getACallSimple() { - result.getFunction().asCfgNode().(NameNode).getId() = this + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } @@ -82,7 +83,7 @@ private class SummarizedCallableReversed extends SummarizedCallable::Range { override DataFlow::CallCfgNode getACall() { none() } override DataFlow::CallCfgNode getACallSimple() { - result.getFunction().asCfgNode().(NameNode).getId() = this + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } @@ -100,7 +101,7 @@ private class SummarizedCallableMap extends SummarizedCallable::Range { override DataFlow::CallCfgNode getACall() { none() } override DataFlow::CallCfgNode getACallSimple() { - result.getFunction().asCfgNode().(NameNode).getId() = this + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } @@ -122,7 +123,7 @@ private class SummarizedCallableAppend extends SummarizedCallable::Range { override DataFlow::CallCfgNode getACall() { none() } override DataFlow::CallCfgNode getACallSimple() { - result.getFunction().asCfgNode().(NameNode).getId() = this + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } @@ -165,7 +166,7 @@ private class SummarizedCallableReadSecret extends SummarizedCallable::Range { override DataFlow::CallCfgNode getACall() { none() } override DataFlow::CallCfgNode getACallSimple() { - result.getFunction().asCfgNode().(NameNode).getId() = this + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } @@ -183,7 +184,7 @@ private class SummarizedCallableSetSecret extends SummarizedCallable::Range { override DataFlow::CallCfgNode getACall() { none() } override DataFlow::CallCfgNode getACallSimple() { - result.getFunction().asCfgNode().(NameNode).getId() = this + result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }