Python: migrate dataflow library to new CFG + shared SSA

Switches the trunk dataflow library and all in-tree consumers
(frameworks, ApiGraphs, Concepts, regexp, security customisations,
test harness) from the legacy Flow.qll/ESSA stack to the new
shared-CFG facade (Cfg.qll) and the ESSA-shaped adapter on the
shared-SSA library (SsaImpl.qll).

Highlights:

  * DataFlowPublic/Private/Dispatch, Attributes, VariableCapture,
    IterableUnpacking, ImportResolution, ImportStar, LocalSources,
    TaintTrackingPrivate, MatchUnpacking, TypeTrackingImpl,
    SsaImpl, Builtins all now qualify CFG/SSA references with
    Cfg:: / SsaImpl:: and stop pulling in semmle.python.essa.*.

  * AstNodeImpl.qll/Cfg.qll: ImportMember exposes its inner
    ImportExpr, DefinitionNode.getValue covers Alias / AnnAssign /
    AugAssign / AssignExpr / For-target / Parameter-default,
    ForNode is treated as an expression node, AnnotatedExitNode is
    canonical, and BoolExprNode.getAnOperand drops the dominance
    constraint that did not hold for short-circuit BBs.

  * SsaImpl.qll: parameters always get a ParameterDefinition (so
    unused parameters still have SSA defs), scope-entry defs for
    module globals require an actual store somewhere, scope-exit
    has a synthetic use so reaching-defs survives to module
    boundary, and the legacy SsaSourceVariable / EssaVariable
    surface (getName, getScope, getAUse, getASourceUse,
    getAnImplicitUse) is reinstated for downstream queries.

  * DataFlowPublic.qll: GuardNode redesigned around the new
    structural outcome nodes (isAfterTrue / isAfterFalse).  The
    legacy ConditionBlock + flipped indirection is gone;
    controlsBlock walks UP through 'not' / '==True' / 'is False'
    etc. via outcomeOfGuard, accumulating polarity cleanly.  Only
    BarrierGuard<...> is preserved as public API.

  * ModuleVariableNode.getAWrite and LocalFlow::definitionFlowStep
    bypass SSA and consult Cfg::NameNode.defines /
    Cfg::DefinitionNode.getValue directly, so that write defs
    pruned by shared SSA (because the variable has no in-scope
    read) still produce dataflow steps.

  * Frameworks + downstream consumers: replace
    EssaVariable.hasDefiningNode, getAReturnValueFlowNode,
    Parameter.getDefault, Scope.getEntryNode / getANormalExit etc.
    with CFG-side bridges through Cfg::ControlFlowNode.

The legacy Flow.qll / Essa.qll stack is untouched and remains
available for queries that import it directly.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
yoff
2026-05-26 07:20:44 +00:00
parent cccd207ae7
commit ccfaa6ea7f
49 changed files with 824 additions and 471 deletions

View File

@@ -6,8 +6,9 @@
* directed and labeled; they specify how the components represented by nodes relate to each other.
*/
// Importing python under the `py` namespace to avoid importing `CallNode` from `Flow.qll` and thereby having a naming conflict with `API::CallNode`.
// Importing python under the `py` namespace to avoid importing `Cfg::CallNode` from `Flow.qll` and thereby having a naming conflict with `API::CallNode`.
private import python as PY
private import semmle.python.controlflow.internal.Cfg as Cfg
import semmle.python.dataflow.new.DataFlow
private import semmle.python.internal.CachedStages
@@ -282,7 +283,7 @@ module API {
index = this.getIndex() and
(
// subscripting
exists(PY::SubscriptNode subscript |
exists(Cfg::SubscriptNode subscript |
subscript.getObject() = this.getAValueReachableFromSource().asCfgNode() and
subscript.getIndex() = index.asSink().asCfgNode()
|
@@ -290,7 +291,7 @@ module API {
subscript = result.asSource().asCfgNode()
or
// writing
subscript.(PY::DefinitionNode).getValue() = result.asSink().asCfgNode()
subscript.(Cfg::DefinitionNode).getValue() = result.asSink().asCfgNode()
)
or
// dictionary literals
@@ -684,7 +685,7 @@ module API {
* Ignores relative imports, such as `from ..foo.bar import baz`.
*/
private predicate imports(DataFlow::CfgNode imp, string name) {
exists(PY::ImportExprNode iexpr |
exists(Cfg::ImportExprNode iexpr |
imp.getNode() = iexpr and
not iexpr.getNode().isRelative() and
name = iexpr.getNode().getImportedModuleName()
@@ -775,7 +776,7 @@ module API {
// list literals, from `x` to `[x]`
// TODO: once convenient, this should be done at a higher level than the AST,
// at least at the CFG layer, to take splitting into account.
// Also consider `SequenceNode for generality.
// Also consider `Cfg::SequenceNode for generality.
exists(PY::List list | list = pred.(DataFlow::ExprNode).getNode().getNode() |
rhs.(DataFlow::ExprNode).getNode().getNode() = list.getAnElt() and
lbl = Label::subscript()
@@ -805,7 +806,7 @@ module API {
subscript = trackUseNode(src).getSubscript(index)
|
// from `x` to a definition of `x[...]`
rhs.asCfgNode() = subscript.asCfgNode().(PY::DefinitionNode).getValue() and
rhs.asCfgNode() = subscript.asCfgNode().(Cfg::DefinitionNode).getValue() and
lbl = Label::subscript()
or
// from `x` to `"key"` in `x["key"]`

View File

@@ -5,6 +5,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowImplSpecific
private import semmle.python.dataflow.new.RemoteFlowSources
@@ -214,7 +215,7 @@ module Path {
SafeAccessCheck() { this = DataFlow::BarrierGuard<safeAccessCheck/3>::getABarrierNode() }
}
private predicate safeAccessCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
private predicate safeAccessCheck(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) {
g.(SafeAccessCheck::Range).checks(node, branch)
}
@@ -223,7 +224,7 @@ module Path {
/** A data-flow node that checks that a path is safe to access in some way, for example by having a controlled prefix. */
abstract class Range extends DataFlow::GuardNode {
/** Holds if this guard validates `node` upon evaluating to `branch`. */
abstract predicate checks(ControlFlowNode node, boolean branch);
abstract predicate checks(Cfg::ControlFlowNode node, boolean branch);
}
}
}

View File

@@ -10,6 +10,9 @@
* - Intermediate nodes for multi-operand boolean expressions.
*/
overlay[local?]
module;
private import python as Py
private import codeql.controlflow.ControlFlowGraph
private import codeql.controlflow.SuccessorType
@@ -1193,6 +1196,30 @@ module Ast implements AstSig<Py::Location> {
override AstNode getChild(int index) { index = 0 and result = this.getObject() }
}
/**
* An `import x.y` module expression. Modelled as a leaf — the dotted
* name is just a string.
*/
additional class ImportExpression extends Expr {
ImportExpression() { this.asExpr() instanceof Py::ImportExpr }
}
/**
* A `from m import x` member access. The module sub-expression is a
* child so that the CFG visits both the module load and this
* attribute selection.
*/
additional class ImportMemberExpr extends Expr {
private Py::ImportMember im;
ImportMemberExpr() { this = TPyExpr(im) }
/** Gets the module expression `m` in `from m import x`. */
Expr getModule() { result.asExpr() = im.getModule() }
override AstNode getChild(int index) { index = 0 and result = this.getModule() }
}
/** A tuple literal. */
additional class TupleExpr extends Expr {
private Py::Tuple tuple;
@@ -1581,4 +1608,6 @@ Py::AstNode astNodeToPyNode(Ast::AstNode n) {
result = n.asStmt()
or
result = n.asScope()
or
result = n.asPattern()
}

View File

@@ -20,6 +20,24 @@ module;
private import python as Py
private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
private import codeql.controlflow.SuccessorType
private import codeql.controlflow.BasicBlock as BB
/**
* A nested sub-module that explicitly implements `BB::CfgSig`, so this
* `Cfg` facade can be passed to parameterised shared modules such as
* `codeql.dataflow.VariableCapture::Flow<L, Cfg, ...>`. The sub-module
* exposes the *raw* shared-CFG types from `AstNodeImpl.qll` (where the
* signature is satisfied natively), not the facade's wrapped types.
*/
module CfgSigImpl implements BB::CfgSig<Py::Location> {
class ControlFlowNode = CfgImpl::ControlFlowNode;
class BasicBlock = CfgImpl::BasicBlock;
class EntryBasicBlock = CfgImpl::Cfg::EntryBasicBlock;
predicate dominatingEdge = CfgImpl::Cfg::dominatingEdge/2;
}
/**
* Gets the Python AST node corresponding to CFG node `n`, if any.
@@ -51,6 +69,11 @@ private predicate isCanonical(CfgImpl::ControlFlowNode n) {
n instanceof CfgImpl::ControlFlow::EntryNode
or
n instanceof CfgImpl::ControlFlow::ExitNode
or
// Annotated exit nodes (normal + abnormal) — needed so that dataflow
// consumers can ask "is this the normal-exit of a scope?" and also
// so that scope-exit synthetic uses in SsaImpl can attach here.
n instanceof CfgImpl::ControlFlow::AnnotatedExitNode
}
/**
@@ -369,6 +392,35 @@ class BasicBlock extends CfgImpl::BasicBlock {
or
forex(BasicBlock immsucc | immsucc = super.getASuccessor() | immsucc.alwaysReaches(succ))
}
/**
* Holds if this basic block ends in a node that branches on a boolean
* outcome, and `other` is dominated by the corresponding successor
* for `branch` while not being reachable from the other branch
* without going through this BB.
*
* In other words: any execution that reaches `other` must have just
* evaluated the last node of this BB and taken the `branch` outcome.
* This mirrors the legacy `ConditionBlock.controls(BB, branch)`.
*/
predicate controls(BasicBlock other, boolean branch) {
exists(BasicBlock succ |
branch = true and succ = this.getATrueSuccessor()
or
branch = false and succ = this.getAFalseSuccessor()
|
succ.dominates(other) and
// The other branch must not also reach `other` — otherwise
// `other` is not actually controlled by `branch`.
not exists(BasicBlock otherSucc |
branch = true and otherSucc = this.getAFalseSuccessor()
or
branch = false and otherSucc = this.getATrueSuccessor()
|
otherSucc.reaches(other)
)
)
}
}
// ===========================================================================
@@ -734,8 +786,7 @@ class BoolExprNode extends ControlFlowNode {
ControlFlowNode getAnOperand() {
exists(Py::BoolExpr be |
be = toAst(this) and
be.getAValue() = toAst(result) and
result.getBasicBlock().dominates(this.getBasicBlock())
be.getAValue() = toAst(result)
)
}
}
@@ -766,20 +817,79 @@ class DefinitionNode extends ControlFlowNode {
/** Gets the value assigned, if any. */
ControlFlowNode getValue() {
exists(Py::Expr target, Py::Expr value |
target = toAst(this) and
value = toAst(result) and
result.getBasicBlock().dominates(this.getBasicBlock())
|
// x = value
exists(Py::Assign a | a.getATarget() = target and a.getValue() = value)
or
// x = y = value (nested chained-assign target)
exists(Py::Assign a | a.getATarget().(Py::Tuple).getElt(_) = target and a.getValue() = value)
// For-target: the value is the for-loop's iter expression (which
// is also where `Cfg::ForNode` lives — its `getNode()` returns the
// enclosing `Py::For` statement). Treated specially because there
// is no AST node holding the result of `iter(next(seq))`; we use
// the iter expression's CFG node as the stand-in.
exists(Py::For f |
f.getTarget() = toAst(this) and
toAst(result) = f.getIter()
)
or
exists(Py::AstNode value | value = assignedValue(toAst(this)) |
toAst(result) = value and
(
result.getBasicBlock().dominates(this.getBasicBlock())
or
result.isImport()
or
// The default value for a parameter is evaluated in the same basic block as
// the function definition, but the parameter belongs to the basic block of the
// function, so there is no dominance relationship between the two.
exists(Py::Parameter param | toAst(this) = param.asName())
)
)
}
}
/**
* Gets the AST node that holds the value assigned to `lhs` in a binding
* context. Mirrors `Flow.qll::assigned_value`.
*/
private Py::AstNode assignedValue(Py::Expr lhs) {
// lhs = result
exists(Py::Assign a | a.getATarget() = lhs and result = a.getValue())
or
// lhs := result
exists(Py::AssignExpr a | a.getTarget() = lhs and result = a.getValue())
or
// lhs: annotation = result
exists(Py::AnnAssign a | a.getTarget() = lhs and result = a.getValue())
or
// import result as lhs (also covers plain `import lhs`, where alias.getAsname() = lhs)
exists(Py::Alias a | a.getAsname() = lhs and result = a.getValue())
or
// lhs += x -> result is the (lhs + x) binary expression
exists(Py::AugAssign a, Py::BinaryExpr b |
b = a.getOperation() and result = b and lhs = b.getLeft()
)
or
// Nested sequence assign: ..., lhs, ... = ..., result, ...
exists(Py::Assign a | nestedSequenceAssign(a.getATarget(), a.getValue(), lhs, result))
or
// Parameter default
exists(Py::Parameter param | lhs = param.asName() and result = param.getDefault())
}
/**
* Helper for nested sequence assignments such as `(a, b), c = (1, 2), 3`.
*/
private predicate nestedSequenceAssign(
Py::Expr leftParent, Py::Expr rightParent, Py::Expr left, Py::Expr right
) {
exists(int i |
leftParent.(Py::Tuple).getElt(i) = left and rightParent.(Py::Tuple).getElt(i) = right
or
leftParent.(Py::List).getElt(i) = left and rightParent.(Py::List).getElt(i) = right
)
or
exists(Py::Expr leftMid, Py::Expr rightMid |
nestedSequenceAssign(leftParent, rightParent, leftMid, rightMid) and
nestedSequenceAssign(leftMid, rightMid, left, right)
)
}
/** A control flow node corresponding to a deletion (`del x`). */
class DeletionNode extends ControlFlowNode {
DeletionNode() { this.isDelete() }
@@ -789,8 +899,6 @@ class DeletionNode extends ControlFlowNode {
class ForNode extends ControlFlowNode {
ForNode() { exists(Py::For f | toAst(this) = f.getIter()) }
override Py::For getNode() { exists(Py::For f | toAst(this) = f.getIter() | result = f) }
/** Gets the iterable expression. */
ControlFlowNode getIter() {
result = this and result = result // canonical "after" of the iterable
@@ -943,8 +1051,15 @@ class DictNode extends ControlFlowNode {
/** A control flow node corresponding to an iterable in a `for` loop. */
class IterableNode extends ControlFlowNode {
IterableNode() {
exists(Py::For f | toAst(this) = f.getIter())
this instanceof SequenceNode
or
exists(Py::Comp c | toAst(this) = c.getIterable())
this instanceof SetNode
}
/** Gets the control flow node for an element of this iterable. */
ControlFlowNode getAnElement() {
result = this.(SequenceNode).getAnElement()
or
result = this.(SetNode).getAnElement()
}
}

View File

@@ -1,11 +1,12 @@
/** Provides commonly used BarrierGuards. */
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private predicate constCompare(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
exists(CompareNode cn | cn = g |
exists(ImmutableLiteral const, Cmpop op, ControlFlowNode c |
private predicate constCompare(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) {
exists(Cfg::CompareNode cn | cn = g |
exists(ImmutableLiteral const, Cmpop op, Cfg::ControlFlowNode c |
c.getNode() = const and
(
op = any(Eq eq) and branch = true
@@ -18,7 +19,7 @@ private predicate constCompare(DataFlow::GuardNode g, ControlFlowNode node, bool
cn.operands(node, op, c)
)
or
exists(NameConstant const, Cmpop op, ControlFlowNode c |
exists(NameConstant const, Cmpop op, Cfg::ControlFlowNode c |
c.getNode() = const and
(
op = any(Is is_) and branch = true
@@ -31,12 +32,12 @@ private predicate constCompare(DataFlow::GuardNode g, ControlFlowNode node, bool
cn.operands(node, op, c)
)
or
exists(IterableNode const_iterable, Cmpop op |
exists(Cfg::IterableNode const_iterable, Cmpop op |
op = any(In in_) and branch = true
or
op = any(NotIn ni) and branch = false
|
forall(ControlFlowNode elem | elem = const_iterable.getAnElement() |
forall(Cfg::ControlFlowNode elem | elem = const_iterable.getAnElement() |
elem.getNode() instanceof ImmutableLiteral
) and
cn.operands(node, op, const_iterable)

View File

@@ -4,6 +4,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
// Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource::Range`
private import semmle.python.Frameworks
@@ -105,7 +106,7 @@ private module SensitiveDataModeling {
or
// to cover functions that we don't have the definition for, and where the
// reference to the function has not already been marked as being sensitive
this.getFunction().asCfgNode().(NameNode).getId() = sensitiveString(classification)
this.getFunction().asCfgNode().(Cfg::NameNode).getId() = sensitiveString(classification)
}
override SensitiveDataClassification getClassification() { result = classification }
@@ -251,12 +252,12 @@ private module SensitiveDataModeling {
SensitiveDataClassification classification;
SensitiveVariableAssignment() {
exists(DefinitionNode def |
def.(NameNode).getId() = sensitiveString(classification) and
exists(Cfg::DefinitionNode def |
def.(Cfg::NameNode).getId() = sensitiveString(classification) and
(
this.asCfgNode() = def.getValue()
or
this.asCfgNode() = def.getValue().(ForNode).getSequence()
this.asCfgNode() = def.getValue().(Cfg::ForNode).getSequence()
) and
not this.asExpr() instanceof FunctionExpr and
not this.asExpr() instanceof ClassExpr
@@ -293,7 +294,7 @@ private module SensitiveDataModeling {
SensitiveDataClassification classification;
SensitiveSubscript() {
this.asCfgNode().(SubscriptNode).getIndex() =
this.asCfgNode().(Cfg::SubscriptNode).getIndex() =
sensitiveLookupStringConst(classification).asCfgNode()
}

View File

@@ -3,6 +3,7 @@ overlay[local]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import DataFlowUtil
import DataFlowPublic
private import DataFlowPrivate
@@ -83,9 +84,9 @@ abstract class AttrWrite extends AttrRef {
* ```python
* object.attr = value
* ```
* Also gives access to the `value` being written, by extending `DefinitionNode`.
* Also gives access to the `value` being written, by extending `Cfg::DefinitionNode`.
*/
private class AttributeAssignmentNode extends DefinitionNode, AttrNode { }
private class AttributeAssignmentNode extends Cfg::DefinitionNode, Cfg::AttrNode { }
/** A simple attribute assignment: `object.attr = value`. */
private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {
@@ -131,13 +132,13 @@ private class GlobalAttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {
override string getAttributeName() { result = node.getName() }
}
/** Represents `CallNode`s that may refer to calls to built-in functions or classes. */
private class BuiltInCallNode extends CallNode {
/** Represents `Cfg::CallNode`s that may refer to calls to built-in functions or classes. */
private class BuiltInCallNode extends Cfg::CallNode {
string name;
BuiltInCallNode() {
// TODO disallow instances where the name of the built-in may refer to an in-scope variable of that name.
exists(NameNode id |
exists(Cfg::NameNode id |
name = Builtins::getBuiltinName() and
this.getFunction() = id and
id.getId() = name and
@@ -145,7 +146,7 @@ private class BuiltInCallNode extends CallNode {
)
}
/** Gets the name of the built-in function that is called at this `CallNode` */
/** Gets the name of the built-in function that is called at this `Cfg::CallNode` */
string getBuiltinName() { result = name }
}
@@ -157,20 +158,20 @@ private class BuiltinAttrCallNode extends BuiltInCallNode {
BuiltinAttrCallNode() { name in ["setattr", "getattr", "hasattr", "delattr"] }
/** Gets the control flow node for object on which the attribute is accessed. */
ControlFlowNode getObject() { result in [this.getArg(0), this.getArgByName("object")] }
Cfg::ControlFlowNode getObject() { result in [this.getArg(0), this.getArgByName("object")] }
/**
* Gets the control flow node for the value that is being written to the attribute.
* Only relevant for `setattr` calls.
*/
ControlFlowNode getValue() {
Cfg::ControlFlowNode getValue() {
// only valid for `setattr`
name = "setattr" and
result in [this.getArg(2), this.getArgByName("value")]
}
/** Gets the control flow node that defines the name of the attribute being accessed. */
ControlFlowNode getName() { result in [this.getArg(1), this.getArgByName("name")] }
Cfg::ControlFlowNode getName() { result in [this.getArg(1), this.getArgByName("name")] }
}
/** Represents calls to the built-in `setattr`. */
@@ -205,10 +206,10 @@ private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode {
* attr = value
* ...
* ```
* Instances of this class correspond to the `NameNode` for `attr`, and also gives access to `value` by
* virtue of being a `DefinitionNode`.
* Instances of this class correspond to the `Cfg::NameNode` for `attr`, and also gives access to `value` by
* virtue of being a `Cfg::DefinitionNode`.
*/
private class ClassAttributeAssignmentNode extends DefinitionNode, NameNode {
private class ClassAttributeAssignmentNode extends Cfg::DefinitionNode, Cfg::NameNode {
ClassAttributeAssignmentNode() { this.getScope() = any(ClassExpr c).getInnerScope() }
}
@@ -248,7 +249,7 @@ abstract class AttrRead extends AttrRef, Node, LocalSourceNode {
/** A simple attribute read, e.g. `object.attr` */
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {
override AttrNode node;
override Cfg::AttrNode node;
AttributeReadAsAttrRead() { node.isLoad() }
@@ -285,7 +286,7 @@ private class GetAttrCallAsAttrRead extends AttrRead, CfgNode {
* is treated as if it is a read of the attribute `module.attr`, even if `module` is not imported directly.
*/
private class ModuleAttributeImportAsAttrRead extends AttrRead, CfgNode {
override ImportMemberNode node;
override Cfg::ImportMemberNode node;
override Node getObject() { result.asCfgNode() = node.getModule(_) }

View File

@@ -3,6 +3,7 @@ overlay[local]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.ImportStar
@@ -67,7 +68,7 @@ module Builtins {
DataFlow::CfgNode likelyBuiltin(string name) {
exists(Module m |
result.getNode() =
any(NameNode n |
any(Cfg::NameNode n |
possible_builtin_accessed_in_module(n, name, m) and
not possible_builtin_defined_in_module(name, m)
)
@@ -87,7 +88,7 @@ module Builtins {
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
* built-in) inside the module `m`.
*/
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
private predicate possible_builtin_accessed_in_module(Cfg::NameNode n, string name, Module m) {
n.isGlobal() and
n.isLoad() and
name = n.getId() and

View File

@@ -25,7 +25,7 @@
* what callable this call might end up targeting.
*
* Specifically this means that we cannot use type-backtrackers from the function of a
* `CallNode`, since there is no `CallNode` to backtrack from for `func` in the example
* `Cfg::CallNode`, since there is no `Cfg::CallNode` to backtrack from for `func` in the example
* above.
*
* Note: This hasn't been 100% realized yet, so we don't currently expose a predicate to
@@ -35,6 +35,7 @@ overlay[local?]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import DataFlowPublic
private import DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl
@@ -162,7 +163,7 @@ newtype TArgumentPosition =
*/
TLambdaSelfArgumentPosition() or
TPositionalArgumentPosition(int index) {
exists(any(CallNode c).getArg(index))
exists(any(Cfg::CallNode c).getArg(index))
or
// since synthetic calls within a summarized callable could use a unique argument
// position, we need to ensure we make these available (these are specified as
@@ -174,7 +175,7 @@ newtype TArgumentPosition =
index = 0
} or
TKeywordArgumentPosition(string name) {
exists(any(CallNode c).getArgByName(name))
exists(any(Cfg::CallNode c).getArgByName(name))
or
// see comment for TPositionalArgumentPosition
FlowSummaryImpl::ParsePositions::isParsedKeywordParameterPosition(_, name)
@@ -256,9 +257,13 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
*/
overlay[local]
predicate isStaticmethod(Function func) {
exists(NameNode id | id.getId() = "staticmethod" and id.isGlobal() |
func.getADecorator() = id.getNode()
)
// The decorator is *syntactically* a Name "staticmethod" — we don't
// care which variable it resolves to. Even if a class redefines
// `staticmethod`, the binding hasn't happened yet at the decorator
// position, so Python's runtime semantics is "use the builtin".
// Matches legacy ESSA semantics which used `isGlobal()` (i.e. "no
// SSA def reaches this load") at the decorator's `NameNode`.
func.getADecorator().(Name).getId() = "staticmethod"
}
/**
@@ -268,9 +273,7 @@ predicate isStaticmethod(Function func) {
*/
overlay[local]
predicate isClassmethod(Function func) {
exists(NameNode id | id.getId() = "classmethod" and id.isGlobal() |
func.getADecorator() = id.getNode()
)
func.getADecorator().(Name).getId() = "classmethod"
or
exists(Class cls |
cls.getAMethod() = func and
@@ -285,9 +288,7 @@ predicate isClassmethod(Function func) {
/** Holds if the function `func` has a `property` decorator. */
overlay[local]
predicate hasPropertyDecorator(Function func) {
exists(NameNode id | id.getId() = "property" and id.isGlobal() |
func.getADecorator() = id.getNode()
)
func.getADecorator().(Name).getId() = "property"
}
/**
@@ -295,10 +296,10 @@ predicate hasPropertyDecorator(Function func) {
*/
overlay[local]
predicate hasContextmanagerDecorator(Function func) {
exists(ControlFlowNode contextmanager |
contextmanager.(NameNode).getId() = "contextmanager" and contextmanager.(NameNode).isGlobal()
exists(Cfg::ControlFlowNode contextmanager |
contextmanager.(Cfg::NameNode).getId() = "contextmanager" and contextmanager.(Cfg::NameNode).isGlobal()
or
contextmanager.(AttrNode).getObject("contextmanager").(NameNode).getId() = "contextlib"
contextmanager.(Cfg::AttrNode).getObject("contextmanager").(Cfg::NameNode).getId() = "contextlib"
|
func.getADecorator() = contextmanager.getNode()
)
@@ -314,10 +315,10 @@ predicate hasContextmanagerDecorator(Function func) {
*/
overlay[local]
private predicate hasOverloadDecorator(Function func) {
exists(ControlFlowNode overload |
overload.(NameNode).getId() = "overload" and overload.(NameNode).isGlobal()
exists(Cfg::ControlFlowNode overload |
overload.(Cfg::NameNode).getId() = "overload" and overload.(Cfg::NameNode).isGlobal()
or
overload.(AttrNode).getObject("overload").(NameNode).isGlobal()
overload.(Cfg::AttrNode).getObject("overload").(Cfg::NameNode).isGlobal()
|
func.getADecorator() = overload.getNode()
)
@@ -536,7 +537,7 @@ class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
// =============================================================================
/** Gets a call to `type`. */
private CallCfgNode getTypeCall() {
exists(NameNode id | id.getId() = "type" and id.isGlobal() |
exists(Cfg::NameNode id | id.getId() = "type" and id.isGlobal() |
result.getFunction().asCfgNode() = id
)
}
@@ -548,7 +549,7 @@ private CallCfgNode getSuperCall() {
// link below), but otherwise only 2 edgecases. Overall it seems ok to ignore this complexity.
//
// https://github.com/python/cpython/blob/18b1782192f85bd26db89f5bc850f8bee4247c1a/Lib/unittest/mock.py#L48-L50
exists(NameNode id | id.getId() = "super" and id.isGlobal() |
exists(Cfg::NameNode id | id.getId() = "super" and id.isGlobal() |
result.getFunction().asCfgNode() = id
)
}
@@ -1034,7 +1035,7 @@ private module MethodCalls {
*/
pragma[nomagic]
private predicate directCall(
CallNode call, Function target, string functionName, Class cls, AttrRead attr, Node self
Cfg::CallNode call, Function target, string functionName, Class cls, AttrRead attr, Node self
) {
target = findFunctionAccordingToMroKnownStartingClass(cls, functionName) and
directCall_join(call, functionName, cls, attr, self)
@@ -1043,7 +1044,7 @@ private module MethodCalls {
/** Extracted to give good join order */
pragma[nomagic]
private predicate directCall_join(
CallNode call, string functionName, Class cls, AttrRead attr, Node self
Cfg::CallNode call, string functionName, Class cls, AttrRead attr, Node self
) {
call.getFunction() = attrReadTracker(attr).asCfgNode() and
attr.accesses(self, functionName) and
@@ -1060,7 +1061,7 @@ private module MethodCalls {
*/
pragma[nomagic]
private predicate callWithinMethodImplicitSelfOrCls(
CallNode call, Function target, string functionName, Class classWithMethod, AttrRead attr,
Cfg::CallNode call, Function target, string functionName, Class classWithMethod, AttrRead attr,
Node self
) {
target = findFunctionAccordingToMro(getADirectSubclass*(classWithMethod), functionName) and
@@ -1070,7 +1071,7 @@ private module MethodCalls {
/** Extracted to give good join order */
pragma[nomagic]
private predicate callWithinMethodImplicitSelfOrCls_join(
CallNode call, string functionName, Class classWithMethod, AttrRead attr, Node self
Cfg::CallNode call, string functionName, Class classWithMethod, AttrRead attr, Node self
) {
call.getFunction() = attrReadTracker(attr).asCfgNode() and
attr.accesses(self, functionName) and
@@ -1082,7 +1083,7 @@ private module MethodCalls {
* resolve the call to a known target (since the only super class might be the
* builtin `object`, so we never have the implementation of `__new__` in the DB).
*/
predicate fromSuperNewCall(CallNode call, Class classUsedInSuper, AttrRead attr, Node self) {
predicate fromSuperNewCall(Cfg::CallNode call, Class classUsedInSuper, AttrRead attr, Node self) {
fromSuper_join(call, "__new__", classUsedInSuper, attr, self) and
self in [classTracker(_), clsArgumentTracker(_)]
}
@@ -1104,7 +1105,7 @@ private module MethodCalls {
*/
pragma[nomagic]
predicate fromSuper(
CallNode call, Function target, string functionName, Class classUsedInSuper, AttrRead attr,
Cfg::CallNode call, Function target, string functionName, Class classUsedInSuper, AttrRead attr,
Node self
) {
target = findFunctionAccordingToMro(getNextClassInMro(classUsedInSuper), functionName) and
@@ -1114,7 +1115,7 @@ private module MethodCalls {
/** Extracted to give good join order */
pragma[nomagic]
private predicate fromSuper_join(
CallNode call, string functionName, Class classUsedInSuper, AttrRead attr, Node self
Cfg::CallNode call, string functionName, Class classUsedInSuper, AttrRead attr, Node self
) {
call.getFunction() = attrReadTracker(attr).asCfgNode() and
(
@@ -1133,7 +1134,7 @@ private module MethodCalls {
)
}
predicate resolveMethodCall(CallNode call, Function target, CallType type, Node self) {
predicate resolveMethodCall(Cfg::CallNode call, Function target, CallType type, Node self) {
(
directCall(call, target, _, _, _, self)
or
@@ -1180,7 +1181,7 @@ import MethodCalls
* NOTE: We have this predicate mostly to be able to compare with old point-to
* call-graph resolution. So it could be removed in the future.
*/
predicate resolveClassCall(CallNode call, Class cls) {
predicate resolveClassCall(Cfg::CallNode call, Class cls) {
call.getFunction() = classTracker(cls).asCfgNode()
or
// `cls()` inside a classmethod (which also contains `type(self)()` inside a method)
@@ -1210,7 +1211,7 @@ Function invokedFunctionFromClassConstruction(Class cls, string funcName) {
*
* See https://docs.python.org/3/reference/datamodel.html#object.__call__
*/
predicate resolveClassInstanceCall(CallNode call, Function target, Node self) {
predicate resolveClassInstanceCall(Cfg::CallNode call, Function target, Node self) {
exists(Class cls |
call.getFunction() = classInstanceTracker(cls).asCfgNode() and
target = findFunctionAccordingToMroKnownStartingClass(cls, "__call__")
@@ -1229,7 +1230,7 @@ predicate resolveClassInstanceCall(CallNode call, Function target, Node self) {
* Holds if `call` is a call to the `target`, with call-type `type`.
*/
cached
predicate resolveCall(CallNode call, Function target, CallType type) {
predicate resolveCall(Cfg::CallNode call, Function target, CallType type) {
Stages::DataFlow::ref() and
(
type instanceof CallTypePlainFunction and
@@ -1254,11 +1255,11 @@ predicate resolveCall(CallNode call, Function target, CallType type) {
// =============================================================================
/**
* Holds if the argument of `call` at position `apos` is `arg`. This is just a helper
* predicate that maps ArgumentPositions to the arguments of the underlying `CallNode`.
* predicate that maps ArgumentPositions to the arguments of the underlying `Cfg::CallNode`.
*/
overlay[local]
cached
predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
predicate normalCallArg(Cfg::CallNode call, Node arg, ArgumentPosition apos) {
exists(int index |
apos.isPositional(index) and
arg.asCfgNode() = call.getArg(index)
@@ -1273,7 +1274,7 @@ predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
exists(int index |
apos.isStarArgs(index) and
arg.asCfgNode() = call.getStarArg() and
// since `CallNode.getArg` doesn't include `*args`, we need to drop to the AST level
// since `Cfg::CallNode.getArg` doesn't include `*args`, we need to drop to the AST level
// to get the index. Notice that we only use the AST for getting the index, so we
// don't need to check for dominance in regards to splitting.
call.getStarArg().getNode() = call.getNode().getPositionalArg(index).(Starred).getValue()
@@ -1347,7 +1348,7 @@ predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
* translated into `l.clear()`, and we can still have use-use flow.
*/
cached
predicate getCallArg(CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) {
predicate getCallArg(Cfg::CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) {
Stages::DataFlow::ref() and
resolveCall(call, target, type) and
(
@@ -1440,10 +1441,10 @@ private predicate sameEnclosingCallable(Node node1, Node node2) {
// DataFlowCall
// =============================================================================
newtype TDataFlowCall =
TNormalCall(CallNode call, Function target, CallType type) { resolveCall(call, target, type) } or
TNormalCall(Cfg::CallNode call, Function target, CallType type) { resolveCall(call, target, type) } or
/** A call to the generated function inside a comprehension */
TComprehensionCall(Comp c) or
TPotentialLibraryCall(CallNode call) or
TPotentialLibraryCall(Cfg::CallNode call) or
/** A synthesized call inside a summarized callable */
TSummaryCall(
FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver
@@ -1463,7 +1464,7 @@ abstract class DataFlowCall extends TDataFlowCall {
abstract ArgumentNode getArgument(ArgumentPosition apos);
/** Get the control flow node representing this call, if any. */
abstract ControlFlowNode getNode();
abstract Cfg::ControlFlowNode getNode();
/** Gets the enclosing callable of this call. */
DataFlowCallable getEnclosingCallable() { result = getCallableScope(this.getScope()) }
@@ -1494,28 +1495,28 @@ abstract class ExtractedDataFlowCall extends DataFlowCall {
}
/**
* A resolved call in source code with an underlying `CallNode`.
* A resolved call in source code with an underlying `Cfg::CallNode`.
*
* This is considered normal, compared with special calls such as `obj[0]` calling the
* `__getitem__` method on the object. However, this also includes calls that go to the
* `__call__` special method.
*/
class NormalCall extends ExtractedDataFlowCall, TNormalCall {
CallNode call;
Cfg::CallNode call;
Function target;
CallType type;
NormalCall() { this = TNormalCall(call, target, type) }
override string toString() {
// note: if we used toString directly on the CallNode we would get
// `ControlFlowNode for func()`
// but the `ControlFlowNode` part is just clutter, so we go directly to the AST node
// note: if we used toString directly on the Cfg::CallNode we would get
// `Cfg::ControlFlowNode for func()`
// but the `Cfg::ControlFlowNode` part is just clutter, so we go directly to the AST node
// instead.
result = call.getNode().toString()
}
override ControlFlowNode getNode() { result = call }
override Cfg::ControlFlowNode getNode() { result = call }
override Scope getScope() { result = call.getScope() }
@@ -1543,7 +1544,7 @@ class ComprehensionCall extends ExtractedDataFlowCall, TComprehensionCall {
override string toString() { result = "comprehension call" }
override ControlFlowNode getNode() { result.getNode() = c }
override Cfg::ControlFlowNode getNode() { result.getNode() = c }
override Scope getScope() { result = c.getScope() }
@@ -1566,14 +1567,14 @@ class ComprehensionCall extends ExtractedDataFlowCall, TComprehensionCall {
* in this class.
*/
class PotentialLibraryCall extends ExtractedDataFlowCall, TPotentialLibraryCall {
CallNode call;
Cfg::CallNode call;
PotentialLibraryCall() { this = TPotentialLibraryCall(call) }
override string toString() {
// note: if we used toString directly on the CallNode we would get
// `ControlFlowNode for func()`
// but the `ControlFlowNode` part is just clutter, so we go directly to the AST node
// note: if we used toString directly on the Cfg::CallNode we would get
// `Cfg::ControlFlowNode for func()`
// but the `Cfg::ControlFlowNode` part is just clutter, so we go directly to the AST node
// instead.
result = call.getNode().toString()
}
@@ -1590,10 +1591,10 @@ class PotentialLibraryCall extends ExtractedDataFlowCall, TPotentialLibraryCall
// potential self argument, from `foo.bar()` -- note that this could also just be a
// module reference, but we really don't have a good way of knowing :|
apos.isSelf() and
result.asCfgNode() = call.getFunction().(AttrNode).getObject()
result.asCfgNode() = call.getFunction().(Cfg::AttrNode).getObject()
}
override ControlFlowNode getNode() { result = call }
override Cfg::ControlFlowNode getNode() { result = call }
override Scope getScope() { result = call.getScope() }
}
@@ -1625,7 +1626,7 @@ class SummaryCall extends DataFlowCall, TSummaryCall {
override ArgumentNode getArgument(ArgumentPosition apos) { none() }
override ControlFlowNode getNode() { none() }
override Cfg::ControlFlowNode getNode() { none() }
override string toString() { result = "[summary] call to " + receiver + " in " + c }
@@ -1767,12 +1768,12 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl
* This is used for tracking flow through captured variables.
*/
class SynthCapturedVariablesArgumentNode extends Node, TSynthCapturedVariablesArgumentNode {
ControlFlowNode callable;
Cfg::ControlFlowNode callable;
SynthCapturedVariablesArgumentNode() { this = TSynthCapturedVariablesArgumentNode(callable) }
/** Gets the `CallNode` corresponding to this captured variables argument node. */
CallNode getCallNode() { result.getFunction() = callable }
/** Gets the `Cfg::CallNode` corresponding to this captured variables argument node. */
Cfg::CallNode getCallNode() { result.getFunction() = callable }
/** Gets the `CfgNode` that corresponds to this synthetic node. */
CfgNode getUnderlyingNode() { result.asCfgNode() = callable }
@@ -1790,7 +1791,7 @@ class CapturedVariablesArgumentNodeAsArgumentNode extends ArgumentNode,
{
overlay[global]
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
exists(CallNode callNode | callNode = this.getCallNode() |
exists(Cfg::CallNode callNode | callNode = this.getCallNode() |
callNode = call.getNode() and
exists(Function target | resolveCall(callNode, target, _) |
target = any(VariableCapture::CapturedVariable v).getACapturingScope()
@@ -1804,7 +1805,7 @@ class CapturedVariablesArgumentNodeAsArgumentNode extends ArgumentNode,
class SynthCapturedVariablesArgumentPostUpdateNode extends PostUpdateNodeImpl,
TSynthCapturedVariablesArgumentPostUpdateNode
{
ControlFlowNode callable;
Cfg::ControlFlowNode callable;
SynthCapturedVariablesArgumentPostUpdateNode() {
this = TSynthCapturedVariablesArgumentPostUpdateNode(callable)

View File

@@ -2,8 +2,9 @@ overlay[local?]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import DataFlowPublic
private import semmle.python.essa.SsaCompute
private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl
private import semmle.python.dataflow.new.internal.ImportResolution
private import FlowSummaryImpl as FlowSummaryImpl
private import semmle.python.frameworks.data.ModelsAsData
@@ -43,13 +44,23 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos)
// Nodes
//--------
overlay[local]
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
predicate isExpressionNode(Cfg::ControlFlowNode node) {
node.getNode() instanceof Expr
or
// `Cfg::ForNode` wraps a `For` statement's iter position, but
// overrides `.getNode()` to return the `Py::For` statement (for
// legacy parity). The underlying AST is still an `Expr` (the iter
// expression); we want a dataflow node here so that for-loop
// content reads (`for y in l`) have a source expression node to
// read content from.
node instanceof Cfg::ForNode
}
// =============================================================================
// SyntheticPreUpdateNode
// =============================================================================
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
CallNode node;
Cfg::CallNode node;
SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(node) }
@@ -151,7 +162,7 @@ predicate synthStarArgsElementParameterNodeStoreStep(
* been passed in a `**kwargs` argument.
*/
class SynthDictSplatArgumentNode extends Node, TSynthDictSplatArgumentNode {
CallNode node;
Cfg::CallNode node;
SynthDictSplatArgumentNode() { this = TSynthDictSplatArgumentNode(node) }
@@ -165,7 +176,7 @@ class SynthDictSplatArgumentNode extends Node, TSynthDictSplatArgumentNode {
private predicate synthDictSplatArgumentNodeStoreStep(
ArgumentNode nodeFrom, DictionaryElementContent c, SynthDictSplatArgumentNode nodeTo
) {
exists(string name, CallNode call, ArgumentPosition keywordPos |
exists(string name, Cfg::CallNode call, ArgumentPosition keywordPos |
nodeTo = TSynthDictSplatArgumentNode(call) and
getCallArg(call, _, _, nodeFrom, keywordPos) and
keywordPos.isKeyword(name) and
@@ -289,7 +300,7 @@ abstract class PostUpdateNodeImpl extends Node {
* Synthetic post-update nodes for synthetic nodes need to be listed one by one.
*/
class SyntheticPostUpdateNode extends PostUpdateNodeImpl, TSyntheticPostUpdateNode {
ControlFlowNode node;
Cfg::ControlFlowNode node;
SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(node) }
@@ -333,16 +344,22 @@ module LocalFlow {
// `x = f(42)`
// nodeFrom is `f(42)`
// nodeTo is `x`
exists(AssignmentDefinition def |
//
// We use the CFG-level `DefinitionNode.getValue()` directly rather
// than going through SSA, because the new SSA library prunes write
// definitions that have no subsequent read in the same scope (e.g.
// a module-level `def f():` whose `f` is only read inside other
// functions). The CFG-level link is unconditional.
exists(Cfg::DefinitionNode def |
nodeFrom.(CfgNode).getNode() = def.getValue() and
nodeTo.(CfgNode).getNode() = def.getDefiningNode()
nodeTo.(CfgNode).getNode() = def
)
or
// With definition
// `with f(42) as x:`
// nodeFrom is `f(42)`
// nodeTo is `x`
exists(With with, ControlFlowNode contextManager, WithDefinition withDef, ControlFlowNode var |
exists(With with, Cfg::ControlFlowNode contextManager, SsaImpl::WithDefinition withDef, Cfg::ControlFlowNode var |
var = withDef.getDefiningNode()
|
nodeFrom.(CfgNode).getNode() = contextManager and
@@ -361,13 +378,13 @@ module LocalFlow {
predicate expressionFlowStep(Node nodeFrom, Node nodeTo) {
// If expressions
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(Cfg::IfExprNode).getAnOperand()
or
// Assignment expressions
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(AssignmentExprNode).getValue()
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(Cfg::AssignmentExprNode).getValue()
or
// boolean inline expressions such as `x or y` or `x and y`
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(BoolExprNode).getAnOperand()
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(Cfg::BoolExprNode).getAnOperand()
or
// Flow inside an unpacking assignment
iterableUnpackingFlowStep(nodeFrom, nodeTo)
@@ -376,12 +393,12 @@ module LocalFlow {
matchFlowStep(nodeFrom, nodeTo)
}
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
AdjacentUses::adjacentUseUse(nodeFrom, nodeTo)
predicate useToNextUse(Cfg::NameNode nodeFrom, Cfg::NameNode nodeTo) {
SsaImpl::AdjacentUses::adjacentUseUse(nodeFrom, nodeTo)
}
predicate defToFirstUse(EssaVariable var, NameNode nodeTo) {
AdjacentUses::firstUse(var.getDefinition(), nodeTo)
predicate defToFirstUse(SsaImpl::EssaVariable var, Cfg::NameNode nodeTo) {
SsaImpl::AdjacentUses::firstUse(var.getDefinition(), nodeTo)
}
predicate useUseFlowStep(Node nodeFrom, Node nodeTo) {
@@ -390,12 +407,12 @@ module LocalFlow {
// `x = f(y)`
// nodeFrom is `y` on first line
// nodeTo is `y` on second line
exists(EssaDefinition def |
nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode()
exists(SsaImpl::EssaDefinition def |
nodeFrom.(CfgNode).getNode() = def.(SsaImpl::EssaNodeDefinition).getDefiningNode()
or
nodeFrom.(ScopeEntryDefinitionNode).getDefinition() = def
|
AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode())
SsaImpl::AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode())
)
or
// Next use after use
@@ -557,9 +574,9 @@ predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
// a parameter with a default value, since the parameter will be in the scope of the
// function, while the default value itself will be in the scope that _defines_ the
// function.
exists(ParameterDefinition param |
exists(SsaImpl::ParameterDefinition param |
// note: we go to the _control-flow node_ of the parameter, and not the ESSA node of the parameter, since for type-tracking, the ESSA node is not a LocalSourceNode, so we would get in trouble.
nodeFrom.asCfgNode() = param.getDefault() and
nodeFrom.asCfgNode().getNode() = param.getParameter().(Parameter).getDefault() and
nodeTo.asCfgNode() = param.getDefiningNode()
)
or
@@ -663,7 +680,7 @@ predicate neverSkipInPathGraph(Node n) {
// ```
// we would end up saying that the path MUST not skip the x in `y = x`, which is just
// annoying and doesn't help the path explanation become clearer.
n.asCfgNode() = any(EssaNodeDefinition def).getDefiningNode()
n.asCfgNode() = any(SsaImpl::EssaNodeDefinition def).getDefiningNode()
}
/**
@@ -872,7 +889,7 @@ predicate listStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo)
// nodeFrom is `42`, cfg node
// nodeTo is the list, `[..., 42, ...]`, cfg node
// c denotes element of list
nodeTo.getNode().(ListNode).getAnElement() = nodeFrom.getNode() and
nodeTo.getNode().(Cfg::ListNode).getAnElement() = nodeFrom.getNode() and
not nodeTo.getNode() instanceof UnpackingAssignmentSequenceTarget and
// Suppress unused variable warning
c = c
@@ -885,7 +902,7 @@ predicate setStoreStep(CfgNode nodeFrom, SetElementContent c, CfgNode nodeTo) {
// nodeFrom is `42`, cfg node
// nodeTo is the set, `{..., 42, ...}`, cfg node
// c denotes element of list
nodeTo.getNode().(SetNode).getAnElement() = nodeFrom.getNode() and
nodeTo.getNode().(Cfg::SetNode).getAnElement() = nodeFrom.getNode() and
// Suppress unused variable warning
c = c
}
@@ -898,7 +915,7 @@ predicate tupleStoreStep(CfgNode nodeFrom, TupleElementContent c, CfgNode nodeTo
// nodeTo is the tuple, `(..., 42, ...)`, cfg node
// c denotes element of tuple and index of nodeFrom
exists(int n |
nodeTo.getNode().(TupleNode).getElement(n) = nodeFrom.getNode() and
nodeTo.getNode().(Cfg::TupleNode).getElement(n) = nodeFrom.getNode() and
not nodeTo.getNode() instanceof UnpackingAssignmentSequenceTarget and
c.getIndex() = n
)
@@ -912,7 +929,7 @@ predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeT
// nodeTo is the dict, `{..., "key" = 42, ...}`, cfg node
// c denotes element of dictionary and the key `"key"`
exists(KeyValuePair item |
item = nodeTo.asCfgNode().(DictNode).getNode().(Dict).getAnItem() and
item = nodeTo.asCfgNode().(Cfg::DictNode).getNode().(Dict).getAnItem() and
nodeFrom.getNode().getNode() = item.getValue() and
c.getKey() = item.getKey().(StringLiteral).getS()
)
@@ -927,9 +944,9 @@ predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeT
private predicate moreDictStoreSteps(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
// NOTE: It's important to add logic to the newtype definition of
// DictionaryElementContent if you add new cases here.
exists(SubscriptNode subscript |
exists(Cfg::SubscriptNode subscript |
nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() = subscript.getObject() and
nodeFrom.asCfgNode() = subscript.(DefinitionNode).getValue() and
nodeFrom.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and
c.getKey() = subscript.getIndex().getNode().(StringLiteral).getText()
)
or
@@ -942,8 +959,8 @@ private predicate moreDictStoreSteps(CfgNode nodeFrom, DictionaryElementContent
}
predicate dictClearStep(Node node, DictionaryElementContent c) {
exists(SubscriptNode subscript |
subscript instanceof DefinitionNode and
exists(Cfg::SubscriptNode subscript |
subscript instanceof Cfg::DefinitionNode and
node.asCfgNode() = subscript.getObject() and
c.getKey() = subscript.getIndex().getNode().(StringLiteral).getText()
)
@@ -1018,7 +1035,7 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// nodeFrom is `l`, cfg node
// nodeTo is `l[3]`, cfg node
// c is compatible with 3
nodeFrom.getNode() = nodeTo.getNode().(SubscriptNode).getObject() and
nodeFrom.getNode() = nodeTo.getNode().(Cfg::SubscriptNode).getObject() and
(
c instanceof ListElementContent
or
@@ -1027,10 +1044,10 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
c instanceof DictionaryElementAnyContent
or
c.(TupleElementContent).getIndex() =
nodeTo.getNode().(SubscriptNode).getIndex().getNode().(IntegerLiteral).getValue()
nodeTo.getNode().(Cfg::SubscriptNode).getIndex().getNode().(IntegerLiteral).getValue()
or
c.(DictionaryElementContent).getKey() =
nodeTo.getNode().(SubscriptNode).getIndex().getNode().(StringLiteral).getS()
nodeTo.getNode().(Cfg::SubscriptNode).getIndex().getNode().(StringLiteral).getS()
)
}

View File

@@ -5,11 +5,12 @@ overlay[local]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import DataFlowPrivate
import semmle.python.dataflow.new.TypeTracking
import Attributes
import LocalSources
private import semmle.python.essa.SsaCompute
private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl
private import semmle.python.dataflow.new.internal.ImportStar
private import semmle.python.frameworks.data.ModelsAsData
private import FlowSummaryImpl as FlowSummaryImpl
@@ -27,7 +28,7 @@ private import semmle.python.frameworks.data.ModelsAsData
overlay[local]
newtype TNode =
/** A node corresponding to a control flow node. */
TCfgNode(ControlFlowNode node) {
TCfgNode(Cfg::ControlFlowNode node) {
isExpressionNode(node)
or
node.getNode() instanceof Pattern
@@ -36,7 +37,7 @@ newtype TNode =
* A node corresponding to a scope entry definition. That is, the value of a variable
* as it enters a scope.
*/
TScopeEntryDefinitionNode(ScopeEntryDefinition def) { not def.getScope() instanceof Module } or
TScopeEntryDefinitionNode(SsaImpl::ScopeEntryDefinition def) { not def.getScope() instanceof Module } or
/**
* A synthetic node representing the value of an object before a state change.
*
@@ -47,13 +48,13 @@ newtype TNode =
// NOTE: since we can't rely on the call graph, but we want to have synthetic
// pre-update nodes for class calls, we end up getting synthetic pre-update nodes for
// ALL calls :|
TSyntheticPreUpdateNode(CallNode call) or
TSyntheticPreUpdateNode(Cfg::CallNode call) or
/**
* A synthetic node representing the value of an object after a state change.
* See QLDoc for `PostUpdateNode`.
*/
TSyntheticPostUpdateNode(ControlFlowNode node) {
exists(CallNode call |
TSyntheticPostUpdateNode(Cfg::ControlFlowNode node) {
exists(Cfg::CallNode call |
node = call.getArg(_)
or
node = call.getArgByName(_)
@@ -62,12 +63,12 @@ newtype TNode =
node = call.getFunction()
)
or
node = any(AttrNode a).getObject()
node = any(Cfg::AttrNode a).getObject()
or
node = any(SubscriptNode s).getObject()
node = any(Cfg::SubscriptNode s).getObject()
or
// self parameter when used implicitly in `super()`
exists(Class cls, Function func, ParameterDefinition def |
exists(Class cls, Function func, SsaImpl::ParameterDefinition def |
func = cls.getAMethod() and
not isStaticmethod(func) and
// this matches what we do in ExtractedParameterNode
@@ -112,7 +113,7 @@ newtype TNode =
exists(ParameterPosition ppos | ppos.isStarArgs(_) | exists(callable.getParameter(ppos)))
} or
/** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
TSynthDictSplatArgumentNode(Cfg::CallNode call) { exists(call.getArgByName(_)) } or
/** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
TSynthDictSplatParameterNode(DataFlowCallable callable) {
exists(ParameterPosition ppos | ppos.isKeyword(_) | exists(callable.getParameter(ppos)))
@@ -128,15 +129,15 @@ newtype TNode =
* A synthetic node representing the values of the variables captured
* by the callable being called.
*/
TSynthCapturedVariablesArgumentNode(ControlFlowNode callable) {
callable = any(CallNode c).getFunction()
TSynthCapturedVariablesArgumentNode(Cfg::ControlFlowNode callable) {
callable = any(Cfg::CallNode c).getFunction()
} or
/**
* A synthetic node representing the values of the variables captured
* by the callable being called, after the output has been computed.
*/
TSynthCapturedVariablesArgumentPostUpdateNode(ControlFlowNode callable) {
callable = any(CallNode c).getFunction()
TSynthCapturedVariablesArgumentPostUpdateNode(Cfg::ControlFlowNode callable) {
callable = any(Cfg::CallNode c).getFunction()
} or
/** A synthetic node representing the values of variables captured by a comprehension. */
TSynthCompCapturedVariablesArgumentNode(Comp comp) {
@@ -194,7 +195,7 @@ class Node extends TNode {
}
/** Gets the control-flow node corresponding to this node, if any. */
ControlFlowNode asCfgNode() { none() }
Cfg::ControlFlowNode asCfgNode() { none() }
/** Gets the expression corresponding to this node, if any. */
Expr asExpr() { none() }
@@ -207,14 +208,14 @@ class Node extends TNode {
/** A data-flow node corresponding to a control-flow node. */
class CfgNode extends Node, TCfgNode {
ControlFlowNode node;
Cfg::ControlFlowNode node;
CfgNode() { this = TCfgNode(node) }
/** Gets the `ControlFlowNode` represented by this data-flow node. */
ControlFlowNode getNode() { result = node }
/** Gets the `Cfg::ControlFlowNode` represented by this data-flow node. */
Cfg::ControlFlowNode getNode() { result = node }
override ControlFlowNode asCfgNode() { result = node }
override Cfg::ControlFlowNode asCfgNode() { result = node }
/** Gets a textual representation of this element. */
override string toString() { result = node.toString() }
@@ -224,9 +225,9 @@ class CfgNode extends Node, TCfgNode {
override Location getLocation() { result = node.getLocation() }
}
/** A data-flow node corresponding to a `CallNode` in the control-flow graph. */
/** A data-flow node corresponding to a `Cfg::CallNode` in the control-flow graph. */
class CallCfgNode extends CfgNode, LocalSourceNode {
override CallNode node;
override Cfg::CallNode node;
/**
* Gets the data-flow node for the function component of the call corresponding to this data-flow
@@ -307,15 +308,15 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
* as it enters a scope.
*/
class ScopeEntryDefinitionNode extends Node, TScopeEntryDefinitionNode {
ScopeEntryDefinition def;
SsaImpl::ScopeEntryDefinition def;
ScopeEntryDefinitionNode() { this = TScopeEntryDefinitionNode(def) }
/** Gets the `ScopeEntryDefinition` associated with this node. */
ScopeEntryDefinition getDefinition() { result = def }
/** Gets the `SsaImpl::ScopeEntryDefinition` associated with this node. */
SsaImpl::ScopeEntryDefinition getDefinition() { result = def }
/** Gets the source variable represented by this node. */
SsaSourceVariable getVariable() { result = def.getSourceVariable() }
SsaImpl::SsaSourceVariable getVariable() { result = def.getSourceVariable() }
override Location getLocation() { result = def.getLocation() }
@@ -337,7 +338,7 @@ class ParameterNode extends Node instanceof ParameterNodeImpl {
/** A parameter node found in the source code (not in a summary). */
class ExtractedParameterNode extends ParameterNodeImpl, CfgNode {
//, LocalSourceNode {
ParameterDefinition def;
SsaImpl::ParameterDefinition def;
ExtractedParameterNode() { node = def.getDefiningNode() }
@@ -368,10 +369,10 @@ Node getCallArgApproximation() {
exists(Class c | result.asExpr() = c.getAMethod().getArg(0))
or
// the object part of an attribute expression (which might be a bound method)
result.asCfgNode() = any(AttrNode a).getObject()
result.asCfgNode() = any(Cfg::AttrNode a).getObject()
or
// the function part of any call
result.asCfgNode() = any(CallNode c).getFunction()
result.asCfgNode() = any(Cfg::CallNode c).getFunction()
}
/** Gets the extracted argument nodes that do not rely on `getCallArg`. */
@@ -380,7 +381,7 @@ private Node implicitArgumentNode() {
normalCallArg(_, result, _)
or
// and self arguments
result.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject()
result.asCfgNode() = any(Cfg::CallNode c).getFunction().(Cfg::AttrNode).getObject()
or
// for comprehensions, we allow the synthetic `iterable` argument
result.asExpr() = any(Comp c).getIterable()
@@ -489,17 +490,20 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
not result.getScope() = mod
}
/** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
/** Gets a CFG node that corresponds to an assignment of this global variable. */
Node getAWrite() {
any(EssaNodeDefinition def).definedBy(var, result.asCfgNode().(DefinitionNode))
exists(Cfg::NameNode n |
n.defines(var) and
result.asCfgNode() = n
)
}
/** Gets the possible values of the variable at the end of import time */
CfgNode getADefiningWrite() {
exists(SsaVariable def |
def = any(SsaVariable ssa_var).getAnUltimateDefinition() and
def.getDefinition() = result.asCfgNode() and
def.getVariable() = var
exists(SsaImpl::EssaVariable def |
def = any(SsaImpl::EssaVariable ssa_var).getAnUltimateDefinition() and
def.getDefinition().(SsaImpl::EssaNodeDefinition).getDefiningNode() = result.asCfgNode() and
def.getSourceVariable().getVariable() = var
)
}
@@ -516,7 +520,7 @@ private ModuleVariableNode import_star_read(Node n) {
overlay[global]
pragma[nomagic]
private predicate resolved_import_star_module(Module m, string name, Node n) {
exists(NameNode nn | nn = n.asCfgNode() |
exists(Cfg::NameNode nn | nn = n.asCfgNode() |
ImportStar::importStarResolvesTo(pragma[only_bind_into](nn), m) and
nn.getId() = name
)
@@ -574,88 +578,106 @@ class StarPatternElementNode extends Node, TStarPatternElementNode {
}
/**
* Gets a node that controls whether other nodes are evaluated.
* A node that participates in a conditional split: a CFG node whose
* evaluation outcome (true/false) is used to choose between two
* successor basic blocks. In the new shared CFG, such nodes appear in
* pairs of `isAfterTrue`/`isAfterFalse` annotated CFG nodes.
*
* In the base case, this is the last node of `conditionBlock`, and `flipped` is `false`.
* This definition accounts for (short circuting) `and`- and `or`-expressions, as the structure
* of basic blocks will reflect their semantics.
* Users typically obtain a `GuardNode` by casting from a more specific
* Cfg type: `g.(Cfg::CallNode)` for a call-based check, etc.
*
* However, in the program
* ```python
* if not is_safe(path):
* return
* ```
* the last node in the `ConditionBlock` is `not is_safe(path)`.
*
* We would like to consider also `is_safe(path)` a guard node, albeit with `flipped` being `true`.
* Thus we recurse through `not`-expressions.
* This replaces the legacy (pre-shared-CFG) `GuardNode`/`flipped`
* machinery: the shared CFG carries outcome information structurally
* (via `isAfterTrue`/`isAfterFalse`), so no separate polarity field
* is required.
*/
ControlFlowNode guardNode(ConditionBlock conditionBlock, boolean flipped) {
// Base case: the last node truly does determine which successor is chosen
result = conditionBlock.getLastNode() and
flipped = false
or
// Recursive cases:
// if a guard node is a `not`-expression,
// the operand is also a guard node, but with inverted polarity.
exists(UnaryExprNode notNode |
result = notNode.getOperand() and
notNode.getNode().getOp() instanceof Not
|
notNode = guardNode(conditionBlock, flipped.booleanNot())
)
or
// if a guard node is compared to a boolean literal,
// the other operand is also a guard node,
// but with polarity depending on the literal (and on the comparison).
exists(CompareNode cmpNode, Cmpop op, ControlFlowNode b, boolean should_flip |
(
cmpNode.operands(result, op, b) or
cmpNode.operands(b, op, result)
) and
not result.getNode() instanceof BooleanLiteral and
(
// comparing to the boolean
(op instanceof Eq or op instanceof Is) and
// we should flip if the value compared against, here the value of `b`, is false
should_flip = b.getNode().(BooleanLiteral).booleanValue().booleanNot()
or
// comparing to the negation of the boolean
(op instanceof NotEq or op instanceof IsNot) and
// again, we should flip if the value compared against, here the value of `not b`, is false.
// That is, if the value of `b` is true.
should_flip = b.getNode().(BooleanLiteral).booleanValue()
class GuardNode extends Cfg::ControlFlowNode {
GuardNode() {
// This is the canonical (post-order) version of an AST node, and
// some `[true]`/`[false]` variant of the same AST exists. We
// include the canonical node because users identify guards by
// their AST (`g.(Cfg::CallNode)` etc.), and the outcome-tagged
// variants are accessed by `outcomeOfGuard` below.
exists(Cfg::ControlFlowNode outcome |
outcome.getNode() = this.getNode() and
(outcome.isAfterTrue(_) or outcome.isAfterFalse(_))
)
|
// we flip `flipped` according to `should_flip` via the formula `flipped xor should_flip`.
flipped in [true, false] and
cmpNode = guardNode(conditionBlock, flipped.booleanXor(should_flip))
)
or
// Or: this IS one of the outcome-tagged variants, supporting
// users who want to query the split point directly.
this.isAfterTrue(_)
or
this.isAfterFalse(_)
}
/** Holds if this guard controls block `b` upon evaluating to `branch`. */
predicate controlsBlock(Cfg::BasicBlock b, boolean branch) {
branch in [true, false] and
exists(Cfg::ControlFlowNode outcomeNode |
outcomeOfGuard(this, outcomeNode, branch) and
outcomeNode.getBasicBlock().dominates(b)
)
}
}
/**
* A node that controls whether other nodes are evaluated.
* Holds if some execution that arrives at `outcomeNode` corresponds
* to `guard` having evaluated to `branch`.
*
* The field `flipped` allows us to match `GuardNode`s underneath
* `not`-expressions and still choose the appropriate branch.
* For a direct guard `if g:`, the outcome node is `g` itself with
* `isAfterTrue`/`isAfterFalse`. For wrapped guards like `not g` or
* `g == True`, the outcome is on the wrapping expression with an
* appropriate polarity transform — we follow those wrappers up the
* AST to find the outermost expression that carries an actual
* `isAfterTrue`/`isAfterFalse` outcome.
*/
class GuardNode extends ControlFlowNode {
ConditionBlock conditionBlock;
boolean flipped;
GuardNode() { this = guardNode(conditionBlock, flipped) }
/** Holds if this guard controls block `b` upon evaluating to `branch`. */
predicate controlsBlock(BasicBlock b, boolean branch) {
branch in [true, false] and
conditionBlock.controls(b, branch.booleanXor(flipped))
}
private predicate outcomeOfGuard(
Cfg::ControlFlowNode guard, Cfg::ControlFlowNode outcomeNode, boolean branch
) {
// Base case: the guard itself splits — the outcome node is the
// first node of an outcome BB, with matching outcome label.
// (The shared CFG also marks inner expressions with outcome flags
// for analysis purposes, but only "splitting" nodes — those that
// actually start an outcome BB — are valid guards on their own.)
outcomeNode.getNode() = guard.getNode() and
outcomeNode = outcomeNode.getBasicBlock().firstNode() and
(
outcomeNode.isAfterTrue(_) and branch = true
or
outcomeNode.isAfterFalse(_) and branch = false
)
or
// Recursive: `not guard` — same outcome split as `guard`, flipped.
exists(Cfg::UnaryExprNode notNode, boolean notBranch |
notNode.getOperand().getNode() = guard.getNode() and
notNode.getNode().getOp() instanceof Not and
outcomeOfGuard(notNode, outcomeNode, notBranch) and
branch = notBranch.booleanNot()
)
or
// Recursive: comparisons against a boolean literal.
exists(Cfg::CompareNode cmpNode, Cmpop op, Cfg::ControlFlowNode otherOperand,
Cfg::ControlFlowNode guardOperand, boolean polarity, boolean cmpBranch
|
guardOperand.getNode() = guard.getNode() and
(cmpNode.operands(guardOperand, op, otherOperand) or cmpNode.operands(otherOperand, op, guardOperand)) and
not guard.getNode() instanceof BooleanLiteral and
(
(op instanceof Eq or op instanceof Is) and
polarity = otherOperand.getNode().(BooleanLiteral).booleanValue()
or
(op instanceof NotEq or op instanceof IsNot) and
polarity = otherOperand.getNode().(BooleanLiteral).booleanValue().booleanNot()
) and
outcomeOfGuard(cmpNode, outcomeNode, cmpBranch) and
branch = cmpBranch.booleanXor(polarity.booleanNot())
)
}
/**
* Holds if the guard `g` validates `node` upon evaluating to `branch`.
*/
signature predicate guardChecksSig(GuardNode g, ControlFlowNode node, boolean branch);
signature predicate guardChecksSig(GuardNode g, Cfg::ControlFlowNode node, boolean branch);
/**
* Provides a set of barrier nodes for a guard that validates a node.
@@ -670,7 +692,7 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
result = ParameterizedBarrierGuard<Unit, extendedGuardChecks/4>::getABarrierNode(_)
}
private predicate extendedGuardChecks(GuardNode g, ControlFlowNode node, boolean branch, Unit u) {
private predicate extendedGuardChecks(GuardNode g, Cfg::ControlFlowNode node, boolean branch, Unit u) {
guardChecks(g, node, branch) and
u = u
}
@@ -680,7 +702,7 @@ bindingset[this]
private signature class ParamSig;
private module WithParam<ParamSig P> {
signature predicate guardChecksSig(GuardNode g, ControlFlowNode node, boolean branch, P param);
signature predicate guardChecksSig(GuardNode g, Cfg::ControlFlowNode node, boolean branch, P param);
}
/**
@@ -693,10 +715,10 @@ module ParameterizedBarrierGuard<ParamSig P, WithParam<P>::guardChecksSig/4 guar
/** Gets a node that is safely guarded by the given guard check with parameter `param`. */
overlay[global]
ExprNode getABarrierNode(P param) {
exists(GuardNode g, EssaDefinition def, ControlFlowNode node, boolean branch |
AdjacentUses::useOfDef(def, node) and
exists(GuardNode g, SsaImpl::EssaDefinition def, Cfg::ControlFlowNode node, boolean branch |
SsaImpl::AdjacentUses::useOfDef(def, node) and
guardChecks(g, node, branch, param) and
AdjacentUses::useOfDef(def, result.asCfgNode()) and
SsaImpl::AdjacentUses::useOfDef(def, result.asCfgNode()) and
g.controlsBlock(result.asCfgNode().getBasicBlock(), branch)
)
}
@@ -712,7 +734,7 @@ module ExternalBarrierGuard {
private import semmle.python.ApiGraphs
overlay[global]
private predicate guardCheck(GuardNode g, ControlFlowNode node, boolean branch, string kind) {
private predicate guardCheck(GuardNode g, Cfg::ControlFlowNode node, boolean branch, string kind) {
exists(API::CallNode call, API::Node parameter |
parameter = call.getAParameter() and
parameter = ModelOutput::getABarrierGuardNode(kind, branch)
@@ -748,10 +770,10 @@ newtype TContent =
TSetElementContent() or
/** An element of a tuple at a specific index. */
TTupleElementContent(int index) {
exists(any(TupleNode tn).getElement(index))
exists(any(Cfg::TupleNode tn).getElement(index))
or
// Arguments can overflow and end up in the starred parameter tuple.
exists(any(CallNode cn).getArg(index))
exists(any(Cfg::CallNode cn).getArg(index))
or
// since flow summaries might use tuples, we ensure that we at least have valid
// TTupleElementContent for the 0..7 (7 was picked to match `small_tuple` in
@@ -768,10 +790,10 @@ newtype TContent =
or
// d["key"] = ...
key =
any(SubscriptNode sub | sub.isStore() | sub.getIndex().getNode().(StringLiteral).getText())
any(Cfg::SubscriptNode sub | sub.isStore() | sub.getIndex().getNode().(StringLiteral).getText())
or
// d.setdefault("key", ...)
exists(CallNode call | call.getFunction().(AttrNode).getName() = "setdefault" |
exists(Cfg::CallNode call | call.getFunction().(Cfg::AttrNode).getName() = "setdefault" |
key = call.getArg(0).getNode().(StringLiteral).getText()
)
} or

View File

@@ -5,6 +5,8 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.ImportStar
private import semmle.python.dataflow.new.TypeTracking
@@ -69,13 +71,11 @@ module ImportResolution {
* Holds if there is an ESSA step from `defFrom` to `defTo`, which should be allowed
* for import resolution.
*/
private predicate allowedEssaImportStep(EssaDefinition defFrom, EssaDefinition defTo) {
private predicate allowedEssaImportStep(SsaImpl::EssaDefinition defFrom, SsaImpl::EssaDefinition defTo) {
// to handle definitions guarded by if-then-else
defFrom = defTo.(PhiFunction).getAnInput()
or
// refined variable
// example: https://github.com/nvbn/thefuck/blob/ceeaeab94b5df5a4fe9d94d61e4f6b0bbea96378/thefuck/utils.py#L25-L45
defFrom = defTo.(EssaNodeRefinement).getInput().getDefinition()
defFrom = defTo.(SsaImpl::PhiFunction).getAnInput()
// Note: legacy ESSA refinement-step (e.g. for `foo.bar = X`) is
// not modelled in the new SSA. We rely on phi steps only.
}
/**
@@ -92,30 +92,32 @@ module ImportResolution {
// Definitions made inside `m` itself
//
// for code such as `foo = ...; foo.bar = ...` there will be TWO
// EssaDefinition/EssaVariable. One for `foo = ...` (AssignmentDefinition) and one
// SsaImpl::EssaDefinition/SsaImpl::EssaVariable. One for `foo = ...` (SsaImpl::AssignmentDefinition) and one
// for `foo.bar = ...`. The one for `foo.bar = ...` (EssaNodeRefinement). The
// EssaNodeRefinement is the one that will reach the end of the module (normal
// exit).
//
// However, we cannot just use the EssaNodeRefinement as the `val`, because the
// normal data-flow depends on use-use flow, and use-use flow targets CFG nodes not
// EssaNodes. So we need to go back from the EssaDefinition/EssaVariable that
// EssaNodes. So we need to go back from the SsaImpl::EssaDefinition/SsaImpl::EssaVariable that
// reaches the end of the module, to the first definition of the variable, and then
// track forwards using use-use flow to find a suitable CFG node that has flow into
// it from use-use flow.
exists(EssaVariable lastUseVar, EssaVariable firstDef |
exists(SsaImpl::EssaVariable lastUseVar, SsaImpl::EssaVariable firstDef |
lastUseVar.getName() = name and
// we ignore special variable $ introduced by our analysis (not used for anything)
// we ignore special variable * introduced by `from <pkg> import *` -- TODO: understand why we even have this?
not name in ["$", "*"] and
lastUseVar.getAUse() = m.getANormalExit() and
exists(Cfg::ControlFlowNode exit |
exit.isNormalExit() and exit.getScope() = m and lastUseVar.getAUse() = exit
) and
allowedEssaImportStep*(firstDef, lastUseVar) and
not allowedEssaImportStep(_, firstDef)
|
not LocalFlow::defToFirstUse(firstDef, _) and
val.asCfgNode() = firstDef.getDefinition().(EssaNodeDefinition).getDefiningNode()
val.asCfgNode() = firstDef.getDefinition().(SsaImpl::EssaNodeDefinition).getDefiningNode()
or
exists(ControlFlowNode mid, ControlFlowNode end |
exists(Cfg::ControlFlowNode mid, Cfg::ControlFlowNode end |
LocalFlow::defToFirstUse(firstDef, mid) and
LocalFlow::useToNextUse*(mid, end) and
not LocalFlow::useToNextUse(end, _) and
@@ -143,9 +145,9 @@ module ImportResolution {
* handles simple cases where we can statically tell that this is the case.
*/
private predicate all_mentions_name(Module m, string name) {
exists(DefinitionNode def, SequenceNode n |
exists(Cfg::DefinitionNode def, Cfg::SequenceNode n |
def.getValue() = n and
def.(NameNode).getId() = "__all__" and
def.(Cfg::NameNode).getId() = "__all__" and
def.getScope() = m and
any(StringLiteral s | s.getText() = name) = n.getAnElement().getNode()
)
@@ -158,18 +160,18 @@ module ImportResolution {
*/
private predicate no_or_complicated_all(Module m) {
// No mention of `__all__` in the module
not exists(DefinitionNode def | def.getScope() = m and def.(NameNode).getId() = "__all__")
not exists(Cfg::DefinitionNode def | def.getScope() = m and def.(Cfg::NameNode).getId() = "__all__")
or
// `__all__` is set to a non-sequence value
exists(DefinitionNode def |
def.(NameNode).getId() = "__all__" and
exists(Cfg::DefinitionNode def |
def.(Cfg::NameNode).getId() = "__all__" and
def.getScope() = m and
not def.getValue() instanceof SequenceNode
not def.getValue() instanceof Cfg::SequenceNode
)
or
// `__all__` is used in some way that doesn't involve storing a value in it. This usually means
// it is being mutated through `append` or `extend`, which we don't handle.
exists(NameNode n | n.getId() = "__all__" and n.getScope() = m and n.isLoad())
exists(Cfg::NameNode n | n.getId() = "__all__" and n.getScope() = m and n.isLoad())
}
private predicate potential_module_export(Module m, string name) {
@@ -177,7 +179,7 @@ module ImportResolution {
or
no_or_complicated_all(m) and
(
exists(NameNode n | n.getId() = name and n.getScope() = m and name.charAt(0) != "_")
exists(Cfg::NameNode n | n.getId() = name and n.getScope() = m and name.charAt(0) != "_")
or
exists(Alias a | a.getAsname().(Name).getId() = name and a.getValue().getScope() = m)
)
@@ -207,12 +209,12 @@ module ImportResolution {
/** Gets a module that may have been added to `sys.modules`. */
private Module sys_modules_module_with_name(string name) {
exists(ControlFlowNode n, DataFlow::Node mod |
exists(SubscriptNode sub |
exists(Cfg::ControlFlowNode n, DataFlow::Node mod |
exists(Cfg::SubscriptNode sub |
sub.getObject() = sys_modules_reference().asCfgNode() and
sub.getIndex() = n and
n.getNode().(StringLiteral).getText() = name and
sub.(DefinitionNode).getValue() = mod.asCfgNode() and
sub.(Cfg::DefinitionNode).getValue() = mod.asCfgNode() and
mod = getModuleReference(result)
)
)
@@ -324,11 +326,11 @@ module ImportResolution {
// name as a submodule, we always consider that this attribute _could_ be a
// reference to the submodule, even if we don't know that the submodule has been
// imported yet.
exists(string submodule, Module package, EssaVariable var |
exists(string submodule, Module package, SsaImpl::EssaVariable var |
submodule = var.getName() and
SsaSource::init_module_submodule_defn(var.getSourceVariable(), package.getEntryNode()) and
SsaSource::init_module_submodule_defn(var.getSourceVariable().getVariable(), package.getEntryNode()) and
m = getModuleFromName(package.getPackageName() + "." + submodule) and
result.asCfgNode() = var.getDefinition().(EssaNodeDefinition).getDefiningNode()
result.asCfgNode() = var.getDefinition().(SsaImpl::EssaNodeDefinition).getDefiningNode()
)
}

View File

@@ -3,6 +3,7 @@ overlay[local]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.internal.Builtins
private import semmle.python.dataflow.new.internal.ImportResolution
private import semmle.python.dataflow.new.DataFlow
@@ -15,7 +16,7 @@ module ImportStar {
*/
overlay[local]
cached
predicate namePossiblyDefinedInImportStar(NameNode n, string name, Scope s) {
predicate namePossiblyDefinedInImportStar(Cfg::NameNode n, string name, Scope s) {
n.isLoad() and
name = n.getId() and
s = n.getScope().getEnclosingScope*() and
@@ -52,7 +53,7 @@ module ImportStar {
/** Holds if a global variable called `name` is assigned a value in the module `m`. */
cached
predicate globalNameDefinedInModule(string name, Module m) {
exists(NameNode n |
exists(Cfg::NameNode n |
not exists(LocalVariable v | n.defines(v)) and
n.isStore() and
name = n.getId() and
@@ -66,7 +67,7 @@ module ImportStar {
*/
overlay[global]
cached
predicate importStarResolvesTo(NameNode n, Module m) {
predicate importStarResolvesTo(Cfg::NameNode n, Module m) {
m = getStarImported+(n.getEnclosingModule()) and
globalNameDefinedInModule(n.getId(), m) and
not isDefinedLocally(n.getNode())
@@ -99,7 +100,7 @@ module ImportStar {
*/
overlay[local]
cached
ControlFlowNode potentialImportStarBase(Scope s) {
result = any(ImportStarNode n | n.getScope() = s).getModule()
Cfg::ControlFlowNode potentialImportStarBase(Scope s) {
result = any(Cfg::ImportStarNode n | n.getScope() = s).getModule()
}
}

View File

@@ -170,6 +170,8 @@ overlay[local]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl
private import DataFlowPublic
/**
@@ -178,7 +180,7 @@ private import DataFlowPublic
* This class abstracts away the differing representations of comprehensions and
* for statements.
*/
class ForTarget extends ControlFlowNode {
class ForTarget extends Cfg::ControlFlowNode {
Expr source;
ForTarget() {
@@ -198,7 +200,7 @@ class ForTarget extends ControlFlowNode {
}
/** The LHS of an assignment, it also records the assigned value. */
class AssignmentTarget extends ControlFlowNode {
class AssignmentTarget extends Cfg::ControlFlowNode {
Expr value;
AssignmentTarget() {
@@ -209,7 +211,7 @@ class AssignmentTarget extends ControlFlowNode {
}
/** A direct (or top-level) target of an unpacking assignment. */
class UnpackingAssignmentDirectTarget extends ControlFlowNode instanceof SequenceNode {
class UnpackingAssignmentDirectTarget extends Cfg::ControlFlowNode instanceof Cfg::SequenceNode {
Expr value;
UnpackingAssignmentDirectTarget() {
@@ -222,7 +224,7 @@ class UnpackingAssignmentDirectTarget extends ControlFlowNode instanceof Sequenc
}
/** A (possibly recursive) target of an unpacking assignment. */
class UnpackingAssignmentTarget extends ControlFlowNode {
class UnpackingAssignmentTarget extends Cfg::ControlFlowNode {
UnpackingAssignmentTarget() {
this instanceof UnpackingAssignmentDirectTarget
or
@@ -231,10 +233,10 @@ class UnpackingAssignmentTarget extends ControlFlowNode {
}
/** A (possibly recursive) target of an unpacking assignment which is also a sequence. */
class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget instanceof SequenceNode {
ControlFlowNode getElement(int i) { result = super.getElement(i) }
class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget instanceof Cfg::SequenceNode {
Cfg::ControlFlowNode getElement(int i) { result = super.getElement(i) }
ControlFlowNode getAnElement() { result = this.getElement(_) }
Cfg::ControlFlowNode getAnElement() { result = this.getElement(_) }
}
/**
@@ -255,7 +257,7 @@ predicate iterableUnpackingAssignmentFlowStep(Node nodeFrom, Node nodeTo) {
predicate iterableUnpackingForReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
exists(ForTarget target |
nodeFrom.getNode().getNode() = target.getSource() and
target instanceof SequenceNode and
target instanceof Cfg::SequenceNode and
nodeTo = TIterableSequenceNode(target)
) and
(
@@ -323,11 +325,11 @@ predicate iterableUnpackingConvertingStoreStep(Node nodeFrom, Content c, Node no
*/
predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(
UnpackingAssignmentSequenceTarget target, int index, ControlFlowNode element, int starIndex
UnpackingAssignmentSequenceTarget target, int index, Cfg::ControlFlowNode element, int starIndex
|
target.getElement(starIndex) instanceof StarredNode
target.getElement(starIndex) instanceof Cfg::StarredNode
or
not exists(target.getAnElement().(StarredNode)) and
not exists(target.getAnElement().(Cfg::StarredNode)) and
starIndex = -1
|
nodeFrom.(CfgNode).getNode() = target and
@@ -342,18 +344,18 @@ predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo
else c.(TupleElementContent).getIndex() >= index - 1
) and
(
if element instanceof SequenceNode
if element instanceof Cfg::SequenceNode
then
// Step 5b
nodeTo = TIterableSequenceNode(element)
else
if element instanceof StarredNode
if element instanceof Cfg::StarredNode
then
// Step 5c
nodeTo = TIterableElementNode(element)
else
// Step 5a
exists(MultiAssignmentDefinition mad | element = mad.getDefiningNode() |
exists(SsaImpl::MultiAssignmentDefinition mad | element = mad.getDefiningNode() |
nodeTo.(CfgNode).getNode() = element
)
)
@@ -366,7 +368,7 @@ predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo
* content type `ListElementContent`.
*/
predicate iterableUnpackingStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) {
exists(ControlFlowNode starred, MultiAssignmentDefinition mad |
exists(Cfg::ControlFlowNode starred, SsaImpl::MultiAssignmentDefinition mad |
starred.getNode() instanceof Starred and
starred = mad.getDefiningNode()
|

View File

@@ -9,6 +9,7 @@ overlay[local]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import DataFlowPublic
private import DataFlowPrivate
private import semmle.python.internal.CachedStages
@@ -314,7 +315,7 @@ private module Cached {
*/
cached
predicate subscript(LocalSourceNode node, CfgNode subscript, CfgNode index) {
exists(CfgNode seq, SubscriptNode subscriptNode | subscriptNode = subscript.getNode() |
exists(CfgNode seq, Cfg::SubscriptNode subscriptNode | subscriptNode = subscript.getNode() |
node.flowsTo(seq) and
seq.getNode() = subscriptNode.getObject() and
index.getNode() = subscriptNode.getIndex()

View File

@@ -55,6 +55,7 @@ module;
private import python
private import DataFlowPublic
private import semmle.python.controlflow.internal.Cfg as Cfg
/**
* Holds when there is flow from the subject `nodeFrom` to the (top-level) pattern `nodeTo` of a `match` statement.
@@ -91,8 +92,8 @@ predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
or
// the interior pattern flows to the alias
nodeFrom.(CfgNode).getNode().getNode() = subject.getPattern() and
exists(PatternAliasDefinition pad | pad.getDefiningNode().getNode() = alias |
nodeTo.(CfgNode).getNode() = pad.getDefiningNode()
exists(Cfg::ControlFlowNode aliasCfg | aliasCfg.getNode() = alias |
nodeTo.(CfgNode).getNode() = aliasCfg
)
)
}
@@ -126,8 +127,8 @@ predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchCapturePattern capture, Name var | capture.getVariable() = var |
nodeFrom.(CfgNode).getNode().getNode() = capture and
exists(PatternCaptureDefinition pcd | pcd.getDefiningNode().getNode() = var |
nodeTo.(CfgNode).getNode() = pcd.getDefiningNode()
exists(Cfg::ControlFlowNode varCfg | varCfg.getNode() = var |
nodeTo.(CfgNode).getNode() = varCfg
)
)
}

View File

@@ -56,6 +56,16 @@ private module CfgForSsa implements BB::CfgSig<Py::Location> {
* We only track variables that are read at least once in their scope —
* tracking write-only variables is unnecessary work.
*/
/**
* A source variable for SSA, wrapping a Python AST `Variable`.
*
* We only track variables that are read at least once in their scope —
* tracking write-only variables would be unnecessary work — *except*
* for module-scope globals, where the "read" can be external (e.g.
* `import mymodule; mymodule.x`). Such globals are tracked
* unconditionally so that import-resolution can find their defining
* write.
*/
private newtype TSsaSourceVariable =
TPyVar(Py::Variable v) {
// Has a use somewhere — read-relevant for SSA.
@@ -63,6 +73,19 @@ private newtype TSsaSourceVariable =
or
// Or has a deletion (treated as a write that destroys the value).
exists(Cfg::NameNode n | n.deletes(v))
or
// Or is a module-scope global written in this module — must be
// tracked even if never read locally, because importers may read
// it as an attribute on the module object.
v.getScope() instanceof Py::Module and
exists(Cfg::NameNode n | n.defines(v))
or
// Or is a parameter — parameters must always have a
// `ParameterDefinition` for dataflow argument-routing to work,
// even if the parameter is never read in its scope. Mirrors
// legacy ESSA's `ParameterDefinition` (which fired for every
// parameter binding regardless of liveness).
exists(Py::Parameter p | p.asName() = v.getAStore())
}
/**
@@ -72,11 +95,44 @@ class SsaSourceVariable extends TSsaSourceVariable {
/** Gets the underlying Python AST variable. */
Py::Variable getVariable() { this = TPyVar(result) }
/** Gets the (textual) name of this variable. */
string getName() { result = this.getVariable().getId() }
/** Gets a textual representation of this source variable. */
string toString() { result = this.getVariable().toString() }
/** Gets the location of this source variable. */
Py::Location getLocation() { result = this.getVariable().getScope().getLocation() }
/** Gets the scope in which this variable lives. */
Py::Scope getScope() { result = this.getVariable().getScope() }
/**
* Gets a use of this variable as it appears in the source — a `NameNode`
* that loads or deletes the variable. Mirrors legacy
* `SsaSourceVariable.getASourceUse()`.
*/
Cfg::ControlFlowNode getASourceUse() {
exists(Cfg::NameNode n | result = n | n.uses(this.getVariable()) or n.deletes(this.getVariable()))
}
/**
* Gets an implicit use of this variable. The new SSA does not have
* implicit-use refinements, but we keep this for API parity — every
* normal-exit of the variable's scope counts as a sink, ensuring
* variables stay live to scope exit for taint-tracking.
*/
Cfg::ControlFlowNode getAnImplicitUse() {
result.isNormalExit() and result.getScope() = this.getScope()
}
/**
* Gets a use of this variable — either an explicit source use or an
* implicit use at scope exit. Mirrors legacy `SsaSourceVariable.getAUse()`.
*/
Cfg::ControlFlowNode getAUse() {
result = this.getASourceUse() or result = this.getAnImplicitUse()
}
}
/**
@@ -93,7 +149,13 @@ private predicate nonLocalReadIn(Py::Variable v, Py::Scope s) {
n.uses(v) and
n.getScope() = s and
not exists(Cfg::NameNode def | def.defines(v) and def.getScope() = s)
)
) and
// Match legacy ESSA: only create entry defs for variables that have
// at least one defining store somewhere — otherwise the entry def
// represents "nothing reaches here", which is the default anyway and
// introduces no useful flow. (Legacy's `ModuleVariable` required a
// store; this is the closure-aware generalisation.)
exists(Cfg::NameNode store | store.defines(v))
}
/**
@@ -164,11 +226,25 @@ private module SsaImplInput implements SsaImplCommon::InputSig<Py::Location, Cfg
}
predicate variableRead(CfgImpl::BasicBlock bb, int i, SourceVariable v, boolean certain) {
// Explicit source use — a `Name` load or a `del x` of the variable.
exists(Cfg::NameNode n |
bb.getNode(i) = n and
n.uses(v.getVariable()) and
certain = true
)
or
// Synthetic use at the normal exit of the variable's defining scope.
// This keeps every variable live to scope exit so that callers (e.g.
// `module_export` in ImportResolution.qll, or taint-tracking pass-through
// through unread locals) can ask "which definition reaches end of
// scope?". Mirrors legacy ESSA's `SsaSourceVariable.getAUse()` which
// included `getScope().getANormalExit()`.
exists(Cfg::ControlFlowNode exit |
exit.isNormalExit() and
exit.getScope() = v.getVariable().getScope() and
bb.getNode(i) = exit and
certain = true
)
}
}
@@ -250,40 +326,25 @@ class EssaNodeDefinition extends Ssa::WriteDefinition {
}
/**
* An assignment definition `x = e`. The defining node is `x`'s CFG
* node; the value is `e`'s CFG node.
* An assignment definition: any binding where the value being assigned
* is statically known via `Cfg::DefinitionNode.getValue()`. Includes
* plain assignments, walrus, annotated assignments, augmented
* assignments, import aliases (`import x` / `from m import x [as y]`),
* `with ... as x`, and for-target bindings (where `getValue()` returns
* the iter expression's CFG node). Excludes parameter bindings —
* those are modelled by `ParameterDefinition`.
*/
class AssignmentDefinition extends EssaNodeDefinition {
AssignmentDefinition() {
exists(Cfg::NameNode n | n = this.getDefiningNode() |
exists(Py::Assign a | a.getATarget() = n.getNode())
or
exists(Py::AnnAssign a | a.getTarget() = n.getNode() and exists(a.getValue()))
or
exists(Py::AssignExpr a | a.getTarget() = n.getNode())
or
exists(Py::AugAssign a | a.getTarget() = n.getNode())
exists(n.(Cfg::DefinitionNode).getValue()) and
not n.(Cfg::ControlFlowNode).isParameter()
)
}
/** Gets the CFG node for the value being assigned, if statically known. */
Cfg::ControlFlowNode getValue() {
exists(Cfg::NameNode target | target = this.getDefiningNode() |
exists(Py::Assign a |
a.getATarget() = target.getNode() and
result.getNode() = a.getValue()
)
or
exists(Py::AnnAssign a |
a.getTarget() = target.getNode() and
result.getNode() = a.getValue()
)
or
exists(Py::AssignExpr a |
a.getTarget() = target.getNode() and
result.getNode() = a.getValue()
)
)
result = this.getDefiningNode().(Cfg::DefinitionNode).getValue()
}
}
@@ -312,17 +373,41 @@ class WithDefinition extends EssaNodeDefinition {
/**
* An assignment where the LHS is a tuple/list and the RHS is unpacked:
* `a, b = (1, 2)` or `a, *rest = xs`. The defining node for each
* captured Name is the Name itself.
* `a, b = (1, 2)` or `a, *rest = xs`. The SSA def lives at the inner
* `Name` CFG node, but for IterableUnpacking integration we expose
* the enclosing `StarredNode` as the `getDefiningNode()` for `*rest`
* patterns — mirroring legacy ESSA's `multi_assignment_definition`,
* which placed the def at the StarredNode CFG node.
*/
class MultiAssignmentDefinition extends EssaNodeDefinition {
MultiAssignmentDefinition() {
exists(Cfg::NameNode n | n = this.getDefiningNode() |
exists(Cfg::NameNode n | n = super.getDefiningNode() |
exists(Py::Assign a, Py::Expr lhs |
a.getATarget() = lhs and
(lhs instanceof Py::Tuple or lhs instanceof Py::List) and
lhs.getASubExpression+() = n.getNode()
)
or
// For-loop with tuple/list target: `for a, b in xs:` —
// tuple-unpacking semantics applies to the for-target.
exists(Py::For f, Py::Expr lhs |
f.getTarget() = lhs and
(lhs instanceof Py::Tuple or lhs instanceof Py::List) and
lhs.getASubExpression+() = n.getNode()
)
)
}
override Cfg::ControlFlowNode getDefiningNode() {
// Default: the underlying `Name` CFG node (where the SSA def lives).
not exists(Cfg::StarredNode s | s.getNode().(Py::Starred).getValue() = super.getDefiningNode().getNode()) and
result = super.getDefiningNode()
or
// Exception: for `*rest`, expose the enclosing `Starred` CFG node
// so that `IterableUnpacking::iterableUnpackingStarredElementStoreStep`
// can attach the rest-list to it.
exists(Cfg::StarredNode s | s.getNode().(Py::Starred).getValue() = super.getDefiningNode().getNode() |
result = s
)
}
}
@@ -352,7 +437,14 @@ class ScopeEntryDefinition extends EssaNodeDefinition {
}
/** A phi node (alias matching legacy naming). */
class PhiFunction = PhiNode;
class PhiFunction extends PhiNode {
/**
* Gets an input to this phi function (a definition that flows into
* the phi from one of its predecessor blocks). Mirrors legacy
* ESSA's `PhiFunction.getAnInput()`.
*/
Ssa::Definition getAnInput() { Ssa::phiHasInputFromBlock(this, result, _) }
}
/** Base class for all ESSA definitions (legacy-shaped). */
class EssaDefinition = Ssa::Definition;
@@ -365,8 +457,14 @@ class EssaVariable extends Ssa::Definition {
/** Gets the underlying SSA definition (legacy name). */
Ssa::Definition getDefinition() { result = this }
/** Gets a CFG node where this definition is used. */
Cfg::NameNode getAUse() {
/**
* Gets a CFG node where this definition is used. Includes regular
* `Name` reads as well as the synthetic scope-exit "use" registered
* via `SsaImplInput::variableRead` — mirrors legacy ESSA's
* `EssaVariable.getAUse()` which inherited the synthetic exit-use
* from `SsaSourceVariable`.
*/
Cfg::ControlFlowNode getAUse() {
exists(CfgImpl::BasicBlock bb, int i |
Ssa::ssaDefReachesRead(this.getSourceVariable(), this, bb, i) and
bb.getNode(i) = result
@@ -376,6 +474,9 @@ class EssaVariable extends Ssa::Definition {
/** Gets the (textual) name of the underlying variable. */
string getName() { result = this.getSourceVariable().getVariable().getId() }
/** Gets the scope in which this variable lives. */
Py::Scope getScope() { result = this.getSourceVariable().getVariable().getScope() }
/** Gets an ultimate non-phi ancestor of this definition. */
EssaVariable getAnUltimateDefinition() {
if this instanceof PhiNode

View File

@@ -1,4 +1,6 @@
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl
@@ -75,7 +77,7 @@ import Cached
* and isn't a big problem in practice.
*/
predicate concatStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(BinaryExprNode add | add = nodeTo.getNode() |
exists(Cfg::BinaryExprNode add | add = nodeTo.getNode() |
add.getOp() instanceof Add and add.getAnOperand() = nodeFrom.getNode()
)
}
@@ -84,7 +86,7 @@ predicate concatStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to subscripting.
*/
predicate subscriptStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
nodeTo.getNode().(SubscriptNode).getObject() = nodeFrom.getNode()
nodeTo.getNode().(Cfg::SubscriptNode).getObject() = nodeFrom.getNode()
}
/**
@@ -100,15 +102,15 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
(
call = API::builtin(["str", "bytes", "unicode"]).getACall()
or
call.getFunction().asCfgNode().(NameNode).getId() in ["str", "bytes", "unicode"]
call.getFunction().asCfgNode().(Cfg::NameNode).getId() in ["str", "bytes", "unicode"]
) and
nodeFrom in [call.getArg(0), call.getArgByName("object")]
)
or
// String methods. Note that this doesn't recognize `meth = "foo".upper; meth()`
exists(CallNode call, string method_name, ControlFlowNode object |
exists(Cfg::CallNode call, string method_name, Cfg::ControlFlowNode object |
call = nodeTo.getNode() and
object = call.getFunction().(AttrNode).getObject(method_name)
object = call.getFunction().(Cfg::AttrNode).getObject(method_name)
|
nodeFrom.getNode() = object and
method_name in [
@@ -139,7 +141,7 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
)
or
// % formatting
exists(BinaryExprNode fmt | fmt = nodeTo.getNode() |
exists(Cfg::BinaryExprNode fmt | fmt = nodeTo.getNode() |
fmt.getOp() instanceof Mod and
(
fmt.getLeft() = nodeFrom.getNode()
@@ -149,7 +151,7 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
)
or
// string multiplication -- `"foo" * 10`
exists(BinaryExprNode mult | mult = nodeTo.getNode() |
exists(Cfg::BinaryExprNode mult | mult = nodeTo.getNode() |
mult.getOp() instanceof Mult and
mult.getLeft() = nodeFrom.getNode()
)
@@ -207,8 +209,8 @@ predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
* the variable `f` is tainted if the result of `open("foo")` is tainted.
*/
predicate asyncWithStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
var = any(WithDefinition wd).getDefiningNode()
exists(With with, Cfg::ControlFlowNode contextManager, Cfg::ControlFlowNode var |
var = any(SsaImpl::WithDefinition wd).getDefiningNode()
|
nodeFrom.(DataFlow::CfgNode).getNode() = contextManager and
nodeTo.(DataFlow::CfgNode).getNode() = var and

View File

@@ -2,6 +2,8 @@ import codeql.util.Unit
import codeql.typetracking.TypeTracking as Shared
import codeql.typetracking.internal.TypeTrackingImpl as SharedImpl
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl
private import semmle.python.internal.CachedStages
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
@@ -94,8 +96,11 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
Node returnOf(Node callable, SummaryComponent return) {
return = FlowSummaryImpl::Private::SummaryComponent::return() and
// `result` should be the return value of a callable expression (lambda or function) referenced by `callable`
result.asCfgNode() =
callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode()
exists(Return ret |
ret.getScope() =
callable.getALocalSource().asExpr().(CallableExpr).getInnerScope() and
result.asCfgNode().getNode() = ret.getValue()
)
}
// Relating callables to nodes
@@ -160,7 +165,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
// ignore the flow steps from the synthetic sequence node to the real sequence node,
// since we only support one level of content in type-trackers, and the nested
// structure requires two levels at least to be useful.
not exists(SequenceNode outer |
not exists(Cfg::SequenceNode outer |
outer.getAnElement() = nodeTo.asCfgNode() and
IterableUnpacking::iterableUnpackingTupleFlowStep(nodeFrom, nodeTo)
)
@@ -259,7 +264,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
// Since we only support one level of content in type-trackers we don't actually
// support `(aa, ab), (ba, bb) = ...`. Therefore we exclude the read-step from `(aa,
// ab)` to `aa` (since it is not needed).
not exists(SequenceNode outer |
not exists(Cfg::SequenceNode outer |
outer.getAnElement() = nodeFrom.asCfgNode() and
IterableUnpacking::iterableUnpackingTupleFlowStep(_, nodeFrom)
) and
@@ -269,7 +274,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
IterableUnpacking::iterableUnpackingForReadStep(_, _, seq) and
IterableUnpacking::iterableUnpackingConvertingReadStep(seq, _, elem) and
IterableUnpacking::iterableUnpackingConvertingStoreStep(elem, _, nodeFrom) and
nodeFrom.asCfgNode() instanceof SequenceNode
nodeFrom.asCfgNode() instanceof Cfg::SequenceNode
)
or
TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, content)
@@ -306,13 +311,13 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
//
// nodeFrom is `expr`
// nodeTo is entry node for `f`
exists(ScopeEntryDefinition e, SsaSourceVariable var, DefinitionNode def |
exists(SsaImpl::ScopeEntryDefinition e, SsaImpl::SsaSourceVariable var, Cfg::DefinitionNode def |
e.getSourceVariable() = var and
var.hasDefiningNode(def)
def.getNode() = var.getVariable().getAStore()
|
nodeTo.(DataFlowPublic::ScopeEntryDefinitionNode).getDefinition() = e and
nodeFrom.asCfgNode() = def and
var.getScope().getScope*() = nodeFrom.getScope()
var.getVariable().getScope().getScope*() = nodeFrom.getScope()
)
}

View File

@@ -3,6 +3,9 @@ overlay[local]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl
private import DataFlowPublic
private import semmle.python.dataflow.new.internal.DataFlowPrivate
private import codeql.dataflow.VariableCapture as Shared
@@ -14,10 +17,10 @@ private import codeql.dataflow.VariableCapture as Shared
// The first is the main implementation, the second is a performance motivated restriction.
// The restriction is to clear any `CapturedVariableContent` before writing a new one
// to avoid long access paths (see the link for a nice explanation).
private module CaptureInput implements Shared::InputSig<Location, Cfg::BasicBlock> {
private module CaptureInput implements Shared::InputSig<Location, CfgImpl::BasicBlock> {
private import python as PY
additional class ExprCfgNode extends ControlFlowNode {
additional class ExprCfgNode extends Cfg::ControlFlowNode {
ExprCfgNode() { isExpressionNode(this) }
}
@@ -25,7 +28,9 @@ private module CaptureInput implements Shared::InputSig<Location, Cfg::BasicBloc
predicate isConstructor() { none() }
}
Callable basicBlockGetEnclosingCallable(Cfg::BasicBlock bb) { result = bb.getScope() }
Callable basicBlockGetEnclosingCallable(CfgImpl::BasicBlock bb) {
result = bb.getEnclosingCallable().asScope()
}
class CapturedVariable extends LocalVariable {
Function f;
@@ -51,23 +56,23 @@ private module CaptureInput implements Shared::InputSig<Location, Cfg::BasicBloc
class CapturedParameter extends CapturedVariable {
CapturedParameter() { this.isParameter() }
ControlFlowNode getCfgNode() { result.getNode().(Parameter) = this.getAnAccess() }
Cfg::ControlFlowNode getCfgNode() { result.getNode().(Parameter) = this.getAnAccess() }
}
class Expr extends ExprCfgNode {
predicate hasCfgNode(Cfg::BasicBlock bb, int i) { this = bb.getNode(i) }
predicate hasCfgNode(CfgImpl::BasicBlock bb, int i) { this = bb.getNode(i) }
}
class VariableWrite extends ControlFlowNode {
class VariableWrite extends Cfg::ControlFlowNode {
CapturedVariable v;
VariableWrite() {
exists(DefinitionNode d | d.getNode() = v.getAStore() | this = d.getValue())
exists(Cfg::DefinitionNode d | d.getNode() = v.getAStore() | this = d.getValue())
}
CapturedVariable getVariable() { result = v }
predicate hasCfgNode(Cfg::BasicBlock bb, int i) { this = bb.getNode(i) }
predicate hasCfgNode(CfgImpl::BasicBlock bb, int i) { this = bb.getNode(i) }
}
class VariableRead extends Expr {
@@ -82,9 +87,14 @@ private module CaptureInput implements Shared::InputSig<Location, Cfg::BasicBloc
// TODO: Other languages have an extra case here looking like
// simpleAstFlowStep(nodeFrom, nodeTo)
// we should investigate the potential benefit of adding that.
exists(SsaVariable def |
exists(SsaImpl::EssaVariable def |
def.getAUse() = nodeTo and
def.getAnUltimateDefinition().getDefinition().(DefinitionNode).getValue() = nodeFrom
def.getAnUltimateDefinition()
.getDefinition()
.(SsaImpl::EssaNodeDefinition)
.getDefiningNode()
.(Cfg::DefinitionNode)
.getValue() = nodeFrom
)
}
@@ -109,7 +119,7 @@ class CapturedVariable = CaptureInput::CapturedVariable;
class ClosureExpr = CaptureInput::ClosureExpr;
module Flow = Shared::Flow<Location, Cfg, CaptureInput>;
module Flow = Shared::Flow<Location, Cfg::CfgSigImpl, CaptureInput>;
private Flow::ClosureNode asClosureNode(Node n) {
result = n.(SynthCaptureNode).getSynthesizedCaptureNode()

View File

@@ -4,6 +4,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.RemoteFlowSources
@@ -73,7 +74,9 @@ module Bottle {
/** A response returned by a view callable. */
class BottleReturnResponse extends Http::Server::HttpResponse::Range {
BottleReturnResponse() {
this.asCfgNode() = any(View::ViewCallable vc).getAReturnValueFlowNode()
exists(View::ViewCallable vc, Return ret |
ret.getScope() = vc and this.asCfgNode().getNode() = ret.getValue()
)
}
override DataFlow::Node getBody() { result = this }
@@ -154,9 +157,9 @@ module Bottle {
DataFlow::Node value;
HeaderWriteSubscript() {
exists(SubscriptNode subscript |
exists(Cfg::SubscriptNode subscript |
this.asCfgNode() = subscript and
value.asCfgNode() = subscript.(DefinitionNode).getValue() and
value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and
name.asCfgNode() = subscript.getIndex() and
subscript.getObject() = headers().asSource().asCfgNode()
)

View File

@@ -4,6 +4,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
@@ -1305,7 +1306,7 @@ module PrivateDjango {
dict.(DataFlow::MethodCallNode).calls(files, "dict")
)
|
this.asCfgNode().(SubscriptNode).getObject() = dict.asCfgNode()
this.asCfgNode().(Cfg::SubscriptNode).getObject() = dict.asCfgNode()
or
this.(DataFlow::MethodCallNode).calls(dict, "get")
)
@@ -1314,7 +1315,7 @@ module PrivateDjango {
exists(DataFlow::AttrRead files, DataFlow::MethodCallNode getlistCall |
files.accesses(instance(), "FILES") and
getlistCall.calls(files, "getlist") and
this.asCfgNode().(SubscriptNode).getObject() = getlistCall.asCfgNode()
this.asCfgNode().(Cfg::SubscriptNode).getObject() = getlistCall.asCfgNode()
)
}
}
@@ -2216,7 +2217,7 @@ module PrivateDjango {
DataFlow::Node value;
DjangoResponseCookieSubscriptWrite() {
exists(SubscriptNode subscript, DataFlow::AttrRead cookieLookup |
exists(Cfg::SubscriptNode subscript, DataFlow::AttrRead cookieLookup |
// To give `this` a value, we need to choose between either LHS or RHS,
// and just go with the LHS
this.asCfgNode() = subscript
@@ -2228,7 +2229,7 @@ module PrivateDjango {
|
cookieLookup.flowsTo(subscriptObj)
) and
value.asCfgNode() = subscript.(DefinitionNode).getValue() and
value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and
index.asCfgNode() = subscript.getIndex()
)
}
@@ -2249,7 +2250,7 @@ module PrivateDjango {
DataFlow::Node value;
DjangoResponseHeaderSubscriptWrite() {
exists(SubscriptNode subscript, DataFlow::AttrRead headerLookup |
exists(Cfg::SubscriptNode subscript, DataFlow::AttrRead headerLookup |
// To give `this` a value, we need to choose between either LHS or RHS,
// and just go with the LHS
this.asCfgNode() = subscript
@@ -2261,7 +2262,7 @@ module PrivateDjango {
|
headerLookup.flowsTo(subscriptObj)
) and
value.asCfgNode() = subscript.(DefinitionNode).getValue() and
value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and
index.asCfgNode() = subscript.getIndex()
)
}
@@ -2284,14 +2285,14 @@ module PrivateDjango {
DataFlow::Node value;
DjangoResponseSubscriptWrite() {
exists(SubscriptNode subscript |
exists(Cfg::SubscriptNode subscript |
// To give `this` a value, we need to choose between either LHS or RHS,
// and just go with the LHS
this.asCfgNode() = subscript
|
subscript.getObject() =
DjangoImpl::DjangoHttp::Response::HttpResponse::instance().asCfgNode() and
value.asCfgNode() = subscript.(DefinitionNode).getValue() and
value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and
index.asCfgNode() = subscript.getIndex()
)
}
@@ -2426,7 +2427,7 @@ module PrivateDjango {
/** Gets a reference to the result of calling the `as_view` classmethod of this class. */
private DataFlow::TypeTrackingNode asViewResult(DataFlow::TypeTracker t) {
t.start() and
result.asCfgNode().(CallNode).getFunction() = this.asViewRef().asCfgNode()
result.asCfgNode().(Cfg::CallNode).getFunction() = this.asViewRef().asCfgNode()
or
exists(DataFlow::TypeTracker t2 | result = this.asViewResult(t2).track(t2, t))
}
@@ -2872,7 +2873,9 @@ module PrivateDjango {
DataFlow::CfgNode
{
DjangoRedirectViewGetRedirectUrlReturn() {
node = any(GetRedirectUrlFunction f).getAReturnValueFlowNode()
exists(GetRedirectUrlFunction f, Return ret |
ret.getScope() = f and node.getNode() = ret.getValue()
)
}
override DataFlow::Node getRedirectLocation() { result = this }

View File

@@ -4,6 +4,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
@@ -309,7 +310,11 @@ module FastApi {
FastApiRouteSetup routeSetup;
FastApiRequestHandlerReturn() {
node = routeSetup.getARequestHandler().getAReturnValueFlowNode()
exists(Function h, Return ret |
h = routeSetup.getARequestHandler() and
ret.getScope() = h and
node.getNode() = ret.getValue()
)
}
override DataFlow::Node getBody() { result = this }
@@ -438,7 +443,7 @@ module FastApi {
DataFlow::Node value;
HeaderSubscriptWrite() {
exists(SubscriptNode subscript, DataFlow::AttrRead headerLookup |
exists(Cfg::SubscriptNode subscript, DataFlow::AttrRead headerLookup |
// To give `this` a value, we need to choose between either LHS or RHS,
// and just go with the LHS
this.asCfgNode() = subscript
@@ -447,7 +452,7 @@ module FastApi {
exists(DataFlow::Node subscriptObj | subscriptObj.asCfgNode() = subscript.getObject() |
headerLookup.flowsTo(subscriptObj)
) and
value.asCfgNode() = subscript.(DefinitionNode).getValue() and
value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and
index.asCfgNode() = subscript.getIndex()
)
}

View File

@@ -536,7 +536,7 @@ module Flask {
FlaskRouteHandlerReturn() {
exists(Function routeHandler |
routeHandler = any(FlaskRouteSetup rs).getARequestHandler() and
node = routeHandler.getAReturnValueFlowNode() and
exists(Return ret | ret.getScope() = routeHandler and node.getNode() = ret.getValue()) and
not this instanceof Flask::Response::InstanceSource
)
}

View File

@@ -4,6 +4,7 @@
*/
import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.TaintTracking
import semmle.python.ApiGraphs
@@ -51,9 +52,9 @@ module Gradio {
// limit only to lists of parameters given to `inputs`.
(
(
call.getKeywordParameter("inputs").asSink().asCfgNode() instanceof ListNode
call.getKeywordParameter("inputs").asSink().asCfgNode() instanceof Cfg::ListNode
or
call.getParameter(1).asSink().asCfgNode() instanceof ListNode
call.getParameter(1).asSink().asCfgNode() instanceof Cfg::ListNode
) and
(
this = call.getKeywordParameter("inputs").getASubscript().getAValueReachingSink()
@@ -75,8 +76,8 @@ module Gradio {
exists(GradioInput call |
this = call.getParameter(0, "fn").getParameter(_).asSource() and
// exclude lists of parameters given to `inputs`
not call.getKeywordParameter("inputs").asSink().asCfgNode() instanceof ListNode and
not call.getParameter(1).asSink().asCfgNode() instanceof ListNode
not call.getKeywordParameter("inputs").asSink().asCfgNode() instanceof Cfg::ListNode and
not call.getParameter(1).asSink().asCfgNode() instanceof Cfg::ListNode
)
}
@@ -105,16 +106,16 @@ module Gradio {
// handle cases where there are multiple arguments passed as a list to `inputs`
(
(
node.getKeywordParameter("inputs").asSink().asCfgNode() instanceof ListNode
node.getKeywordParameter("inputs").asSink().asCfgNode() instanceof Cfg::ListNode
or
node.getParameter(1).asSink().asCfgNode() instanceof ListNode
node.getParameter(1).asSink().asCfgNode() instanceof Cfg::ListNode
) and
exists(int i | nodeTo = node.getParameter(0, "fn").getParameter(i).asSource() |
nodeFrom.asCfgNode() =
node.getKeywordParameter("inputs").asSink().asCfgNode().(ListNode).getElement(i)
node.getKeywordParameter("inputs").asSink().asCfgNode().(Cfg::ListNode).getElement(i)
or
nodeFrom.asCfgNode() =
node.getParameter(1).asSink().asCfgNode().(ListNode).getElement(i)
node.getParameter(1).asSink().asCfgNode().(Cfg::ListNode).getElement(i)
)
)
)

View File

@@ -4,6 +4,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
@@ -46,7 +47,7 @@ module MarkupSafeModel {
/** A direct instantiation of `markupsafe.Markup`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
override CallNode node;
override Cfg::CallNode node;
ClassInstantiation() { this = classRef().getACall() }
}
@@ -64,7 +65,7 @@ module MarkupSafeModel {
/** A string concatenation with a `markupsafe.Markup` involved. */
class StringConcat extends Markup::InstanceSource, DataFlow::CfgNode {
override BinaryExprNode node;
override Cfg::BinaryExprNode node;
StringConcat() {
node.getOp() instanceof Add and
@@ -79,7 +80,7 @@ module MarkupSafeModel {
/** A %-style string format with `markupsafe.Markup` as the format string. */
class PercentStringFormat extends Markup::InstanceSource, DataFlow::CfgNode {
override BinaryExprNode node;
override Cfg::BinaryExprNode node;
PercentStringFormat() {
node.getOp() instanceof Mod and

View File

@@ -7,6 +7,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.data.ModelsAsData
@@ -56,7 +57,7 @@ module Pycurl {
{
OutgoingRequestCall() {
this = setopt().getACall() and
this.getArg(0).asCfgNode().(AttrNode).getName() = "URL"
this.getArg(0).asCfgNode().(Cfg::AttrNode).getName() = "URL"
}
override DataFlow::Node getAUrlPart() {
@@ -81,7 +82,7 @@ module Pycurl {
private class CurlSslCall extends Http::Client::Request::Range instanceof DataFlow::CallCfgNode {
CurlSslCall() {
this = setopt().getACall() and
this.getArg(0).asCfgNode().(AttrNode).getName() = ["SSL_VERIFYPEER", "SSL_VERIFYHOST"]
this.getArg(0).asCfgNode().(Cfg::AttrNode).getName() = ["SSL_VERIFYPEER", "SSL_VERIFYHOST"]
}
override DataFlow::Node getAUrlPart() { none() }

View File

@@ -7,6 +7,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
@@ -93,7 +94,7 @@ module Pydantic {
// be a Pydantic model. So `model[0]` will be an overapproximation, but should not
// really cause problems (since we don't expect real code to contain such accesses)
nodeFrom = instance() and
nodeTo.asCfgNode().(SubscriptNode).getObject() = nodeFrom.asCfgNode()
nodeTo.asCfgNode().(Cfg::SubscriptNode).getObject() = nodeFrom.asCfgNode()
}
/**

View File

@@ -166,7 +166,9 @@ module Pyramid {
/** A response returned by a view callable. */
private class PyramidReturnResponse extends Http::Server::HttpResponse::Range {
PyramidReturnResponse() {
this.asCfgNode() = any(View::ViewCallable vc).getAReturnValueFlowNode() and
exists(View::ViewCallable vc, Return ret |
ret.getScope() = vc and this.asCfgNode().getNode() = ret.getValue()
) and
not this = instance()
}

View File

@@ -6,6 +6,7 @@ overlay[local?]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
@@ -1246,7 +1247,7 @@ module StdlibPrivate {
/** An additional taint step for calls to `os.path.join` */
private class OsPathJoinCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(CallNode call |
exists(Cfg::CallNode call |
nodeTo.asCfgNode() = call and
call = OS::OsPath::join().getACall().asCfgNode() and
call.getAnArg() = nodeFrom.asCfgNode()
@@ -1317,13 +1318,13 @@ module StdlibPrivate {
// run, so if we're able to, we only mark the first element as the command
// (and not the arguments to the command).
//
result.asCfgNode() = arg_args.asCfgNode().(SequenceNode).getElement(0)
result.asCfgNode() = arg_args.asCfgNode().(Cfg::SequenceNode).getElement(0)
or
// Either the "args" argument is not a sequence (which is valid) or we where
// just not able to figure it out. Simply mark the "args" argument as the
// command.
//
not arg_args.asCfgNode() instanceof SequenceNode and
not arg_args.asCfgNode() instanceof Cfg::SequenceNode and
result = arg_args
)
)
@@ -1542,7 +1543,7 @@ module StdlibPrivate {
* See https://docs.python.org/3/library/functions.html#eval
*/
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CallCfgNode {
override CallNode node;
override Cfg::CallNode node;
BuiltinsEvalCall() { this = API::builtin("eval").getACall() }
@@ -1923,7 +1924,7 @@ module StdlibPrivate {
nodeFrom = instance().getAValueReachableFromSource() and
nodeTo = [getvalueRef(), getfirstRef(), getlistRef()].getAValueReachableFromSource()
or
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(CallNode).getFunction() and
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(Cfg::CallNode).getFunction() and
(
nodeFrom = getvalueRef().getAValueReachableFromSource() and
nodeTo = getvalueResult().asSource()
@@ -1939,7 +1940,7 @@ module StdlibPrivate {
nodeFrom in [
instance().getAValueReachableFromSource(), fieldList().getAValueReachableFromSource()
] and
nodeTo.asCfgNode().(SubscriptNode).getObject() = nodeFrom.asCfgNode()
nodeTo.asCfgNode().(Cfg::SubscriptNode).getObject() = nodeFrom.asCfgNode()
or
// Attributes on Field
nodeFrom = field().getAValueReachableFromSource() and
@@ -2254,8 +2255,8 @@ module StdlibPrivate {
DataFlow::CfgNode
{
WsgirefSimpleServerApplicationReturn() {
exists(WsgirefSimpleServerApplication requestHandler |
node = requestHandler.getAReturnValueFlowNode()
exists(WsgirefSimpleServerApplication requestHandler, Return ret |
ret.getScope() = requestHandler and node.getNode() = ret.getValue()
)
}
@@ -2337,9 +2338,9 @@ module StdlibPrivate {
DataFlow::Node value;
HeaderWriteSubscript() {
exists(SubscriptNode subscript |
exists(Cfg::SubscriptNode subscript |
this.asCfgNode() = subscript and
value.asCfgNode() = subscript.(DefinitionNode).getValue() and
value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and
name.asCfgNode() = subscript.getIndex() and
subscript.getObject() = instance().asCfgNode()
)
@@ -2681,7 +2682,7 @@ module StdlibPrivate {
or
// Data injection
// Special handling of the `/` operator
exists(BinaryExprNode slash, DataFlow::Node pathOperand, DataFlow::TypeTracker t2 |
exists(Cfg::BinaryExprNode slash, DataFlow::Node pathOperand, DataFlow::TypeTracker t2 |
slash.getOp() instanceof Div and
pathOperand.asCfgNode() = slash.getAnOperand() and
pathlibPath(t2).flowsTo(pathOperand) and
@@ -2806,7 +2807,7 @@ module StdlibPrivate {
pathlibPath().flowsTo(nodeTo) and
(
// Special handling of the `/` operator
exists(BinaryExprNode slash, DataFlow::Node pathOperand |
exists(Cfg::BinaryExprNode slash, DataFlow::Node pathOperand |
slash.getOp() instanceof Div and
pathOperand.asCfgNode() = slash.getAnOperand() and
pathlibPath().flowsTo(pathOperand)
@@ -4605,9 +4606,9 @@ module StdlibPrivate {
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(CallNode c, string name, ControlFlowNode n, DataFlow::AttributeContent ac |
c.getFunction().(NameNode).getId() = "replace" or
c.getFunction().(AttrNode).getName() = "replace"
exists(Cfg::CallNode c, string name, Cfg::ControlFlowNode n, DataFlow::AttributeContent ac |
c.getFunction().(Cfg::NameNode).getId() = "replace" or
c.getFunction().(Cfg::AttrNode).getName() = "replace"
|
n = c.getArgByName(name) and
ac.getAttribute() = name and
@@ -5151,10 +5152,10 @@ module StdlibPrivate {
* See https://docs.python.org/3.9/library/stdtypes.html#str.startswith
*/
private class StartswithCall extends Path::SafeAccessCheck::Range {
StartswithCall() { this.(CallNode).getFunction().(AttrNode).getName() = "startswith" }
StartswithCall() { this.(Cfg::CallNode).getFunction().(Cfg::AttrNode).getName() = "startswith" }
override predicate checks(ControlFlowNode node, boolean branch) {
node = this.(CallNode).getFunction().(AttrNode).getObject() and
override predicate checks(Cfg::ControlFlowNode node, boolean branch) {
node = this.(Cfg::CallNode).getFunction().(Cfg::AttrNode).getObject() and
branch = true
}
}

View File

@@ -8,6 +8,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.security.dataflow.UrlRedirectCustomizations
@@ -91,7 +92,7 @@ private module Urllib {
* A read of the `netloc` attribute of a parsed URL as returned by `urllib.parse.urlparse`,
* which is being checked in a way that is relevant for URL redirection vulnerabilities.
*/
private predicate netlocCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
private predicate netlocCheck(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) {
exists(DataFlow::CallCfgNode urlParseCall, DataFlow::AttrRead netlocRead |
urlParseCall = getUrlParseCall() and
netlocRead = urlParseCall.getAnAttributeRead("netloc") and

View File

@@ -4,6 +4,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
@@ -72,9 +73,9 @@ module Tornado {
DataFlow::Node value;
TornadoHeaderSubscriptWrite() {
exists(SubscriptNode subscript |
exists(Cfg::SubscriptNode subscript |
subscript.getObject() = instance().asCfgNode() and
value.asCfgNode() = subscript.(DefinitionNode).getValue() and
value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and
index.asCfgNode() = subscript.getIndex() and
this.asCfgNode() = subscript
)
@@ -422,7 +423,7 @@ module Tornado {
// be able to do something more structured for providing modeling of the members
// of a container-object.
exists(DataFlow::AttrRead files | files.accesses(instance(), "cookies") |
this.asCfgNode().(SubscriptNode).getObject() = files.asCfgNode()
this.asCfgNode().(Cfg::SubscriptNode).getObject() = files.asCfgNode()
or
this.(DataFlow::MethodCallNode).calls(files, "get")
)
@@ -479,20 +480,20 @@ module Tornado {
// routing
// ---------------------------------------------------------------------------
/** Gets a sequence that defines a number of route rules */
SequenceNode routeSetupRuleList() {
exists(CallNode call |
Cfg::SequenceNode routeSetupRuleList() {
exists(Cfg::CallNode call |
call = any(TornadoModule::Web::Application::ClassInstantiation c).asCfgNode()
|
result in [call.getArg(0), call.getArgByName("handlers")]
)
or
exists(CallNode call |
exists(Cfg::CallNode call |
call.getFunction() = TornadoModule::Web::Application::add_handlers().asCfgNode()
|
result in [call.getArg(1), call.getArgByName("host_handlers")]
)
or
result = routeSetupRuleList().getElement(_).(TupleNode).getElement(1)
result = routeSetupRuleList().getElement(_).(Cfg::TupleNode).getElement(1)
}
/** A tornado route setup. */
@@ -515,12 +516,12 @@ module Tornado {
/** A route setup using a tuple. */
private class TornadoTupleRouteSetup extends TornadoRouteSetup, DataFlow::CfgNode {
override TupleNode node;
override Cfg::TupleNode node;
TornadoTupleRouteSetup() {
node = routeSetupRuleList().getElement(_) and
count(node.getElement(_)) = 2 and
not node.getElement(1) instanceof SequenceNode
not node.getElement(1) instanceof Cfg::SequenceNode
}
override DataFlow::Node getUrlPatternArg() { result.asCfgNode() = node.getElement(0) }

View File

@@ -182,7 +182,9 @@ private module Twisted {
DataFlow::CfgNode
{
TwistedResourceRenderMethodReturn() {
this.asCfgNode() = any(TwistedResourceRenderMethod meth).getAReturnValueFlowNode()
exists(TwistedResourceRenderMethod meth, Return ret |
ret.getScope() = meth and this.asCfgNode().getNode() = ret.getValue()
)
}
override DataFlow::Node getBody() { result = this }

View File

@@ -6,6 +6,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.ApiGraphs
@@ -221,9 +222,9 @@ module Werkzeug {
DataFlow::Node value;
HeaderWriteSubscript() {
exists(SubscriptNode subscript |
exists(Cfg::SubscriptNode subscript |
this.asCfgNode() = subscript and
value.asCfgNode() = subscript.(DefinitionNode).getValue() and
value.asCfgNode() = subscript.(Cfg::DefinitionNode).getValue() and
name.asCfgNode() = subscript.getIndex() and
subscript.getObject() = instance().asCfgNode()
)

View File

@@ -8,6 +8,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
@@ -28,7 +29,7 @@ private module Yaml {
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
*/
private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
override CallNode node;
override Cfg::CallNode node;
string func_name;
YamlLoadCall() {

View File

@@ -4,6 +4,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
@@ -111,7 +112,7 @@ module Yarl {
}
private predicate yarlUrlIsAbsoluteCall(
DataFlow::GuardNode g, ControlFlowNode node, boolean branch
DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch
) {
exists(ClassInstantiation instance, DataFlow::MethodCallNode call |
call.calls(instance, "is_absolute") and

View File

@@ -3,6 +3,7 @@
*/
import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts as Concepts
private import semmle.python.regex
@@ -78,7 +79,7 @@ private module FindRegexMode {
t.start() and
exists(API::Node flag | flag_name = canonical_name(flag) and result = flag.asSource())
or
exists(BinaryExprNode binop, DataFlow::Node operand |
exists(Cfg::BinaryExprNode binop, DataFlow::Node operand |
operand.getALocalSource() = re_flag_tracker(flag_name, t.continue()) and
operand.asCfgNode() = binop.getAnOperand() and
(binop.getOp() instanceof BitOr or binop.getOp() instanceof Add) and

View File

@@ -5,6 +5,7 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
@@ -111,7 +112,7 @@ module UrlRedirect {
// Url redirection is a problem only if the user controls the prefix of the URL.
// TODO: This is a copy of the taint-sanitizer from the old points-to query, which doesn't
// cover formatting.
exists(BinaryExprNode string_concat | string_concat.getOp() instanceof Add |
exists(Cfg::BinaryExprNode string_concat | string_concat.getOp() instanceof Add |
string_concat.getRight() = this.asCfgNode()
)
}

View File

@@ -1,4 +1,5 @@
import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate
import FlowTest
@@ -23,7 +24,7 @@ import MakeTest<MakeTestSig<MaximalFlowTest>>
module MaximalFlowsConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
exists(node.getLocation().getFile().getRelativePath()) and
not node.asCfgNode() instanceof CallNode and
not node.asCfgNode() instanceof Cfg::CallNode and
not node.asCfgNode().getNode() instanceof Return and
not node instanceof DataFlow::ParameterNode and
not node instanceof DataFlow::PostUpdateNode and
@@ -34,9 +35,9 @@ module MaximalFlowsConfig implements DataFlow::ConfigSig {
predicate isSink(DataFlow::Node node) {
exists(node.getLocation().getFile().getRelativePath()) and
not any(CallNode c).getArg(_) = node.asCfgNode() and
not any(Cfg::CallNode c).getArg(_) = node.asCfgNode() and
not isArgumentNode(node, _, _) and
not node.asCfgNode().(NameNode).getId().matches("SINK%") and
not node.asCfgNode().(Cfg::NameNode).getId().matches("SINK%") and
not DataFlow::localFlowStep(node, _)
}
}

View File

@@ -1,4 +1,5 @@
import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import utils.test.dataflow.FlowTest
import utils.test.dataflow.testConfig
private import semmle.python.dataflow.new.internal.PrintNode
@@ -19,7 +20,7 @@ query predicate missingAnnotationOnSink(Location location, string error, string
TestConfig::isSink(sink) and
// note: we only care about `SINK` and not `SINK_F`, so we have to reconstruct manually.
exists(DataFlow::CallCfgNode call |
call.getFunction().asCfgNode().(NameNode).getId() = "SINK" and
call.getFunction().asCfgNode().(Cfg::NameNode).getId() = "SINK" and
(sink = call.getArg(_) or sink = call.getArgByName(_))
) and
location = sink.getLocation() and

View File

@@ -1,4 +1,5 @@
import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import utils.test.dataflow.FlowTest
import utils.test.dataflow.testTaintConfig
private import semmle.python.dataflow.new.internal.PrintNode
@@ -18,7 +19,7 @@ query predicate missingAnnotationOnSink(Location location, string error, string
exists(DataFlow::Node sink |
exists(DataFlow::CallCfgNode call |
// note: we only care about `SINK` and not `SINK_F`, so we have to reconstruct manually.
call.getFunction().asCfgNode().(NameNode).getId() = "SINK" and
call.getFunction().asCfgNode().(Cfg::NameNode).getId() = "SINK" and
(sink = call.getArg(_) or sink = call.getArgByName(_))
) and
location = sink.getLocation() and

View File

@@ -1,4 +1,5 @@
import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import semmle.python.dataflow.new.DataFlow
import utils.test.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
@@ -49,7 +50,7 @@ private string fromValue(DataFlow::Node fromNode) {
pragma[inline]
private string fromFunc(DataFlow::ArgumentNode fromNode) {
result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
result = fromNode.getCall().getNode().(Cfg::CallNode).getFunction().getNode().(Name).getId()
}
pragma[inline]

View File

@@ -1,15 +1,16 @@
import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.internal.PrintNode
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.ApiGraphs
import utils.test.InlineExpectationsTest
signature module UnresolvedCallExpectationsSig {
predicate unresolvedCall(CallNode call);
predicate unresolvedCall(Cfg::CallNode call);
}
module DefaultUnresolvedCallExpectations implements UnresolvedCallExpectationsSig {
predicate unresolvedCall(CallNode call) {
predicate unresolvedCall(Cfg::CallNode call) {
not exists(DataFlowPrivate::DataFlowCall dfc |
exists(dfc.getCallable()) and dfc.getNode() = call
) and
@@ -24,7 +25,7 @@ module MakeUnresolvedCallExpectations<UnresolvedCallExpectationsSig Impl> {
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(CallNode call | Impl::unresolvedCall(call) |
exists(Cfg::CallNode call | Impl::unresolvedCall(call) |
location = call.getLocation() and
tag = "unresolved_call" and
value = prettyExpr(call.getNode()) and

View File

@@ -21,11 +21,12 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import semmle.python.dataflow.new.DataFlow
module TestConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "SOURCE"
node.(DataFlow::CfgNode).getNode().(Cfg::NameNode).getId() = "SOURCE"
or
node.(DataFlow::CfgNode).getNode().getNode().(StringLiteral).getS() = "source"
or
@@ -37,7 +38,7 @@ module TestConfig implements DataFlow::ConfigSig {
predicate isSink(DataFlow::Node node) {
exists(DataFlow::CallCfgNode call |
call.getFunction().asCfgNode().(NameNode).getId() in ["SINK", "SINK_F"] and
call.getFunction().asCfgNode().(Cfg::NameNode).getId() in ["SINK", "SINK_F"] and
(node = call.getArg(_) or node = call.getArgByName(_)) and
not node = call.getArgByName("not_present_at_runtime")
)

View File

@@ -21,12 +21,13 @@
*/
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
module TestConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "SOURCE"
node.(DataFlow::CfgNode).getNode().(Cfg::NameNode).getId() = "SOURCE"
or
node.(DataFlow::CfgNode).getNode().getNode().(StringLiteral).getS() = "source"
or
@@ -37,8 +38,8 @@ module TestConfig implements DataFlow::ConfigSig {
}
predicate isSink(DataFlow::Node node) {
exists(CallNode call |
call.getFunction().(NameNode).getId() in ["SINK", "SINK_F"] and
exists(Cfg::CallNode call |
call.getFunction().(Cfg::NameNode).getId() in ["SINK", "SINK_F"] and
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}

View File

@@ -10,6 +10,7 @@
*/
import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
@@ -19,14 +20,14 @@ private import semmle.python.Concepts
DataFlow::Node shouldBeTainted() {
exists(DataFlow::CallCfgNode call |
call.getFunction().asCfgNode().(NameNode).getId() = "ensure_tainted" and
call.getFunction().asCfgNode().(Cfg::NameNode).getId() = "ensure_tainted" and
result in [call.getArg(_), call.getArgByName(_)]
)
}
DataFlow::Node shouldNotBeTainted() {
exists(DataFlow::CallCfgNode call |
call.getFunction().asCfgNode().(NameNode).getId() = "ensure_not_tainted" and
call.getFunction().asCfgNode().(Cfg::NameNode).getId() = "ensure_not_tainted" and
result in [call.getArg(_), call.getArgByName(_)]
)
}
@@ -36,13 +37,13 @@ DataFlow::Node shouldNotBeTainted() {
module Conf {
module TestTaintTrackingConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
source.asCfgNode().(NameNode).getId() in [
source.asCfgNode().(Cfg::NameNode).getId() in [
"TAINTED_STRING", "TAINTED_BYTES", "TAINTED_LIST", "TAINTED_DICT"
]
or
// User defined sources
exists(CallNode call |
call.getFunction().(NameNode).getId() = "taint" and
exists(Cfg::CallNode call |
call.getFunction().(Cfg::NameNode).getId() = "taint" and
source.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
or

View File

@@ -2,6 +2,7 @@ overlay[local?]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.FlowSummary
private import semmle.python.ApiGraphs
@@ -17,7 +18,7 @@ module RecursionGuard {
RecursionGuard() { this = "RecursionGuard" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this and
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this and
(TT::callStep(_, _) implies any())
}
@@ -33,7 +34,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable::Range {
SummarizedCallableIdentity() { this = "identity" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -50,7 +51,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable::Range {
SummarizedCallableApplyLambda() { this = "apply_lambda" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -70,7 +71,7 @@ private class SummarizedCallableReversed extends SummarizedCallable::Range {
SummarizedCallableReversed() { this = "list_reversed" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -86,7 +87,7 @@ private class SummarizedCallableMap extends SummarizedCallable::Range {
SummarizedCallableMap() { this = "list_map" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -106,7 +107,7 @@ private class SummarizedCallableAppend extends SummarizedCallable::Range {
SummarizedCallableAppend() { this = "append_to_list" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }

View File

@@ -1,4 +1,5 @@
import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.PrintNode
@@ -6,20 +7,20 @@ private import semmle.python.dataflow.new.internal.PrintNode
module TestTaintTrackingConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
// Standard sources
source.(DataFlow::CfgNode).getNode().(NameNode).getId() in [
source.(DataFlow::CfgNode).getNode().(Cfg::NameNode).getId() in [
"TAINTED_STRING", "TAINTED_BYTES", "TAINTED_LIST", "TAINTED_DICT"
]
or
// User defined sources
exists(CallNode call |
call.getFunction().(NameNode).getId() = "taint" and
exists(Cfg::CallNode call |
call.getFunction().(Cfg::NameNode).getId() = "taint" and
source.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}
predicate isSink(DataFlow::Node sink) {
exists(CallNode call |
call.getFunction().(NameNode).getId() in ["ensure_tainted", "ensure_not_tainted"] and
exists(Cfg::CallNode call |
call.getFunction().(Cfg::NameNode).getId() in ["ensure_tainted", "ensure_not_tainted"] and
sink.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}

View File

@@ -2,6 +2,7 @@ overlay[local?]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.FlowSummary
private import semmle.python.ApiGraphs
@@ -17,7 +18,7 @@ module RecursionGuard {
RecursionGuard() { this = "TypeTrackingSummariesRecursionGuard" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this and
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this and
(TT::callStep(_, _) implies any())
}
@@ -41,7 +42,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -60,7 +61,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -82,7 +83,7 @@ private class SummarizedCallableReversed extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -100,7 +101,7 @@ private class SummarizedCallableMap extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -122,7 +123,7 @@ private class SummarizedCallableAppend extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -165,7 +166,7 @@ private class SummarizedCallableReadSecret extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -183,7 +184,7 @@ private class SummarizedCallableSetSecret extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }