Merge pull request #16884 from hvitved/ssa/dataflow-integration

SSA: Add data flow integration layer
This commit is contained in:
Tom Hvitved
2024-07-10 12:47:37 +02:00
committed by GitHub

View File

@@ -1163,4 +1163,489 @@ module Make<LocationSig Location, InputSig<Location> Input> {
)
}
}
/** Provides the input to `DataFlowIntegration`. */
signature module DataFlowIntegrationInputSig {
/**
* An expression with a value. That is, we expect these expressions to be
* represented in the data flow graph.
*/
class Expr {
/** Gets a textual representation of this expression. */
string toString();
/** Holds if the `i`th node of basic block `bb` evaluates this expression. */
predicate hasCfgNode(BasicBlock bb, int i);
}
/**
* Gets a read of SSA definition `def`.
*
* Override this with a cached version when applicable.
*/
default Expr getARead(Definition def) {
exists(SourceVariable v, BasicBlock bb, int i |
ssaDefReachesRead(v, def, bb, i) and
variableRead(bb, i, v, true) and
result.hasCfgNode(bb, i)
)
}
/** Holds if SSA definition `def` assigns `value` to the underlying variable. */
predicate ssaDefAssigns(WriteDefinition def, Expr value);
/** A parameter. */
class Parameter {
/** Gets a textual representation of this parameter. */
string toString();
/** Gets the location of this parameter. */
Location getLocation();
}
/** Holds if SSA definition `def` initializes parameter `p` at function entry. */
predicate ssaDefInitializesParam(WriteDefinition def, Parameter p);
/**
* Holds if flow should be allowed into uncertain SSA definition `def` from
* previous definitions or reads.
*/
default predicate allowFlowIntoUncertainDef(UncertainWriteDefinition def) { none() }
/** A (potential) guard. */
class Guard {
/** Gets a textual representation of this guard. */
string toString();
/** Holds if the `i`th node of basic block `bb` evaluates this guard. */
predicate hasCfgNode(BasicBlock bb, int i);
}
/** Holds if `guard` controls block `bb` upon evaluating to `branch`. */
predicate guardControlsBlock(Guard guard, BasicBlock bb, boolean branch);
/** Gets an immediate conditional successor of basic block `bb`, if any. */
BasicBlock getAConditionalBasicBlockSuccessor(BasicBlock bb, boolean branch);
}
/**
* Constructs the type `Node` and associated value step relations, which are
* intended to be included in the `DataFlow::Node` type and local step relations.
*
* Additionally, this module also provides a barrier guards implementation.
*/
module DataFlowIntegration<DataFlowIntegrationInputSig DfInput> {
private import codeql.util.Boolean
pragma[nomagic]
private predicate adjacentDefReachesReadExt(
DefinitionExt def, SourceVariable v, BasicBlock bb1, int i1, BasicBlock bb2, int i2
) {
adjacentDefReadExt(def, v, bb1, i1, bb2, i2) and
(
def.definesAt(v, bb1, i1, _)
or
variableRead(bb1, i1, v, true)
)
or
exists(BasicBlock bb3, int i3 |
adjacentDefReachesReadExt(def, v, bb1, i1, bb3, i3) and
variableRead(bb3, i3, v, false) and
adjacentDefReadExt(def, v, bb3, i3, bb2, i2)
)
}
pragma[nomagic]
private predicate adjacentDefReachesUncertainReadExt(
DefinitionExt def, SourceVariable v, BasicBlock bb1, int i1, BasicBlock bb2, int i2
) {
adjacentDefReachesReadExt(def, v, bb1, i1, bb2, i2) and
variableRead(bb2, i2, v, false)
}
/**
* Holds if the reference to `def` at index `i` in basic block `bb` can reach
* another definition `next` of the same underlying source variable, without
* passing through another write or non-pseudo read.
*
* The reference is either a read of `def` or `def` itself.
*/
pragma[nomagic]
private predicate lastRefBeforeRedefExt(
DefinitionExt def, SourceVariable v, BasicBlock bb, int i, BasicBlock input,
DefinitionExt next
) {
lastRefRedefExt(def, v, bb, i, input, next) and
not variableRead(bb, i, v, false)
or
exists(BasicBlock bb0, int i0 |
lastRefRedefExt(def, v, bb0, i0, input, next) and
adjacentDefReachesUncertainReadExt(def, v, bb, i, bb0, i0)
)
}
/** Same as `adjacentDefReadExt`, but skips uncertain reads. */
pragma[nomagic]
private predicate adjacentDefSkipUncertainReadsExt(
DefinitionExt def, SourceVariable v, BasicBlock bb1, int i1, BasicBlock bb2, int i2
) {
adjacentDefReachesReadExt(def, v, bb1, i1, bb2, i2) and
variableRead(bb2, i2, v, true)
}
pragma[nomagic]
private predicate adjacentReadPairExt(DefinitionExt def, ReadNode read1, ReadNode read2) {
exists(SourceVariable v, BasicBlock bb1, int i1, BasicBlock bb2, int i2 |
read1.readsAt(bb1, i1, v) and
adjacentDefSkipUncertainReadsExt(def, v, bb1, i1, bb2, i2) and
read2.readsAt(bb2, i2, v)
)
}
final private class DefinitionExtFinal = DefinitionExt;
/** An SSA definition into which another SSA definition may flow. */
private class SsaInputDefinitionExt extends DefinitionExtFinal {
SsaInputDefinitionExt() {
this instanceof PhiNode
or
this instanceof PhiReadNode
or
DfInput::allowFlowIntoUncertainDef(this)
}
/** Holds if `def` may flow into this definition via basic block `input`. */
predicate hasInputFromBlock(
DefinitionExt def, SourceVariable v, BasicBlock bb, int i, BasicBlock input
) {
lastRefBeforeRedefExt(def, v, bb, i, input, this)
}
}
cached
private newtype TNode =
TParamNode(DfInput::Parameter p) { DfInput::ssaDefInitializesParam(_, p) } or
TExprNode(DfInput::Expr e, Boolean isPost) {
e = DfInput::getARead(_)
or
DfInput::ssaDefAssigns(_, e) and
isPost = false
} or
TSsaDefinitionNode(DefinitionExt def) or
TSsaInputNode(SsaInputDefinitionExt def, BasicBlock input) {
def.hasInputFromBlock(_, _, _, _, input)
}
/**
* A data flow node that we need to reference in the value step relation.
*
* Note that only the `SsaNode` subclass is expected to be added as additional
* nodes in `DataFlow::Node`. The other subclasses are expected to already be
* present and are included here in order to reference them in the step relation.
*/
abstract private class NodeImpl extends TNode {
/** Gets the location of this node. */
abstract Location getLocation();
/** Gets a textual representation of this node. */
abstract string toString();
}
final class Node = NodeImpl;
/** A parameter node. */
private class ParameterNodeImpl extends NodeImpl, TParamNode {
private DfInput::Parameter p;
ParameterNodeImpl() { this = TParamNode(p) }
/** Gets the underlying parameter. */
DfInput::Parameter getParameter() { result = p }
override string toString() { result = p.toString() }
override Location getLocation() { result = p.getLocation() }
}
final class ParameterNode = ParameterNodeImpl;
/** A (post-update) expression node. */
abstract private class ExprNodePreOrPostImpl extends NodeImpl, TExprNode {
DfInput::Expr e;
boolean isPost;
ExprNodePreOrPostImpl() { this = TExprNode(e, isPost) }
/** Gets the underlying expression. */
DfInput::Expr getExpr() { result = e }
override Location getLocation() {
exists(BasicBlock bb, int i |
e.hasCfgNode(bb, i) and
result = bb.getNode(i).getLocation()
)
}
}
final class ExprNodePreOrPost = ExprNodePreOrPostImpl;
/** An expression node. */
private class ExprNodeImpl extends ExprNodePreOrPostImpl {
ExprNodeImpl() { isPost = false }
override string toString() { result = e.toString() }
}
final class ExprNode = ExprNodeImpl;
/** A post-update expression node. */
private class ExprPostUpdateNodeImpl extends ExprNodePreOrPostImpl {
ExprPostUpdateNodeImpl() { isPost = true }
/** Gets the pre-update expression node. */
ExprNode getPreUpdateNode() { result = TExprNode(e, false) }
override string toString() { result = e.toString() + " [postupdate]" }
}
final class ExprPostUpdateNode = ExprPostUpdateNodeImpl;
private class ReadNodeImpl extends ExprNodeImpl {
private BasicBlock bb_;
private int i_;
private SourceVariable v_;
ReadNodeImpl() {
variableRead(bb_, i_, v_, true) and
this.getExpr().hasCfgNode(bb_, i_)
}
SourceVariable getVariable() { result = v_ }
pragma[nomagic]
predicate readsAt(BasicBlock bb, int i, SourceVariable v) {
bb = bb_ and
i = i_ and
v = v_
}
}
final private class ReadNode = ReadNodeImpl;
/** A synthesized SSA data flow node. */
abstract private class SsaNodeImpl extends NodeImpl {
/** Gets the underlying SSA definition. */
abstract DefinitionExt getDefinitionExt();
}
final class SsaNode = SsaNodeImpl;
/** An SSA definition, viewed as a node in a data flow graph. */
private class SsaDefinitionExtNodeImpl extends SsaNodeImpl, TSsaDefinitionNode {
private DefinitionExt def;
SsaDefinitionExtNodeImpl() { this = TSsaDefinitionNode(def) }
override DefinitionExt getDefinitionExt() { result = def }
override Location getLocation() { result = def.getLocation() }
override string toString() { result = def.toString() }
}
final class SsaDefinitionExtNode = SsaDefinitionExtNodeImpl;
/**
* A node that represents an input to an SSA phi (read) definition.
*
* This allows for barrier guards to filter input to phi nodes. For example, in
*
* ```rb
* x = taint
* if x != "safe" then
* x = "safe"
* end
* sink x
* ```
*
* the `false` edge out of `x != "safe"` guards the input from `x = taint` into the
* `phi` node after the condition.
*
* It is also relevant to filter input into phi read nodes:
*
* ```rb
* x = taint
* if b then
* if x != "safe1" then
* return
* end
* else
* if x != "safe2" then
* return
* end
* end
*
* sink x
* ```
*
* both inputs into the phi read node after the outer condition are guarded.
*/
private class SsaInputNodeImpl extends SsaNodeImpl, TSsaInputNode {
private SsaInputDefinitionExt def_;
private BasicBlock input_;
SsaInputNodeImpl() { this = TSsaInputNode(def_, input_) }
/** Holds if this node represents input into SSA definition `def` via basic block `input`. */
predicate isInputInto(DefinitionExt def, BasicBlock input) {
def = def_ and
input = input_
}
override SsaInputDefinitionExt getDefinitionExt() { result = def_ }
override Location getLocation() { result = input_.getNode(input_.length() - 1).getLocation() }
override string toString() { result = "[input] " + def_.toString() }
}
final class SsaInputNode = SsaInputNodeImpl;
/**
* Holds if `nodeFrom` is a node for SSA definition `def`, which can input
* node `nodeTo`.
*/
pragma[nomagic]
private predicate inputFromDef(
DefinitionExt def, SsaDefinitionExtNode nodeFrom, SsaInputNode nodeTo
) {
exists(SourceVariable v, BasicBlock bb, int i, BasicBlock input, SsaInputDefinitionExt next |
next.hasInputFromBlock(def, v, bb, i, input) and
def = nodeFrom.getDefinitionExt() and
def.definesAt(v, bb, i, _) and
nodeTo = TSsaInputNode(next, input)
)
}
/**
* Holds if `nodeFrom` is a last read of SSA definition `def`, which
* can reach input node `nodeTo`.
*/
pragma[nomagic]
private predicate inputFromRead(DefinitionExt def, ReadNode nodeFrom, SsaInputNode nodeTo) {
exists(SourceVariable v, BasicBlock bb, int i, BasicBlock input, SsaInputDefinitionExt next |
next.hasInputFromBlock(def, v, bb, i, input) and
nodeFrom.readsAt(bb, i, v) and
nodeTo = TSsaInputNode(next, input)
)
}
pragma[nomagic]
private predicate firstReadExt(DefinitionExt def, ReadNode read) {
exists(SourceVariable v, BasicBlock bb1, int i1, BasicBlock bb2, int i2 |
def.definesAt(v, bb1, i1, _) and
adjacentDefSkipUncertainReadsExt(def, v, bb1, i1, bb2, i2) and
read.readsAt(bb2, i2, v)
)
}
/**
* Holds if there is a local flow step from `nodeFrom` to `nodeTo`.
*
* `isUseStep` is `true` when `nodeFrom` is a (post-update) read node and
* `nodeTo` is a read node or phi (read) node.
*/
predicate localFlowStep(DefinitionExt def, Node nodeFrom, Node nodeTo, boolean isUseStep) {
(
// Flow from assignment into SSA definition
DfInput::ssaDefAssigns(def, nodeFrom.(ExprNode).getExpr())
or
// Flow from parameter into entry definition
DfInput::ssaDefInitializesParam(def, nodeFrom.(ParameterNode).getParameter())
) and
nodeTo.(SsaDefinitionExtNode).getDefinitionExt() = def and
isUseStep = false
or
// Flow from SSA definition to first read
def = nodeFrom.(SsaDefinitionExtNode).getDefinitionExt() and
firstReadExt(def, nodeTo) and
isUseStep = false
or
// Flow from (post-update) read to next read
adjacentReadPairExt(def, [nodeFrom, nodeFrom.(ExprPostUpdateNode).getPreUpdateNode()], nodeTo) and
nodeFrom != nodeTo and
isUseStep = true
or
// Flow into phi (read) SSA definition node from def
inputFromDef(def, nodeFrom, nodeTo) and
isUseStep = false
or
// Flow into phi (read) SSA definition node from (post-update) read
inputFromRead(def, [nodeFrom, nodeFrom.(ExprPostUpdateNode).getPreUpdateNode()], nodeTo) and
isUseStep = true
or
// Flow from input node to def
nodeTo.(SsaDefinitionExtNode).getDefinitionExt() = def and
def = nodeFrom.(SsaInputNode).getDefinitionExt() and
isUseStep = false
}
/** Holds if the value of `nodeTo` is given by `nodeFrom`. */
predicate localMustFlowStep(DefinitionExt def, Node nodeFrom, Node nodeTo) {
(
// Flow from assignment into SSA definition
DfInput::ssaDefAssigns(def, nodeFrom.(ExprNode).getExpr())
or
// Flow from parameter into entry definition
DfInput::ssaDefInitializesParam(def, nodeFrom.(ParameterNode).getParameter())
) and
nodeTo.(SsaDefinitionExtNode).getDefinitionExt() = def
or
// Flow from SSA definition to read
nodeFrom.(SsaDefinitionExtNode).getDefinitionExt() = def and
nodeTo.(ExprNode).getExpr() = DfInput::getARead(def)
}
pragma[nomagic]
private predicate guardControlsSsaRead(
DfInput::Guard g, boolean branch, Definition def, ExprNode n
) {
exists(BasicBlock bb, DfInput::Expr e |
e = n.getExpr() and
DfInput::getARead(def) = e and
DfInput::guardControlsBlock(g, bb, branch) and
e.hasCfgNode(bb, _)
)
}
pragma[nomagic]
private predicate guardControlsPhiInput(
DfInput::Guard g, boolean branch, Definition def, BasicBlock input, SsaInputDefinitionExt phi
) {
phi.hasInputFromBlock(def, _, _, _, input) and
(
DfInput::guardControlsBlock(g, input, branch)
or
exists(int last |
last = input.length() - 1 and
g.hasCfgNode(input, last) and
DfInput::getAConditionalBasicBlockSuccessor(input, branch) = phi.getBasicBlock()
)
)
}
/**
* Gets a node that reads SSA defininition `def`, and which is guarded by
* `g` evaluating to `branch`.
*/
pragma[nomagic]
Node getABarrierNode(DfInput::Guard g, Definition def, boolean branch) {
guardControlsSsaRead(g, branch, def, result)
or
exists(BasicBlock input, SsaInputDefinitionExt phi |
guardControlsPhiInput(g, branch, def, input, phi) and
result.(SsaInputNode).isInputInto(phi, input)
)
}
}
}