Merge pull request #15816 from MathiasVP/remove-ssa-pruning-stage

C++: Remove the pruning stage from dataflow SSA
This commit is contained in:
Jeroen Ketema
2024-03-06 11:04:05 +01:00
committed by GitHub
2 changed files with 13 additions and 384 deletions

View File

@@ -10,13 +10,12 @@ private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs as FIO
private import semmle.code.cpp.ir.internal.IRCppLanguage
private import semmle.code.cpp.ir.dataflow.internal.ModelUtil
private import DataFlowPrivate
private import ssa0.SsaInternals as SsaInternals0
import SsaInternalsCommon
private module SourceVariables {
cached
private newtype TSourceVariable =
TMkSourceVariable(SsaInternals0::SourceVariable base, int ind) {
TMkSourceVariable(BaseSourceVariable base, int ind) {
ind = [0 .. countIndirectionsForCppType(base.getLanguageType()) + 1]
}
@@ -30,7 +29,7 @@ private module SourceVariables {
}
class SourceVariable extends TSourceVariable {
SsaInternals0::SourceVariable base;
BaseSourceVariable base;
int ind;
SourceVariable() { this = TMkSourceVariable(base, ind) }
@@ -42,7 +41,7 @@ private module SourceVariables {
* Gets the base source variable (i.e., the variable without any
* indirections) of this source variable.
*/
SsaInternals0::SourceVariable getBaseVariable() { result = base }
BaseSourceVariable getBaseVariable() { result = base }
/** Gets a textual representation of this element. */
string toString() { result = repeatStars(this.getIndirection()) + base.toString() }
@@ -105,16 +104,7 @@ predicate hasRawIndirectInstruction(Instruction instr, int indirectionIndex) {
cached
private newtype TDefOrUseImpl =
TDefImpl(BaseSourceVariableInstruction base, Operand address, int indirectionIndex) {
isDef(_, _, address, base, _, indirectionIndex) and
(
// We only include the definition if the SSA pruning stage
// concluded that the definition is live after the write.
any(SsaInternals0::Def def).getAddressOperand() = address
or
// Since the pruning stage doesn't know about global variables we can't use the above check to
// rule out dead assignments to globals.
base.(VariableAddressInstruction).getAstVariable() instanceof GlobalLikeVariable
)
isDef(_, _, address, base, _, indirectionIndex)
} or
TUseImpl(BaseSourceVariableInstruction base, Operand operand, int indirectionIndex) {
isUse(_, operand, base, _, indirectionIndex) and
@@ -133,8 +123,7 @@ private newtype TDefOrUseImpl =
TIteratorDef(
Operand iteratorDerefAddress, BaseSourceVariableInstruction container, int indirectionIndex
) {
isIteratorDef(container, iteratorDerefAddress, _, _, indirectionIndex) and
any(SsaInternals0::Def def | def.isIteratorDef()).getAddressOperand() = iteratorDerefAddress
isIteratorDef(container, iteratorDerefAddress, _, _, indirectionIndex)
} or
TIteratorUse(
Operand iteratorAddress, BaseSourceVariableInstruction container, int indirectionIndex
@@ -984,17 +973,6 @@ predicate fromPhiNode(SsaPhiNode nodeFrom, Node nodeTo) {
)
}
/**
* Holds if there is a write at index `i` in basic block `bb` to variable `v` that's
* subsequently read (as determined by the SSA pruning stage).
*/
private predicate variableWriteCand(IRBlock bb, int i, SourceVariable v) {
exists(SsaInternals0::Def def, SsaInternals0::SourceVariable v0 |
def.asDefOrUse().hasIndexInBlock(bb, i, v0) and
v0 = v.getBaseVariable()
)
}
private predicate sourceVariableIsGlobal(
SourceVariable sv, GlobalLikeVariable global, IRFunction func, int indirectionIndex
) {
@@ -1018,16 +996,14 @@ private module SsaInput implements SsaImplCommon::InputSig<Location> {
predicate variableWrite(IRBlock bb, int i, SourceVariable v, boolean certain) {
DataFlowImplCommon::forceCachingInSameStage() and
(
variableWriteCand(bb, i, v) or
sourceVariableIsGlobal(v, _, _, _)
) and
exists(DefImpl def | def.hasIndexInBlock(bb, i, v) |
if def.isCertain() then certain = true else certain = false
)
or
exists(GlobalDefImpl global |
global.hasIndexInBlock(bb, i, v) and
certain = true
exists(DefImpl def | def.hasIndexInBlock(bb, i, v) |
if def.isCertain() then certain = true else certain = false
)
or
exists(GlobalDefImpl global |
global.hasIndexInBlock(bb, i, v) and
certain = true
)
)
}

View File

@@ -1,347 +0,0 @@
/**
* This module defines an initial SSA pruning stage that doesn't take
* indirections into account.
*/
private import codeql.ssa.Ssa as SsaImplCommon
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
private import semmle.code.cpp.models.interfaces.Allocation as Alloc
private import semmle.code.cpp.models.interfaces.DataFlow as DataFlow
private import semmle.code.cpp.ir.implementation.raw.internal.SideEffects as SideEffects
private import semmle.code.cpp.ir.internal.IRCppLanguage
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
private import semmle.code.cpp.ir.dataflow.internal.SsaInternalsCommon
private module SourceVariables {
class SourceVariable extends BaseSourceVariable {
/**
* Gets the base source variable of this `SourceVariable`.
*/
BaseSourceVariable getBaseVariable() { result = this }
}
}
import SourceVariables
private newtype TDefOrUseImpl =
TDefImpl(Operand address) { isDef(_, _, address, _, _, _) } or
TUseImpl(Operand operand) {
isUse(_, operand, _, _, _) and
not isDef(true, _, operand, _, _, _)
} or
TIteratorDef(BaseSourceVariableInstruction container, Operand iteratorAddress) {
isIteratorDef(container, iteratorAddress, _, _, _)
} or
TIteratorUse(BaseSourceVariableInstruction container, Operand iteratorAddress) {
isIteratorUse(container, iteratorAddress, _, _)
} or
TFinalParameterUse(Parameter p) {
any(Indirection indirection).getType() = p.getUnspecifiedType()
}
abstract private class DefOrUseImpl extends TDefOrUseImpl {
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets the block of this definition or use. */
final IRBlock getBlock() { this.hasIndexInBlock(result, _) }
/** Holds if this definition or use has index `index` in block `block`. */
abstract predicate hasIndexInBlock(IRBlock block, int index);
final predicate hasIndexInBlock(IRBlock block, int index, SourceVariable sv) {
this.hasIndexInBlock(block, index) and
sv = this.getSourceVariable()
}
/** Gets the location of this element. */
abstract Cpp::Location getLocation();
abstract BaseSourceVariableInstruction getBase();
final BaseSourceVariable getBaseSourceVariable() {
result = this.getBase().getBaseSourceVariable()
}
/** Gets the variable that is defined or used. */
final SourceVariable getSourceVariable() {
result.getBaseVariable() = this.getBaseSourceVariable()
}
abstract predicate isCertain();
}
abstract class DefImpl extends DefOrUseImpl {
Operand address;
Operand getAddressOperand() { result = address }
abstract Node0Impl getValue();
override string toString() { result = address.toString() }
override Cpp::Location getLocation() { result = this.getAddressOperand().getLocation() }
final override predicate hasIndexInBlock(IRBlock block, int index) {
this.getAddressOperand().getUse() = block.getInstruction(index)
}
}
private class DirectDef extends DefImpl, TDefImpl {
DirectDef() { this = TDefImpl(address) }
override BaseSourceVariableInstruction getBase() { isDef(_, _, address, result, _, _) }
override Node0Impl getValue() { isDef(_, result, address, _, _, _) }
override predicate isCertain() { isDef(true, _, address, _, _, _) }
}
private class IteratorDef extends DefImpl, TIteratorDef {
BaseSourceVariableInstruction container;
IteratorDef() { this = TIteratorDef(container, address) }
override BaseSourceVariableInstruction getBase() { result = container }
override Node0Impl getValue() { isIteratorDef(_, address, result, _, _) }
override predicate isCertain() { none() }
}
abstract class UseImpl extends DefOrUseImpl { }
abstract private class OperandBasedUse extends UseImpl {
Operand operand;
override string toString() { result = operand.toString() }
final override predicate hasIndexInBlock(IRBlock block, int index) {
// Ideally, this would just be implemented as:
// ```
// operand.getUse() = block.getInstruction(index)
// ```
// but because the IR generated for a snippet such as
// ```
// int x = *p++;
// ```
// looks like
// ```
// r1(glval<int>) = VariableAddress[x] :
// r2(glval<int *>) = VariableAddress[p] :
// r3(int *) = Load[p] : &:r2, m1
// r4(int) = Constant[1] :
// r5(int *) = PointerAdd[4] : r3, r4
// m3(int *) = Store[p] : &:r2, r5
// r6(int *) = CopyValue : r3
// r7(int) = Load[?] : &:r6, ~m2
// m2(int) = Store[x] : &:r1, r7
// ```
// we need to ensure that the `r3` operand of the `CopyValue` instruction isn't seen as a fresh use
// of `p` that happens after the increment. So if the base instruction of this use comes from a
// post-fix crement operation we set the index of the SSA use that wraps the `r3` operand at the
// `CopyValue` instruction to be the same index as the `r3` operand at the `PointerAdd` instruction.
// This ensures that the SSA library doesn't create flow from the `PointerAdd` to `r6`.
exists(BaseSourceVariableInstruction base | base = this.getBase() |
if base.getAst() = any(Cpp::PostfixCrementOperation c).getOperand()
then
exists(Operand op |
op =
min(Operand cand, int i |
isUse(_, cand, base, _, _) and
block.getInstruction(i) = cand.getUse()
|
cand order by i
) and
block.getInstruction(index) = op.getUse()
)
else operand.getUse() = block.getInstruction(index)
)
}
final override Cpp::Location getLocation() { result = operand.getLocation() }
}
private class DirectUse extends OperandBasedUse, TUseImpl {
DirectUse() { this = TUseImpl(operand) }
override BaseSourceVariableInstruction getBase() { isUse(_, operand, result, _, _) }
override predicate isCertain() { isUse(true, operand, _, _, _) }
}
private class IteratorUse extends OperandBasedUse, TIteratorUse {
BaseSourceVariableInstruction container;
IteratorUse() { this = TIteratorUse(container, operand) }
override BaseSourceVariableInstruction getBase() { result = container }
override predicate isCertain() { none() }
}
private class FinalParameterUse extends UseImpl, TFinalParameterUse {
Parameter p;
FinalParameterUse() { this = TFinalParameterUse(p) }
override string toString() { result = p.toString() }
final override predicate hasIndexInBlock(IRBlock block, int index) {
// Ideally, this should always be a `ReturnInstruction`, but if
// someone forgets to write a `return` statement in a function
// with a non-void return type we generate an `UnreachedInstruction`.
// In this case we still want to generate flow out of such functions
// if they write to a parameter. So we pick the index of the
// `UnreachedInstruction` as the index of this use.
// Note that a function may have both a `ReturnInstruction` and an
// `UnreachedInstruction`. If that's the case this predicate will
// return multiple results. I don't think this is detrimental to
// performance, however.
exists(Instruction return |
return instanceof ReturnInstruction or
return instanceof UnreachedInstruction
|
block.getInstruction(index) = return and
return.getEnclosingFunction() = p.getFunction()
)
}
final override Cpp::Location getLocation() {
// Parameters can have multiple locations. When there's a unique location we use
// that one, but if multiple locations exist we default to an unknown location.
result = unique( | | p.getLocation())
or
not exists(unique( | | p.getLocation())) and
result instanceof UnknownDefaultLocation
}
override BaseSourceVariableInstruction getBase() {
exists(InitializeParameterInstruction init |
init.getParameter() = p and
// This is always a `VariableAddressInstruction`
result = init.getAnOperand().getDef()
)
}
override predicate isCertain() { any() }
}
private module SsaInput implements SsaImplCommon::InputSig<Location> {
import InputSigCommon
import SourceVariables
/**
* Holds if the `i`'th write in block `bb` writes to the variable `v`.
* `certain` is `true` if the write is guaranteed to overwrite the entire variable.
*/
predicate variableWrite(IRBlock bb, int i, SourceVariable v, boolean certain) {
DataFlowImplCommon::forceCachingInSameStage() and
exists(DefImpl def | def.hasIndexInBlock(bb, i, v) |
if def.isCertain() then certain = true else certain = false
)
}
/**
* Holds if the `i`'th read in block `bb` reads to the variable `v`.
* `certain` is `true` if the read is guaranteed.
*/
predicate variableRead(IRBlock bb, int i, SourceVariable v, boolean certain) {
exists(UseImpl use | use.hasIndexInBlock(bb, i, v) |
if use.isCertain() then certain = true else certain = false
)
}
}
private newtype TSsaDefOrUse =
TDefOrUse(DefOrUseImpl defOrUse) {
defOrUse instanceof UseImpl
or
// If `defOrUse` is a definition we only include it if the
// SSA library concludes that it's live after the write.
exists(DefinitionExt def, SourceVariable sv, IRBlock bb, int i |
def.definesAt(sv, bb, i, _) and
defOrUse.(DefImpl).hasIndexInBlock(bb, i, sv)
)
} or
TPhi(PhiNode phi)
abstract private class SsaDefOrUse extends TSsaDefOrUse {
string toString() { result = "SsaDefOrUse" }
DefOrUseImpl asDefOrUse() { none() }
PhiNode asPhi() { none() }
abstract Location getLocation();
}
class DefOrUse extends TDefOrUse, SsaDefOrUse {
DefOrUseImpl defOrUse;
DefOrUse() { this = TDefOrUse(defOrUse) }
final override DefOrUseImpl asDefOrUse() { result = defOrUse }
final override Location getLocation() { result = defOrUse.getLocation() }
final SourceVariable getSourceVariable() { result = defOrUse.getSourceVariable() }
}
class Phi extends TPhi, SsaDefOrUse {
PhiNode phi;
Phi() { this = TPhi(phi) }
final override PhiNode asPhi() { result = phi }
final override Location getLocation() { result = phi.getBasicBlock().getLocation() }
}
class UseOrPhi extends SsaDefOrUse {
UseOrPhi() {
this.asDefOrUse() instanceof UseImpl
or
this instanceof Phi
}
final override Location getLocation() {
result = this.asDefOrUse().getLocation() or result = this.(Phi).getLocation()
}
override string toString() {
result = this.asDefOrUse().toString()
or
this instanceof Phi and
result = "Phi"
}
}
class Def extends DefOrUse {
override DefImpl defOrUse;
Operand getAddressOperand() { result = defOrUse.getAddressOperand() }
Instruction getAddress() { result = this.getAddressOperand().getDef() }
Node0Impl getValue() { result = defOrUse.getValue() }
override string toString() { result = this.asDefOrUse().toString() }
BaseSourceVariableInstruction getBase() { result = defOrUse.getBase() }
predicate isIteratorDef() { defOrUse instanceof IteratorDef }
}
private module SsaImpl = SsaImplCommon::Make<Location, SsaInput>;
class PhiNode extends SsaImpl::DefinitionExt {
PhiNode() {
this instanceof SsaImpl::PhiNode or
this instanceof SsaImpl::PhiReadNode
}
}
class DefinitionExt = SsaImpl::DefinitionExt;