Merge pull request #8445 from dbartol/dbartol/ir-range/semantic-scratch

Sign, Modulus, and Range analysis for C++ using sharable semantic layer
This commit is contained in:
Dave Bartolomeo
2022-03-30 07:08:09 -04:00
committed by GitHub
29 changed files with 3785 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
import SemanticExpr
import SemanticBound
import SemanticSSA
import SemanticGuard
import SemanticCFG
import SemanticType
import SemanticOpcode

View File

@@ -0,0 +1,42 @@
/**
* Semantic wrapper around the language-specific bounds library.
*/
private import SemanticExpr
private import SemanticExprSpecific::SemanticExprConfig as Specific
private import SemanticSSA
/**
* A valid base for an expression bound.
*
* Can be either a variable (`SemSsaBound`) or zero (`SemZeroBound`).
*/
class SemBound instanceof Specific::Bound {
final string toString() { result = super.toString() }
final SemExpr getExpr(int delta) { result = Specific::getBoundExpr(this, delta) }
}
/**
* A bound that is a constant zero.
*/
class SemZeroBound extends SemBound {
SemZeroBound() { Specific::zeroBound(this) }
}
/**
* A bound that is an SSA definition.
*/
class SemSsaBound extends SemBound {
/**
* The variables whose value is used as the bound.
*
* Can be multi-valued in some implementations. If so, all variables will be equivalent.
*/
SemSsaVariable var;
SemSsaBound() { Specific::ssaBound(this, var) }
/** Gets a variable whose value is used as the bound. */
final SemSsaVariable getAVariable() { result = var }
}

View File

@@ -0,0 +1,22 @@
/**
* Semantic interface to the control flow graph.
*/
private import Semantic
private import SemanticExprSpecific::SemanticExprConfig as Specific
/**
* A basic block in the control-flow graph.
*/
class SemBasicBlock extends Specific::BasicBlock {
/** Holds if this block (transitively) dominates `otherblock`. */
final predicate bbDominates(SemBasicBlock otherBlock) { Specific::bbDominates(this, otherBlock) }
/** Holds if this block has dominance information. */
final predicate hasDominanceInformation() { Specific::hasDominanceInformation(this) }
/** Gets an expression that is evaluated in this basic block. */
final SemExpr getAnExpr() { result.getBasicBlock() = this }
final int getUniqueId() { result = Specific::getBasicBlockUniqueId(this) }
}

View File

@@ -0,0 +1,309 @@
/**
* Semantic interface for expressions.
*/
private import Semantic
private import SemanticExprSpecific::SemanticExprConfig as Specific
/**
* An language-neutral expression.
*
* The expression computes a value of type `getSemType()`. The actual computation is determined by
* the expression's opcode (`getOpcode()`).
*/
class SemExpr instanceof Specific::Expr {
final string toString() { result = super.toString() }
final Specific::Location getLocation() { result = super.getLocation() }
Opcode getOpcode() { result instanceof Opcode::Unknown }
SemType getSemType() { result = Specific::getUnknownExprType(this) }
final SemBasicBlock getBasicBlock() { result = Specific::getExprBasicBlock(this) }
}
/** An expression with an opcode other than `Unknown`. */
abstract private class SemKnownExpr extends SemExpr {
Opcode opcode;
SemType type;
final override Opcode getOpcode() { result = opcode }
final override SemType getSemType() { result = type }
}
/** An expression that returns a literal value. */
class SemLiteralExpr extends SemKnownExpr {
SemLiteralExpr() {
Specific::integerLiteral(this, type, _) and opcode instanceof Opcode::Constant
or
Specific::largeIntegerLiteral(this, type, _) and opcode instanceof Opcode::Constant
or
Specific::booleanLiteral(this, type, _) and opcode instanceof Opcode::Constant
or
Specific::floatingPointLiteral(this, type, _) and opcode instanceof Opcode::Constant
or
Specific::nullLiteral(this, type) and opcode instanceof Opcode::Constant
or
Specific::stringLiteral(this, type, _) and opcode instanceof Opcode::StringConstant
}
}
/** An expression that returns a numeric literal value. */
class SemNumericLiteralExpr extends SemLiteralExpr {
SemNumericLiteralExpr() {
Specific::integerLiteral(this, _, _)
or
Specific::largeIntegerLiteral(this, _, _)
or
Specific::floatingPointLiteral(this, _, _)
}
/**
* Gets an approximation of the value of the literal, as a `float`.
*
* If the value can be precisely represented as a `float`, the result will be exact. If the actual
* value cannot be precisely represented (for example, it is an integer with more than 53
* significant bits), then the result is an approximation.
*/
float getApproximateFloatValue() { none() }
}
/** An expression that returns an integer literal value. */
class SemIntegerLiteralExpr extends SemNumericLiteralExpr {
SemIntegerLiteralExpr() {
Specific::integerLiteral(this, _, _)
or
Specific::largeIntegerLiteral(this, _, _)
}
/**
* Gets the value of the literal, if it can be represented as an `int`.
*
* If the value is outside the range of an `int`, use `getApproximateFloatValue()` to get a value
* that is equal to the actual integer value, within rounding error.
*/
final int getIntValue() { Specific::integerLiteral(this, _, result) }
final override float getApproximateFloatValue() {
result = getIntValue()
or
Specific::largeIntegerLiteral(this, _, result)
}
}
/**
* An expression that returns a floating-point literal value.
*/
class SemFloatingPointLiteralExpr extends SemNumericLiteralExpr {
float value;
SemFloatingPointLiteralExpr() { Specific::floatingPointLiteral(this, _, value) }
final override float getApproximateFloatValue() { result = value }
/** Gets the value of the literal. */
final float getFloatValue() { result = value }
}
/**
* An expression that consumes two operands.
*/
class SemBinaryExpr extends SemKnownExpr {
SemExpr leftOperand;
SemExpr rightOperand;
SemBinaryExpr() { Specific::binaryExpr(this, opcode, type, leftOperand, rightOperand) }
/** Gets the left operand. */
final SemExpr getLeftOperand() { result = leftOperand }
/** Gets the right operand. */
final SemExpr getRightOperand() { result = rightOperand }
/** Holds if `a` and `b` are the two operands, in either order. */
final predicate hasOperands(SemExpr a, SemExpr b) {
a = getLeftOperand() and b = getRightOperand()
or
a = getRightOperand() and b = getLeftOperand()
}
/** Gets the two operands. */
final SemExpr getAnOperand() { result = getLeftOperand() or result = getRightOperand() }
}
/** An expression that performs and ordered comparison of two operands. */
class SemRelationalExpr extends SemBinaryExpr {
SemRelationalExpr() {
opcode instanceof Opcode::CompareLT
or
opcode instanceof Opcode::CompareLE
or
opcode instanceof Opcode::CompareGT
or
opcode instanceof Opcode::CompareGE
}
/**
* Get the operand that will be less than the other operand if the result of the comparison is
* `true`.
*
* For `x < y` or `x <= y`, this will return `x`.
* For `x > y` or `x >= y`, this will return `y`.`
*/
final SemExpr getLesserOperand() {
if opcode instanceof Opcode::CompareLT or opcode instanceof Opcode::CompareLE
then result = getLeftOperand()
else result = getRightOperand()
}
/**
* Get the operand that will be greater than the other operand if the result of the comparison is
* `true`.
*
* For `x < y` or `x <= y`, this will return `y`.
* For `x > y` or `x >= y`, this will return `x`.`
*/
final SemExpr getGreaterOperand() {
if opcode instanceof Opcode::CompareGT or opcode instanceof Opcode::CompareGE
then result = getLeftOperand()
else result = getRightOperand()
}
/** Holds if this comparison returns `false` if the two operands are equal. */
final predicate isStrict() {
opcode instanceof Opcode::CompareLT or opcode instanceof Opcode::CompareGT
}
}
class SemAddExpr extends SemBinaryExpr {
SemAddExpr() { opcode instanceof Opcode::Add }
}
class SemSubExpr extends SemBinaryExpr {
SemSubExpr() { opcode instanceof Opcode::Sub }
}
class SemMulExpr extends SemBinaryExpr {
SemMulExpr() { opcode instanceof Opcode::Mul }
}
class SemDivExpr extends SemBinaryExpr {
SemDivExpr() { opcode instanceof Opcode::Div }
}
class SemRemExpr extends SemBinaryExpr {
SemRemExpr() { opcode instanceof Opcode::Rem }
}
class SemShiftLeftExpr extends SemBinaryExpr {
SemShiftLeftExpr() { opcode instanceof Opcode::ShiftLeft }
}
class SemShiftRightExpr extends SemBinaryExpr {
SemShiftRightExpr() { opcode instanceof Opcode::ShiftRight }
}
class SemShiftRightUnsignedExpr extends SemBinaryExpr {
SemShiftRightUnsignedExpr() { opcode instanceof Opcode::ShiftRightUnsigned }
}
class SemBitAndExpr extends SemBinaryExpr {
SemBitAndExpr() { opcode instanceof Opcode::BitAnd }
}
class SemBitOrExpr extends SemBinaryExpr {
SemBitOrExpr() { opcode instanceof Opcode::BitOr }
}
class SemBitXorExpr extends SemBinaryExpr {
SemBitXorExpr() { opcode instanceof Opcode::BitXor }
}
class SemUnaryExpr extends SemKnownExpr {
SemExpr operand;
SemUnaryExpr() { Specific::unaryExpr(this, opcode, type, operand) }
final SemExpr getOperand() { result = operand }
}
class SemBoxExpr extends SemUnaryExpr {
SemBoxExpr() { opcode instanceof Opcode::Box }
}
class SemUnboxExpr extends SemUnaryExpr {
SemUnboxExpr() { opcode instanceof Opcode::Unbox }
}
class SemConvertExpr extends SemUnaryExpr {
SemConvertExpr() { opcode instanceof Opcode::Convert }
}
class SemCopyValueExpr extends SemUnaryExpr {
SemCopyValueExpr() { opcode instanceof Opcode::CopyValue }
}
class SemNegateExpr extends SemUnaryExpr {
SemNegateExpr() { opcode instanceof Opcode::Negate }
}
class SemBitComplementExpr extends SemUnaryExpr {
SemBitComplementExpr() { opcode instanceof Opcode::BitComplement }
}
class SemLogicalNotExpr extends SemUnaryExpr {
SemLogicalNotExpr() { opcode instanceof Opcode::LogicalNot }
}
class SemAddOneExpr extends SemUnaryExpr {
SemAddOneExpr() { opcode instanceof Opcode::AddOne }
}
class SemSubOneExpr extends SemUnaryExpr {
SemSubOneExpr() { opcode instanceof Opcode::SubOne }
}
private class SemNullaryExpr extends SemKnownExpr {
SemNullaryExpr() { Specific::nullaryExpr(this, opcode, type) }
}
class SemInitializeParameterExpr extends SemNullaryExpr {
SemInitializeParameterExpr() { opcode instanceof Opcode::InitializeParameter }
}
class SemLoadExpr extends SemNullaryExpr {
SemLoadExpr() { opcode instanceof Opcode::Load }
final SemSsaVariable getDef() { result.getAUse() = this }
}
class SemSsaLoadExpr extends SemLoadExpr {
SemSsaLoadExpr() { exists(getDef()) }
}
class SemNonSsaLoadExpr extends SemLoadExpr {
SemNonSsaLoadExpr() { not exists(getDef()) }
}
class SemStoreExpr extends SemUnaryExpr {
SemStoreExpr() { opcode instanceof Opcode::Store }
}
class SemConditionalExpr extends SemKnownExpr {
SemExpr condition;
SemExpr trueResult;
SemExpr falseResult;
SemConditionalExpr() {
opcode instanceof Opcode::Conditional and
Specific::conditionalExpr(this, type, condition, trueResult, falseResult)
}
final SemExpr getBranchExpr(boolean branch) {
branch = true and result = trueResult
or
branch = false and result = falseResult
}
}

View File

@@ -0,0 +1,297 @@
/**
* C++-specific implementation of the semantic interface.
*/
private import cpp as Cpp
private import semmle.code.cpp.ir.IR as IR
private import Semantic
private import experimental.semmle.code.cpp.rangeanalysis.Bound as IRBound
private import semmle.code.cpp.controlflow.IRGuards as IRGuards
module SemanticExprConfig {
class Location = Cpp::Location;
class Expr = IR::Instruction;
SemBasicBlock getExprBasicBlock(Expr e) { result = getSemanticBasicBlock(e.getBlock()) }
private predicate anyConstantExpr(Expr expr, SemType type, string value) {
exists(IR::ConstantInstruction instr | instr = expr |
type = getSemanticType(instr.getResultIRType()) and
value = instr.getValue()
)
}
predicate integerLiteral(Expr expr, SemIntegerType type, int value) {
exists(string valueString |
anyConstantExpr(expr, type, valueString) and
value = valueString.toInt()
)
}
predicate largeIntegerLiteral(Expr expr, SemIntegerType type, float approximateFloatValue) {
exists(string valueString |
anyConstantExpr(expr, type, valueString) and
not exists(valueString.toInt()) and
approximateFloatValue = valueString.toFloat()
)
}
predicate floatingPointLiteral(Expr expr, SemFloatingPointType type, float value) {
exists(string valueString |
anyConstantExpr(expr, type, valueString) and value = valueString.toFloat()
)
}
predicate booleanLiteral(Expr expr, SemBooleanType type, boolean value) {
exists(string valueString |
anyConstantExpr(expr, type, valueString) and
(
valueString = "true" and value = true
or
valueString = "false" and value = false
)
)
}
predicate nullLiteral(Expr expr, SemAddressType type) { anyConstantExpr(expr, type, _) }
predicate stringLiteral(Expr expr, SemType type, string value) {
anyConstantExpr(expr, type, value) and expr instanceof IR::StringConstantInstruction
}
predicate binaryExpr(Expr expr, Opcode opcode, SemType type, Expr leftOperand, Expr rightOperand) {
exists(IR::BinaryInstruction instr | instr = expr |
type = getSemanticType(instr.getResultIRType()) and
leftOperand = instr.getLeft() and
rightOperand = instr.getRight() and
// REVIEW: Merge the two `Opcode` types.
opcode.toString() = instr.getOpcode().toString()
)
}
predicate unaryExpr(Expr expr, Opcode opcode, SemType type, Expr operand) {
type = getSemanticType(expr.getResultIRType()) and
(
exists(IR::UnaryInstruction instr | instr = expr |
operand = instr.getUnary() and
// REVIEW: Merge the two operand types.
opcode.toString() = instr.getOpcode().toString()
)
or
exists(IR::StoreInstruction instr | instr = expr |
operand = instr.getSourceValue() and
opcode instanceof Opcode::Store
)
)
}
predicate nullaryExpr(Expr expr, Opcode opcode, SemType type) {
type = getSemanticType(expr.getResultIRType()) and
(
expr instanceof IR::LoadInstruction and opcode instanceof Opcode::Load
or
expr instanceof IR::InitializeParameterInstruction and
opcode instanceof Opcode::InitializeParameter
)
}
predicate conditionalExpr(
Expr expr, SemType type, Expr condition, Expr trueResult, Expr falseResult
) {
none()
}
SemType getUnknownExprType(Expr expr) { result = getSemanticType(expr.getResultIRType()) }
class BasicBlock = IR::IRBlock;
predicate bbDominates(BasicBlock dominator, BasicBlock dominated) {
dominator.dominates(dominated)
}
predicate hasDominanceInformation(BasicBlock block) { any() }
int getBasicBlockUniqueId(BasicBlock block) {
// REVIEW: `getDisplayIndex()` is not intended for use in real queries, but for now it's the
// best we can do because `equivalentRelation` won't accept a predicate whose parameters are IPA
// types.
result = block.getDisplayIndex()
}
class SsaVariable instanceof IR::Instruction {
SsaVariable() { super.hasMemoryResult() }
final string toString() { result = super.toString() }
final Location getLocation() { result = super.getLocation() }
}
predicate explicitUpdate(SsaVariable v, Expr sourceExpr) { v = sourceExpr }
predicate phi(SsaVariable v) { v instanceof IR::PhiInstruction }
SsaVariable getAPhiInput(SsaVariable v) { result = v.(IR::PhiInstruction).getAnInput() }
Expr getAUse(SsaVariable v) { result.(IR::LoadInstruction).getSourceValue() = v }
SemType getSsaVariableType(SsaVariable v) {
result = getSemanticType(v.(IR::Instruction).getResultIRType())
}
BasicBlock getSsaVariableBasicBlock(SsaVariable v) { result = v.(IR::Instruction).getBlock() }
private newtype TReadPosition =
TReadPositionBlock(IR::IRBlock block) or
TReadPositionPhiInputEdge(IR::IRBlock pred, IR::IRBlock succ) {
exists(IR::PhiInputOperand input |
pred = input.getPredecessorBlock() and
succ = input.getUse().getBlock()
)
}
class SsaReadPosition extends TReadPosition {
string toString() { none() }
Location getLocation() { none() }
predicate hasRead(SsaVariable v) { none() }
}
private class SsaReadPositionBlock extends SsaReadPosition, TReadPositionBlock {
IR::IRBlock block;
SsaReadPositionBlock() { this = TReadPositionBlock(block) }
final override string toString() { result = block.toString() }
final override Location getLocation() { result = block.getLocation() }
final override predicate hasRead(SsaVariable v) {
exists(IR::Operand operand |
operand.getDef() = v and not operand instanceof IR::PhiInputOperand
)
}
}
private class SsaReadPositionPhiInputEdge extends SsaReadPosition, TReadPositionPhiInputEdge {
IR::IRBlock pred;
IR::IRBlock succ;
SsaReadPositionPhiInputEdge() { this = TReadPositionPhiInputEdge(pred, succ) }
final override string toString() { result = pred.toString() + "->" + succ.toString() }
final override Location getLocation() { result = succ.getLocation() }
final override predicate hasRead(SsaVariable v) {
exists(IR::PhiInputOperand operand |
operand.getDef() = v and
operand.getPredecessorBlock() = pred and
operand.getUse().getBlock() = succ
)
}
}
predicate hasReadOfSsaVariable(SsaReadPosition pos, SsaVariable v) { pos.hasRead(v) }
predicate readBlock(SsaReadPosition pos, BasicBlock block) { pos = TReadPositionBlock(block) }
predicate phiInputEdge(SsaReadPosition pos, BasicBlock origBlock, BasicBlock phiBlock) {
pos = TReadPositionPhiInputEdge(origBlock, phiBlock)
}
predicate phiInput(SsaReadPosition pos, SsaVariable phi, SsaVariable input) {
exists(IR::PhiInputOperand operand |
pos = TReadPositionPhiInputEdge(operand.getPredecessorBlock(), operand.getUse().getBlock())
|
phi = operand.getUse() and input = operand.getDef()
)
}
class Bound instanceof IRBound::Bound {
Bound() {
this instanceof IRBound::ZeroBound
or
this.(IRBound::ValueNumberBound).getValueNumber().getAnInstruction() instanceof SsaVariable
}
string toString() { result = super.toString() }
final Location getLocation() { result = super.getLocation() }
}
private class ValueNumberBound extends Bound {
IRBound::ValueNumberBound bound;
ValueNumberBound() { bound = this }
override string toString() {
result =
min(SsaVariable instr |
instr = bound.getValueNumber().getAnInstruction()
|
instr
order by
instr.(IR::Instruction).getBlock().getDisplayIndex(),
instr.(IR::Instruction).getDisplayIndexInBlock()
).toString()
}
}
predicate zeroBound(Bound bound) { bound instanceof IRBound::ZeroBound }
predicate ssaBound(Bound bound, SsaVariable v) {
v = bound.(IRBound::ValueNumberBound).getValueNumber().getAnInstruction()
}
Expr getBoundExpr(Bound bound, int delta) {
result = bound.(IRBound::Bound).getInstruction(delta)
}
class Guard = IRGuards::IRGuardCondition;
predicate guard(Guard guard, BasicBlock block) {
block = guard.(IRGuards::IRGuardCondition).getBlock()
}
Expr getGuardAsExpr(Guard guard) { result = guard }
predicate equalityGuard(Guard guard, Expr e1, Expr e2, boolean polarity) {
guard.(IRGuards::IRGuardCondition).comparesEq(e1.getAUse(), e2.getAUse(), 0, true, polarity)
}
predicate guardDirectlyControlsBlock(Guard guard, BasicBlock controlled, boolean branch) {
guard.(IRGuards::IRGuardCondition).controls(controlled, branch)
}
predicate guardHasBranchEdge(Guard guard, BasicBlock bb1, BasicBlock bb2, boolean branch) {
guard.(IRGuards::IRGuardCondition).controlsEdge(bb1, bb2, branch)
}
Guard comparisonGuard(Expr e) { result = e }
predicate implies_v2(Guard g1, boolean b1, Guard g2, boolean b2) {
none() // TODO
}
}
SemExpr getSemanticExpr(IR::Instruction instr) { result = instr }
IR::Instruction getCppInstruction(SemExpr e) { e = result }
SemBasicBlock getSemanticBasicBlock(IR::IRBlock block) { result = block }
IR::IRBlock getCppBasicBlock(SemBasicBlock block) { block = result }
SemSsaVariable getSemanticSsaVariable(IR::Instruction instr) { result = instr }
IR::Instruction getCppSsaVariableInstruction(SemSsaVariable v) { v = result }
SemBound getSemanticBound(IRBound::Bound bound) { result = bound }
IRBound::Bound getCppBound(SemBound bound) { bound = result }
SemGuard getSemanticGuard(IRGuards::IRGuardCondition guard) { result = guard }
IRGuards::IRGuardCondition getCppGuard(SemGuard guard) { guard = result }

View File

@@ -0,0 +1,65 @@
/**
* Semantic interface to the guards library.
*/
private import Semantic
private import SemanticExprSpecific::SemanticExprConfig as Specific
class SemGuard instanceof Specific::Guard {
SemBasicBlock block;
SemGuard() { Specific::guard(this, block) }
final string toString() { result = super.toString() }
final Specific::Location getLocation() { result = super.getLocation() }
final predicate isEquality(SemExpr e1, SemExpr e2, boolean polarity) {
Specific::equalityGuard(this, e1, e2, polarity)
}
final predicate directlyControls(SemBasicBlock controlled, boolean branch) {
Specific::guardDirectlyControlsBlock(this, controlled, branch)
}
final predicate hasBranchEdge(SemBasicBlock bb1, SemBasicBlock bb2, boolean branch) {
Specific::guardHasBranchEdge(this, bb1, bb2, branch)
}
final SemBasicBlock getBasicBlock() { result = block }
final SemExpr asExpr() { result = Specific::getGuardAsExpr(this) }
}
predicate semImplies_v2(SemGuard g1, boolean b1, SemGuard g2, boolean b2) {
Specific::implies_v2(g1, b1, g2, b2)
}
/**
* Holds if `guard` directly controls the position `controlled` with the
* value `testIsTrue`.
*/
predicate semGuardDirectlyControlsSsaRead(
SemGuard guard, SemSsaReadPosition controlled, boolean testIsTrue
) {
guard.directlyControls(controlled.(SemSsaReadPositionBlock).getBlock(), testIsTrue)
or
exists(SemSsaReadPositionPhiInputEdge controlledEdge | controlledEdge = controlled |
guard.directlyControls(controlledEdge.getOrigBlock(), testIsTrue) or
guard.hasBranchEdge(controlledEdge.getOrigBlock(), controlledEdge.getPhiBlock(), testIsTrue)
)
}
/**
* Holds if `guard` controls the position `controlled` with the value `testIsTrue`.
*/
predicate semGuardControlsSsaRead(SemGuard guard, SemSsaReadPosition controlled, boolean testIsTrue) {
semGuardDirectlyControlsSsaRead(guard, controlled, testIsTrue)
or
exists(SemGuard guard0, boolean testIsTrue0 |
semImplies_v2(guard0, testIsTrue0, guard, testIsTrue) and
semGuardControlsSsaRead(guard0, controlled, testIsTrue0)
)
}
SemGuard semGetComparisonGuard(SemRelationalExpr e) { result = Specific::comparisonGuard(e) }

View File

@@ -0,0 +1,179 @@
/**
* Definitions of all possible opcodes for `SemExpr`.
*/
private newtype TOpcode =
TInitializeParameter() or
TCopyValue() or
TLoad() or
TStore() or
TAdd() or
TSub() or
TMul() or
TDiv() or
TRem() or
TNegate() or
TShiftLeft() or
TShiftRight() or
TShiftRightUnsigned() or // TODO: Based on type
TBitAnd() or
TBitOr() or
TBitXor() or
TBitComplement() or
TLogicalNot() or
TCompareEQ() or
TCompareNE() or
TCompareLT() or
TCompareGT() or
TCompareLE() or
TCompareGE() or
TPointerAdd() or
TPointerSub() or
TPointerDiff() or
TConvert() or
TConstant() or
TStringConstant() or
TAddOne() or // TODO: Combine with `TAdd`
TSubOne() or // TODO: Combine with `TSub`
TConditional() or // TODO: Represent as flow
TCall() or
TBox() or
TUnbox() or
TUnknown()
class Opcode extends TOpcode {
string toString() { result = "???" }
}
module Opcode {
class InitializeParameter extends Opcode, TInitializeParameter {
override string toString() { result = "InitializeParameter" }
}
class CopyValue extends Opcode, TCopyValue {
override string toString() { result = "CopyValue" }
}
class Load extends Opcode, TLoad {
override string toString() { result = "Load" }
}
class Store extends Opcode, TStore {
override string toString() { result = "Store" }
}
class Add extends Opcode, TAdd {
override string toString() { result = "Add" }
}
class Sub extends Opcode, TSub {
override string toString() { result = "Sub" }
}
class Mul extends Opcode, TMul {
override string toString() { result = "Mul" }
}
class Div extends Opcode, TDiv {
override string toString() { result = "Div" }
}
class Rem extends Opcode, TRem {
override string toString() { result = "Rem" }
}
class Negate extends Opcode, TNegate {
override string toString() { result = "Negate" }
}
class ShiftLeft extends Opcode, TShiftLeft {
override string toString() { result = "ShiftLeft" }
}
class ShiftRight extends Opcode, TShiftRight {
override string toString() { result = "ShiftRight" }
}
class ShiftRightUnsigned extends Opcode, TShiftRightUnsigned {
override string toString() { result = "ShiftRightUnsigned" }
}
class BitAnd extends Opcode, TBitAnd {
override string toString() { result = "BitAnd" }
}
class BitOr extends Opcode, TBitOr {
override string toString() { result = "BitOr" }
}
class BitXor extends Opcode, TBitXor {
override string toString() { result = "BitXor" }
}
class BitComplement extends Opcode, TBitComplement {
override string toString() { result = "BitComplement" }
}
class LogicalNot extends Opcode, TLogicalNot {
override string toString() { result = "LogicalNot" }
}
class CompareEQ extends Opcode, TCompareEQ {
override string toString() { result = "CompareEQ" }
}
class CompareNE extends Opcode, TCompareNE {
override string toString() { result = "CompareNE" }
}
class CompareLT extends Opcode, TCompareLT {
override string toString() { result = "CompareLT" }
}
class CompareLE extends Opcode, TCompareLE {
override string toString() { result = "CompareLE" }
}
class CompareGT extends Opcode, TCompareGT {
override string toString() { result = "CompareGT" }
}
class CompareGE extends Opcode, TCompareGE {
override string toString() { result = "CompareGE" }
}
class Convert extends Opcode, TConvert {
override string toString() { result = "Convert" }
}
class AddOne extends Opcode, TAddOne {
override string toString() { result = "AddOne" }
}
class SubOne extends Opcode, TSubOne {
override string toString() { result = "SubOne" }
}
class Conditional extends Opcode, TConditional {
override string toString() { result = "Conditional" }
}
class Constant extends Opcode, TConstant {
override string toString() { result = "Constant" }
}
class StringConstant extends Opcode, TStringConstant {
override string toString() { result = "StringConstant" }
}
class Box extends Opcode, TBox {
override string toString() { result = "Box" }
}
class Unbox extends Opcode, TUnbox {
override string toString() { result = "Unbox" }
}
class Unknown extends Opcode, TUnknown {
override string toString() { result = "Unknown" }
}
}

View File

@@ -0,0 +1,75 @@
/**
* Semantic interface to the SSA library.
*/
private import Semantic
private import SemanticExprSpecific::SemanticExprConfig as Specific
class SemSsaVariable instanceof Specific::SsaVariable {
final string toString() { result = super.toString() }
final Specific::Location getLocation() { result = super.getLocation() }
final SemLoadExpr getAUse() { result = Specific::getAUse(this) }
final SemType getType() { result = Specific::getSsaVariableType(this) }
final SemBasicBlock getBasicBlock() { result = Specific::getSsaVariableBasicBlock(this) }
}
class SemSsaExplicitUpdate extends SemSsaVariable {
SemExpr sourceExpr;
SemSsaExplicitUpdate() { Specific::explicitUpdate(this, sourceExpr) }
final SemExpr getSourceExpr() { result = sourceExpr }
}
class SemSsaPhiNode extends SemSsaVariable {
SemSsaPhiNode() { Specific::phi(this) }
final SemSsaVariable getAPhiInput() { result = Specific::getAPhiInput(this) }
}
class SemSsaReadPosition instanceof Specific::SsaReadPosition {
final string toString() { result = super.toString() }
final Specific::Location getLocation() { result = super.getLocation() }
final predicate hasReadOfVar(SemSsaVariable var) { Specific::hasReadOfSsaVariable(this, var) }
}
class SemSsaReadPositionPhiInputEdge extends SemSsaReadPosition {
SemBasicBlock origBlock;
SemBasicBlock phiBlock;
SemSsaReadPositionPhiInputEdge() { Specific::phiInputEdge(this, origBlock, phiBlock) }
predicate phiInput(SemSsaPhiNode phi, SemSsaVariable inp) { Specific::phiInput(this, phi, inp) }
SemBasicBlock getOrigBlock() { result = origBlock }
SemBasicBlock getPhiBlock() { result = phiBlock }
}
class SemSsaReadPositionBlock extends SemSsaReadPosition {
SemBasicBlock block;
SemSsaReadPositionBlock() { Specific::readBlock(this, block) }
SemBasicBlock getBlock() { result = block }
SemExpr getAnExpr() { result = getBlock().getAnExpr() }
}
/**
* Holds if `inp` is an input to `phi` along a back edge.
*/
predicate semBackEdge(SemSsaPhiNode phi, SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge) {
edge.phiInput(phi, inp) and
// Conservatively assume that every edge is a back edge if we don't have dominance information.
(
phi.getBasicBlock().bbDominates(edge.getOrigBlock()) or
not edge.getOrigBlock().hasDominanceInformation()
)
}

View File

@@ -0,0 +1,301 @@
/**
* Minimal, language-neutral type system for semantic analysis.
*/
private import SemanticTypeSpecific as Specific
class LanguageType = Specific::Type;
cached
private newtype TSemType =
TSemVoidType() { Specific::voidType(_) } or
TSemUnknownType() { Specific::unknownType(_) } or
TSemErrorType() { Specific::errorType(_) } or
TSemBooleanType(int byteSize) { Specific::booleanType(_, byteSize) } or
TSemIntegerType(int byteSize, boolean signed) { Specific::integerType(_, byteSize, signed) } or
TSemFloatingPointType(int byteSize) { Specific::floatingPointType(_, byteSize) } or
TSemAddressType(int byteSize) { Specific::addressType(_, byteSize) } or
TSemFunctionAddressType(int byteSize) { Specific::functionAddressType(_, byteSize) } or
TSemOpaqueType(int byteSize, Specific::OpaqueTypeTag tag) {
Specific::opaqueType(_, byteSize, tag)
}
/**
* The language-neutral type of a semantic expression,
* The interface to `SemType` and its subclasses is the same across all languages for which the IR
* is supported, so analyses that expect to be used for multiple languages should generally use
* `SemType` rather than a language-specific type.
*
* Many types from the language-specific type system will map to a single canonical `SemType`. Two
* types that map to the same `SemType` are considered equivalent by semantic analysis. As an
* example, in C++, all pointer types map to the same instance of `SemAddressType`.
*/
class SemType extends TSemType {
/** Gets a textual representation of this type. */
string toString() { none() }
/**
* Gets a string that uniquely identifies this `SemType`. This string is often the same as the
* result of `SemType.toString()`, but for some types it may be more verbose to ensure uniqueness.
*/
string getIdentityString() { result = toString() }
/**
* Gets the size of the type, in bytes, if known.
*
* This will hold for all `SemType` objects except `SemUnknownType` and `SemErrorType`.
*/
// This predicate is overridden with `pragma[noinline]` in every leaf subclass.
// This allows callers to ask for things like _the_ floating-point type of
// size 4 without getting a join that first finds all types of size 4 and
// _then_ restricts them to floating-point types.
int getByteSize() { none() }
}
/**
* An unknown type. Generally used to represent results and operands that access an unknown set of
* memory locations, such as the side effects of a function call.
*/
class SemUnknownType extends SemType, TSemUnknownType {
final override string toString() { result = "unknown" }
final override int getByteSize() { none() }
}
/**
* A void type, which has no values. Used to represent the result type of an expression that does
* not produce a result.
*/
class SemVoidType extends SemType, TSemVoidType {
final override string toString() { result = "void" }
final override int getByteSize() { result = 0 }
}
/**
* An error type. Used when an error in the source code prevents the extractor from determining the
* proper type.
*/
class SemErrorType extends SemType, TSemErrorType {
final override string toString() { result = "error" }
final override int getByteSize() { result = 0 }
}
private class SemSizedType extends SemType {
int byteSize;
SemSizedType() {
this = TSemBooleanType(byteSize) or
this = TSemIntegerType(byteSize, _) or
this = TSemFloatingPointType(byteSize) or
this = TSemAddressType(byteSize) or
this = TSemFunctionAddressType(byteSize) or
this = TSemOpaqueType(byteSize, _)
}
// Don't override `getByteSize()` here. The optimizer seems to generate better code when this is
// overridden only in the leaf classes.
}
/**
* A Boolean type, which can hold the values `true` (non-zero) or `false` (zero).
*/
class SemBooleanType extends SemSizedType, TSemBooleanType {
final override string toString() { result = "bool" + byteSize.toString() }
pragma[noinline]
final override int getByteSize() { result = byteSize }
}
/**
* A numeric type. This includes `SemSignedIntegerType`, `SemUnsignedIntegerType`, and
* `SemFloatingPointType`.
*/
class SemNumericType extends SemSizedType {
SemNumericType() {
this = TSemIntegerType(byteSize, _) or
this = TSemFloatingPointType(byteSize)
}
// Don't override `getByteSize()` here. The optimizer seems to generate better code when this is
// overridden only in the leaf classes.
}
/**
* An integer type. This includes `SemSignedIntegerType` and `SemUnsignedIntegerType`.
*/
class SemIntegerType extends SemNumericType {
boolean signed;
SemIntegerType() { this = TSemIntegerType(byteSize, signed) }
/** Holds if this integer type is signed. */
final predicate isSigned() { signed = true }
/** Holds if this integer type is unsigned. */
final predicate isUnsigned() { not isSigned() }
// Don't override `getByteSize()` here. The optimizer seems to generate better code when this is
// overridden only in the leaf classes.
}
/**
* A signed two's-complement integer. Also used to represent enums whose underlying type is a signed
* integer, as well as character types whose representation is signed.
*/
class SemSignedIntegerType extends SemIntegerType {
SemSignedIntegerType() { signed = true }
final override string toString() { result = "int" + byteSize.toString() }
pragma[noinline]
final override int getByteSize() { result = byteSize }
}
/**
* An unsigned two's-complement integer. Also used to represent enums whose underlying type is an
* unsigned integer, as well as character types whose representation is unsigned.
*/
class SemUnsignedIntegerType extends SemIntegerType {
SemUnsignedIntegerType() { signed = false }
final override string toString() { result = "uint" + byteSize.toString() }
pragma[noinline]
final override int getByteSize() { result = byteSize }
}
/**
* A floating-point type.
*/
class SemFloatingPointType extends SemNumericType, TSemFloatingPointType {
final override string toString() { result = "float" + byteSize.toString() }
pragma[noinline]
final override int getByteSize() { result = byteSize }
}
/**
* An address type, representing the memory address of data. Used to represent pointers, references,
* and lvalues, include those that are garbage collected.
*
* The address of a function is represented by the separate `SemFunctionAddressType`.
*/
class SemAddressType extends SemSizedType, TSemAddressType {
final override string toString() { result = "addr" + byteSize.toString() }
pragma[noinline]
final override int getByteSize() { result = byteSize }
}
/**
* An address type, representing the memory address of code. Used to represent function pointers,
* function references, and the target of a direct function call.
*/
class SemFunctionAddressType extends SemSizedType, TSemFunctionAddressType {
final override string toString() { result = "func" + byteSize.toString() }
pragma[noinline]
final override int getByteSize() { result = byteSize }
}
/**
* A type with known size that does not fit any of the other kinds of type. Used to represent
* classes, structs, unions, fixed-size arrays, pointers-to-member, and more.
*/
class SemOpaqueType extends SemSizedType, TSemOpaqueType {
Specific::OpaqueTypeTag tag;
SemOpaqueType() { this = TSemOpaqueType(byteSize, tag) }
final override string toString() {
result = "opaque" + byteSize.toString() + "{" + tag.toString() + "}"
}
final override string getIdentityString() {
result = "opaque" + byteSize.toString() + "{" + Specific::getOpaqueTagIdentityString(tag) + "}"
}
/**
* Gets the "tag" that differentiates this type from other incompatible opaque types that have the
* same size.
*/
final Specific::OpaqueTypeTag getTag() { result = tag }
pragma[noinline]
final override int getByteSize() { result = byteSize }
}
cached
SemType getSemanticType(Specific::Type type) {
exists(int byteSize |
Specific::booleanType(type, byteSize) and result = TSemBooleanType(byteSize)
or
exists(boolean signed |
Specific::integerType(type, byteSize, signed) and
result = TSemIntegerType(byteSize, signed)
)
or
Specific::floatingPointType(type, byteSize) and result = TSemFloatingPointType(byteSize)
or
Specific::addressType(type, byteSize) and result = TSemAddressType(byteSize)
or
Specific::functionAddressType(type, byteSize) and result = TSemFunctionAddressType(byteSize)
or
exists(Specific::OpaqueTypeTag tag |
Specific::opaqueType(type, byteSize, tag) and result = TSemOpaqueType(byteSize, tag)
)
)
or
Specific::errorType(type) and result = TSemErrorType()
or
Specific::unknownType(type) and result = TSemUnknownType()
}
/**
* Holds if the conversion from `fromType` to `toType` can never overflow or underflow.
*/
predicate conversionCannotOverflow(SemNumericType fromType, SemNumericType toType) {
// Identity cast
fromType = toType
or
// Treat any cast to an FP type as safe. It can lose precision, but not overflow.
toType instanceof SemFloatingPointType and fromType = any(SemNumericType n)
or
exists(SemIntegerType fromInteger, SemIntegerType toInteger, int fromSize, int toSize |
fromInteger = fromType and
toInteger = toType and
fromSize = fromInteger.getByteSize() and
toSize = toInteger.getByteSize()
|
// Conversion to a larger type. Safe unless converting signed -> unsigned.
fromSize < toSize and
(
toInteger.isSigned()
or
not fromInteger.isSigned()
)
)
}
/**
* INTERNAL: Do not use.
* Query predicates used to check invariants that should hold for all `SemType` objects.
*/
module SemTypeConsistency {
/**
* Holds if the type has no result for `getSemanticType()`.
*/
query predicate missingSemType(Specific::Type type, string message) {
not exists(getSemanticType(type)) and
message = "`Type` does not have a corresponding `SemType`."
}
/**
* Holds if the type has more than one result for `getSemanticType()`.
*/
query predicate multipleSemTypes(Specific::Type type, string message) {
strictcount(getSemanticType(type)) > 1 and
message =
"`Type` " + type + " has multiple `SemType`s: " +
concat(getSemanticType(type).toString(), ", ")
}
}

View File

@@ -0,0 +1,43 @@
/**
* C++-specific implementation of the semantic type system.
*/
private import semmle.code.cpp.ir.IR as IR
private import cpp as Cpp
private import semmle.code.cpp.ir.internal.IRCppLanguage as Language
class Type = IR::IRType;
class OpaqueTypeTag = Language::OpaqueTypeTag;
predicate voidType(Type type) { type instanceof IR::IRVoidType }
predicate errorType(Type type) { type instanceof IR::IRErrorType }
predicate unknownType(Type type) { type instanceof IR::IRUnknownType }
predicate booleanType(Type type, int byteSize) { byteSize = type.(IR::IRBooleanType).getByteSize() }
predicate integerType(Type type, int byteSize, boolean signed) {
byteSize = type.(IR::IRSignedIntegerType).getByteSize() and signed = true
or
byteSize = type.(IR::IRUnsignedIntegerType).getByteSize() and signed = false
}
predicate floatingPointType(Type type, int byteSize) {
byteSize = type.(IR::IRFloatingPointType).getByteSize()
}
predicate addressType(Type type, int byteSize) { byteSize = type.(IR::IRAddressType).getByteSize() }
predicate functionAddressType(Type type, int byteSize) {
byteSize = type.(IR::IRFunctionAddressType).getByteSize()
}
predicate opaqueType(Type type, int byteSize, OpaqueTypeTag tag) {
exists(IR::IROpaqueType opaque | opaque = type |
byteSize = opaque.getByteSize() and tag = opaque.getTag()
)
}
predicate getOpaqueTagIdentityString = Language::getOpaqueTagIdentityString/1;

View File

@@ -0,0 +1,31 @@
/**
* Simple constant analysis using the Semantic interface.
*/
private import experimental.semmle.code.cpp.semantic.Semantic
private import ConstantAnalysisSpecific as Specific
/** An expression that always has the same integer value. */
pragma[nomagic]
private predicate constantIntegerExpr(SemExpr e, int val) {
// An integer literal
e.(SemIntegerLiteralExpr).getIntValue() = val
or
// Copy of another constant
exists(SemSsaExplicitUpdate v, SemExpr src |
e = v.getAUse() and
src = v.getSourceExpr() and
constantIntegerExpr(src, val)
)
or
// Language-specific enhancements
val = Specific::getIntConstantValue(e)
}
/** An expression that always has the same integer value. */
class SemConstantIntegerExpr extends SemExpr {
SemConstantIntegerExpr() { constantIntegerExpr(this, _) }
/** Gets the integer value of this expression. */
int getIntValue() { constantIntegerExpr(this, result) }
}

View File

@@ -0,0 +1,10 @@
/**
* C++-specific implementation of constant analysis.
*/
private import experimental.semmle.code.cpp.semantic.Semantic
/**
* Gets the constant integer value of the specified expression, if any.
*/
int getIntConstantValue(SemExpr expr) { none() }

View File

@@ -0,0 +1,310 @@
/**
* Provides inferences of the form: `e` equals `b + v` modulo `m` where `e` is
* an expression, `b` is a `Bound` (typically zero or the value of an SSA
* variable), and `v` is an integer in the range `[0 .. m-1]`.
*/
private import ModulusAnalysisSpecific::Private
private import experimental.semmle.code.cpp.semantic.Semantic
private import ConstantAnalysis
private import RangeUtils
/**
* Holds if `e + delta` equals `v` at `pos`.
*/
private predicate valueFlowStepSsa(SemSsaVariable v, SemSsaReadPosition pos, SemExpr e, int delta) {
semSsaUpdateStep(v, e, delta) and pos.hasReadOfVar(v)
or
exists(SemGuard guard, boolean testIsTrue |
pos.hasReadOfVar(v) and
guard = semEqFlowCond(v, e, delta, true, testIsTrue) and
semGuardDirectlyControlsSsaRead(guard, pos, testIsTrue)
)
}
/**
* Holds if `add` is the addition of `larg` and `rarg`, neither of which are
* `ConstantIntegerExpr`s.
*/
private predicate nonConstAddition(SemExpr add, SemExpr larg, SemExpr rarg) {
exists(SemAddExpr a | a = add |
larg = a.getLeftOperand() and
rarg = a.getRightOperand()
) and
not larg instanceof SemConstantIntegerExpr and
not rarg instanceof SemConstantIntegerExpr
}
/**
* Holds if `sub` is the subtraction of `larg` and `rarg`, where `rarg` is not
* a `ConstantIntegerExpr`.
*/
private predicate nonConstSubtraction(SemExpr sub, SemExpr larg, SemExpr rarg) {
exists(SemSubExpr s | s = sub |
larg = s.getLeftOperand() and
rarg = s.getRightOperand()
) and
not rarg instanceof SemConstantIntegerExpr
}
/** Gets an expression that is the remainder modulo `mod` of `arg`. */
private SemExpr modExpr(SemExpr arg, int mod) {
exists(SemRemExpr rem |
result = rem and
arg = rem.getLeftOperand() and
rem.getRightOperand().(SemConstantIntegerExpr).getIntValue() = mod and
mod >= 2
)
or
exists(SemConstantIntegerExpr c |
mod = 2.pow([1 .. 30]) and
c.getIntValue() = mod - 1 and
result.(SemBitAndExpr).hasOperands(arg, c)
)
}
/**
* Gets a guard that tests whether `v` is congruent with `val` modulo `mod` on
* its `testIsTrue` branch.
*/
private SemGuard moduloCheck(SemSsaVariable v, int val, int mod, boolean testIsTrue) {
exists(SemExpr rem, SemConstantIntegerExpr c, int r, boolean polarity |
result.isEquality(rem, c, polarity) and
c.getIntValue() = r and
rem = modExpr(v.getAUse(), mod) and
(
testIsTrue = polarity and val = r
or
testIsTrue = polarity.booleanNot() and
mod = 2 and
val = 1 - r and
(r = 0 or r = 1)
)
)
}
/**
* Holds if a guard ensures that `v` at `pos` is congruent with `val` modulo `mod`.
*/
private predicate moduloGuardedRead(SemSsaVariable v, SemSsaReadPosition pos, int val, int mod) {
exists(SemGuard guard, boolean testIsTrue |
pos.hasReadOfVar(v) and
guard = moduloCheck(v, val, mod, testIsTrue) and
semGuardControlsSsaRead(guard, pos, testIsTrue)
)
}
/** Holds if `factor` is a power of 2 that divides `mask`. */
bindingset[mask]
private predicate andmaskFactor(int mask, int factor) {
mask % factor = 0 and
factor = 2.pow([1 .. 30])
}
/** Holds if `e` is evenly divisible by `factor`. */
private predicate evenlyDivisibleExpr(SemExpr e, int factor) {
exists(SemConstantIntegerExpr c, int k | k = c.getIntValue() |
e.(SemMulExpr).getAnOperand() = c and factor = k.abs() and factor >= 2
or
e.(SemShiftLeftExpr).getRightOperand() = c and factor = 2.pow(k) and k > 0
or
e.(SemBitAndExpr).getAnOperand() = c and factor = max(int f | andmaskFactor(k, f))
)
}
/**
* Holds if `rix` is the number of input edges to `phi`.
*/
private predicate maxPhiInputRank(SemSsaPhiNode phi, int rix) {
rix = max(int r | rankedPhiInput(phi, _, _, r))
}
/**
* Gets the remainder of `val` modulo `mod`.
*
* For `mod = 0` the result equals `val` and for `mod > 1` the result is within
* the range `[0 .. mod-1]`.
*/
bindingset[val, mod]
private int remainder(int val, int mod) {
mod = 0 and result = val
or
mod > 1 and result = ((val % mod) + mod) % mod
}
/**
* Holds if `inp` is an input to `phi` and equals `phi` modulo `mod` along `edge`.
*/
private predicate phiSelfModulus(
SemSsaPhiNode phi, SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge, int mod
) {
exists(SemSsaBound phibound, int v, int m |
edge.phiInput(phi, inp) and
phibound.getAVariable() = phi and
ssaModulus(inp, edge, phibound, v, m) and
mod = m.gcd(v) and
mod != 1
)
}
/**
* Holds if `b + val` modulo `mod` is a candidate congruence class for `phi`.
*/
private predicate phiModulusInit(SemSsaPhiNode phi, SemBound b, int val, int mod) {
exists(SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge |
edge.phiInput(phi, inp) and
ssaModulus(inp, edge, b, val, mod)
)
}
/**
* Holds if all inputs to `phi` numbered `1` to `rix` are equal to `b + val` modulo `mod`.
*/
private predicate phiModulusRankStep(SemSsaPhiNode phi, SemBound b, int val, int mod, int rix) {
rix = 0 and
phiModulusInit(phi, b, val, mod)
or
exists(SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge, int v1, int m1 |
mod != 1 and
val = remainder(v1, mod)
|
exists(int v2, int m2 |
rankedPhiInput(phi, inp, edge, rix) and
phiModulusRankStep(phi, b, v1, m1, rix - 1) and
ssaModulus(inp, edge, b, v2, m2) and
mod = m1.gcd(m2).gcd(v1 - v2)
)
or
exists(int m2 |
rankedPhiInput(phi, inp, edge, rix) and
phiModulusRankStep(phi, b, v1, m1, rix - 1) and
phiSelfModulus(phi, inp, edge, m2) and
mod = m1.gcd(m2)
)
)
}
/**
* Holds if `phi` is equal to `b + val` modulo `mod`.
*/
private predicate phiModulus(SemSsaPhiNode phi, SemBound b, int val, int mod) {
exists(int r |
maxPhiInputRank(phi, r) and
phiModulusRankStep(phi, b, val, mod, r)
)
}
/**
* Holds if `v` at `pos` is equal to `b + val` modulo `mod`.
*/
private predicate ssaModulus(SemSsaVariable v, SemSsaReadPosition pos, SemBound b, int val, int mod) {
phiModulus(v, b, val, mod) and pos.hasReadOfVar(v)
or
b.(SemSsaBound).getAVariable() = v and pos.hasReadOfVar(v) and val = 0 and mod = 0
or
exists(SemExpr e, int val0, int delta |
semExprModulus(e, b, val0, mod) and
valueFlowStepSsa(v, pos, e, delta) and
val = remainder(val0 + delta, mod)
)
or
moduloGuardedRead(v, pos, val, mod) and b instanceof SemZeroBound
}
/**
* Holds if `e` is equal to `b + val` modulo `mod`.
*
* There are two cases for the modulus:
* - `mod = 0`: The equality `e = b + val` is an ordinary equality.
* - `mod > 1`: `val` lies within the range `[0 .. mod-1]`.
*/
cached
predicate semExprModulus(SemExpr e, SemBound b, int val, int mod) {
not ignoreExprModulus(e) and
(
e = b.getExpr(val) and mod = 0
or
evenlyDivisibleExpr(e, mod) and
val = 0 and
b instanceof SemZeroBound
or
exists(SemSsaVariable v, SemSsaReadPositionBlock bb |
ssaModulus(v, bb, b, val, mod) and
e = v.getAUse() and
bb.getAnExpr() = e
)
or
exists(SemExpr mid, int val0, int delta |
semExprModulus(mid, b, val0, mod) and
semValueFlowStep(e, mid, delta) and
val = remainder(val0 + delta, mod)
)
or
exists(SemConditionalExpr cond, int v1, int v2, int m1, int m2 |
cond = e and
condExprBranchModulus(cond, true, b, v1, m1) and
condExprBranchModulus(cond, false, b, v2, m2) and
mod = m1.gcd(m2).gcd(v1 - v2) and
mod != 1 and
val = remainder(v1, mod)
)
or
exists(SemBound b1, SemBound b2, int v1, int v2, int m1, int m2 |
addModulus(e, true, b1, v1, m1) and
addModulus(e, false, b2, v2, m2) and
mod = m1.gcd(m2) and
mod != 1 and
val = remainder(v1 + v2, mod)
|
b = b1 and b2 instanceof SemZeroBound
or
b = b2 and b1 instanceof SemZeroBound
)
or
exists(int v1, int v2, int m1, int m2 |
subModulus(e, true, b, v1, m1) and
subModulus(e, false, any(SemZeroBound zb), v2, m2) and
mod = m1.gcd(m2) and
mod != 1 and
val = remainder(v1 - v2, mod)
)
)
}
private predicate condExprBranchModulus(
SemConditionalExpr cond, boolean branch, SemBound b, int val, int mod
) {
semExprModulus(cond.getBranchExpr(branch), b, val, mod)
}
private predicate addModulus(SemExpr add, boolean isLeft, SemBound b, int val, int mod) {
exists(SemExpr larg, SemExpr rarg | nonConstAddition(add, larg, rarg) |
semExprModulus(larg, b, val, mod) and isLeft = true
or
semExprModulus(rarg, b, val, mod) and isLeft = false
)
}
private predicate subModulus(SemExpr sub, boolean isLeft, SemBound b, int val, int mod) {
exists(SemExpr larg, SemExpr rarg | nonConstSubtraction(sub, larg, rarg) |
semExprModulus(larg, b, val, mod) and isLeft = true
or
semExprModulus(rarg, b, val, mod) and isLeft = false
)
}
/**
* Holds if `inp` is an input to `phi` along `edge` and this input has index `r`
* in an arbitrary 1-based numbering of the input edges to `phi`.
*/
private predicate rankedPhiInput(
SemSsaPhiNode phi, SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge, int r
) {
edge.phiInput(phi, inp) and
edge =
rank[r](SemSsaReadPositionPhiInputEdge e |
e.phiInput(phi, _)
|
e order by e.getOrigBlock().getUniqueId()
)
}

View File

@@ -0,0 +1,8 @@
/**
* C++-specific implementation of modulus analysis.
*/
module Private {
private import experimental.semmle.code.cpp.semantic.Semantic
predicate ignoreExprModulus(SemExpr e) { none() }
}

View File

@@ -0,0 +1,807 @@
/**
* Provides classes and predicates for range analysis.
*
* An inferred bound can either be a specific integer, the abstract value of an
* SSA variable, or the abstract value of an interesting expression. The latter
* category includes array lengths that are not SSA variables.
*
* If an inferred bound relies directly on a condition, then this condition is
* reported as the reason for the bound.
*/
/*
* This library tackles range analysis as a flow problem. Consider e.g.:
* ```
* len = arr.length;
* if (x < len) { ... y = x-1; ... y ... }
* ```
* In this case we would like to infer `y <= arr.length - 2`, and this is
* accomplished by tracking the bound through a sequence of steps:
* ```
* arr.length --> len = .. --> x < len --> x-1 --> y = .. --> y
* ```
*
* In its simplest form the step relation `E1 --> E2` relates two expressions
* such that `E1 <= B` implies `E2 <= B` for any `B` (with a second separate
* step relation handling lower bounds). Examples of such steps include
* assignments `E2 = E1` and conditions `x <= E1` where `E2` is a use of `x`
* guarded by the condition.
*
* In order to handle subtractions and additions with constants, and strict
* comparisons, the step relation is augmented with an integer delta. With this
* generalization `E1 --(delta)--> E2` relates two expressions and an integer
* such that `E1 <= B` implies `E2 <= B + delta` for any `B`. This corresponds
* to the predicate `boundFlowStep`.
*
* The complete range analysis is then implemented as the transitive closure of
* the step relation summing the deltas along the way. If `E1` transitively
* steps to `E2`, `delta` is the sum of deltas along the path, and `B` is an
* interesting bound equal to the value of `E1` then `E2 <= B + delta`. This
* corresponds to the predicate `bounded`.
*
* Phi nodes need a little bit of extra handling. Consider `x0 = phi(x1, x2)`.
* There are essentially two cases:
* - If `x1 <= B + d1` and `x2 <= B + d2` then `x0 <= B + max(d1,d2)`.
* - If `x1 <= B + d1` and `x2 <= x0 + d2` with `d2 <= 0` then `x0 <= B + d1`.
* The first case is for whenever a bound can be proven without taking looping
* into account. The second case is relevant when `x2` comes from a back-edge
* where we can prove that the variable has been non-increasing through the
* loop-iteration as this means that any upper bound that holds prior to the
* loop also holds for the variable during the loop.
* This generalizes to a phi node with `n` inputs, so if
* `x0 = phi(x1, ..., xn)` and `xi <= B + delta` for one of the inputs, then we
* also have `x0 <= B + delta` if we can prove either:
* - `xj <= B + d` with `d <= delta` or
* - `xj <= x0 + d` with `d <= 0`
* for each input `xj`.
*
* As all inferred bounds can be related directly to a path in the source code
* the only source of non-termination is if successive redundant (and thereby
* increasingly worse) bounds are calculated along a loop in the source code.
* We prevent this by weakening the bound to a small finite set of bounds when
* a path follows a second back-edge (we postpone weakening till the second
* back-edge as a precise bound might require traversing a loop once).
*/
private import RangeAnalysisSpecific as Specific
private import RangeUtils
private import SignAnalysisCommon
private import ModulusAnalysis
private import experimental.semmle.code.cpp.semantic.Semantic
private import ConstantAnalysis
cached
private module RangeAnalysisCache {
cached
module RangeAnalysisPublic {
/**
* Holds if `b + delta` is a valid bound for `e`.
* - `upper = true` : `e <= b + delta`
* - `upper = false` : `e >= b + delta`
*
* The reason for the bound is given by `reason` and may be either a condition
* or `NoReason` if the bound was proven directly without the use of a bounding
* condition.
*/
cached
predicate semBounded(SemExpr e, SemBound b, int delta, boolean upper, SemReason reason) {
bounded(e, b, delta, upper, _, _, reason) and
bestBound(e, b, delta, upper)
}
}
/**
* Holds if `guard = boundFlowCond(_, _, _, _, _) or guard = eqFlowCond(_, _, _, _, _)`.
*/
cached
predicate possibleReason(SemGuard guard) {
guard = boundFlowCond(_, _, _, _, _) or guard = semEqFlowCond(_, _, _, _, _)
}
}
private import RangeAnalysisCache
import RangeAnalysisPublic
/**
* Holds if `b + delta` is a valid bound for `e` and this is the best such delta.
* - `upper = true` : `e <= b + delta`
* - `upper = false` : `e >= b + delta`
*/
private predicate bestBound(SemExpr e, SemBound b, int delta, boolean upper) {
delta = min(int d | bounded(e, b, d, upper, _, _, _)) and upper = true
or
delta = max(int d | bounded(e, b, d, upper, _, _, _)) and upper = false
}
/**
* Holds if `comp` corresponds to:
* - `upper = true` : `v <= e + delta` or `v < e + delta`
* - `upper = false` : `v >= e + delta` or `v > e + delta`
*/
private predicate boundCondition(
SemRelationalExpr comp, SemSsaVariable v, SemExpr e, int delta, boolean upper
) {
comp.getLesserOperand() = semSsaRead(v, delta) and e = comp.getGreaterOperand() and upper = true
or
comp.getGreaterOperand() = semSsaRead(v, delta) and e = comp.getLesserOperand() and upper = false
or
exists(SemSubExpr sub, SemConstantIntegerExpr c, int d |
// (v - d) - e < c
comp.getLesserOperand() = sub and
comp.getGreaterOperand() = c and
sub.getLeftOperand() = semSsaRead(v, d) and
sub.getRightOperand() = e and
upper = true and
delta = d + c.getIntValue()
or
// (v - d) - e > c
comp.getGreaterOperand() = sub and
comp.getLesserOperand() = c and
sub.getLeftOperand() = semSsaRead(v, d) and
sub.getRightOperand() = e and
upper = false and
delta = d + c.getIntValue()
or
// e - (v - d) < c
comp.getLesserOperand() = sub and
comp.getGreaterOperand() = c and
sub.getLeftOperand() = e and
sub.getRightOperand() = semSsaRead(v, d) and
upper = false and
delta = d - c.getIntValue()
or
// e - (v - d) > c
comp.getGreaterOperand() = sub and
comp.getLesserOperand() = c and
sub.getLeftOperand() = e and
sub.getRightOperand() = semSsaRead(v, d) and
upper = true and
delta = d - c.getIntValue()
)
}
/**
* Holds if `comp` is a comparison between `x` and `y` for which `y - x` has a
* fixed value modulo some `mod > 1`, such that the comparison can be
* strengthened by `strengthen` when evaluating to `testIsTrue`.
*/
private predicate modulusComparison(SemRelationalExpr comp, boolean testIsTrue, int strengthen) {
exists(
SemBound b, int v1, int v2, int mod1, int mod2, int mod, boolean resultIsStrict, int d, int k
|
// If `x <= y` and `x =(mod) b + v1` and `y =(mod) b + v2` then
// `0 <= y - x =(mod) v2 - v1`. By choosing `k =(mod) v2 - v1` with
// `0 <= k < mod` we get `k <= y - x`. If the resulting comparison is
// strict then the strengthening amount is instead `k - 1` modulo `mod`:
// `x < y` means `0 <= y - x - 1 =(mod) k - 1` so `k - 1 <= y - x - 1` and
// thus `k - 1 < y - x` with `0 <= k - 1 < mod`.
semExprModulus(comp.getLesserOperand(), b, v1, mod1) and
semExprModulus(comp.getGreaterOperand(), b, v2, mod2) and
mod = mod1.gcd(mod2) and
mod != 1 and
(testIsTrue = true or testIsTrue = false) and
(
if comp.isStrict()
then resultIsStrict = testIsTrue
else resultIsStrict = testIsTrue.booleanNot()
) and
(
resultIsStrict = true and d = 1
or
resultIsStrict = false and d = 0
) and
(
testIsTrue = true and k = v2 - v1
or
testIsTrue = false and k = v1 - v2
) and
strengthen = (((k - d) % mod) + mod) % mod
)
}
/**
* Gets a condition that tests whether `v` is bounded by `e + delta`.
*
* If the condition evaluates to `testIsTrue`:
* - `upper = true` : `v <= e + delta`
* - `upper = false` : `v >= e + delta`
*/
private SemGuard boundFlowCond(
SemSsaVariable v, SemExpr e, int delta, boolean upper, boolean testIsTrue
) {
exists(
SemRelationalExpr comp, int d1, int d2, int d3, int strengthen, boolean compIsUpper,
boolean resultIsStrict
|
comp = result.asExpr() and
boundCondition(comp, v, e, d1, compIsUpper) and
(testIsTrue = true or testIsTrue = false) and
upper = compIsUpper.booleanXor(testIsTrue.booleanNot()) and
(
if comp.isStrict()
then resultIsStrict = testIsTrue
else resultIsStrict = testIsTrue.booleanNot()
) and
(
if getTrackedTypeForSsaVariable(v) instanceof SemIntegerType
then
upper = true and strengthen = -1
or
upper = false and strengthen = 1
else strengthen = 0
) and
(
exists(int k | modulusComparison(comp, testIsTrue, k) and d2 = strengthen * k)
or
not modulusComparison(comp, testIsTrue, _) and d2 = 0
) and
// A strict inequality `x < y` can be strengthened to `x <= y - 1`.
(
resultIsStrict = true and d3 = strengthen
or
resultIsStrict = false and d3 = 0
) and
delta = d1 + d2 + d3
)
or
exists(boolean testIsTrue0 |
semImplies_v2(result, testIsTrue, boundFlowCond(v, e, delta, upper, testIsTrue0), testIsTrue0)
)
or
result = semEqFlowCond(v, e, delta, true, testIsTrue) and
(upper = true or upper = false)
or
// guard that tests whether `v2` is bounded by `e + delta + d1 - d2` and
// exists a guard `guardEq` such that `v = v2 - d1 + d2`.
exists(SemSsaVariable v2, SemGuard guardEq, boolean eqIsTrue, int d1, int d2 |
guardEq = semEqFlowCond(v, semSsaRead(v2, d1), d2, true, eqIsTrue) and
result = boundFlowCond(v2, e, delta + d1 - d2, upper, testIsTrue) and
// guardEq needs to control guard
guardEq.directlyControls(result.getBasicBlock(), eqIsTrue)
)
}
private newtype TSemReason =
TSemNoReason() or
TSemCondReason(SemGuard guard) { possibleReason(guard) }
/**
* A reason for an inferred bound. This can either be `CondReason` if the bound
* is due to a specific condition, or `NoReason` if the bound is inferred
* without going through a bounding condition.
*/
abstract class SemReason extends TSemReason {
/** Gets a textual representation of this reason. */
abstract string toString();
}
/**
* A reason for an inferred bound that indicates that the bound is inferred
* without going through a bounding condition.
*/
class SemNoReason extends SemReason, TSemNoReason {
override string toString() { result = "NoReason" }
}
/** A reason for an inferred bound pointing to a condition. */
class SemCondReason extends SemReason, TSemCondReason {
/** Gets the condition that is the reason for the bound. */
SemGuard getCond() { this = TSemCondReason(result) }
override string toString() { result = getCond().toString() }
}
/**
* Holds if `e + delta` is a valid bound for `v` at `pos`.
* - `upper = true` : `v <= e + delta`
* - `upper = false` : `v >= e + delta`
*/
private predicate boundFlowStepSsa(
SemSsaVariable v, SemSsaReadPosition pos, SemExpr e, int delta, boolean upper, SemReason reason
) {
semSsaUpdateStep(v, e, delta) and
pos.hasReadOfVar(v) and
(upper = true or upper = false) and
reason = TSemNoReason()
or
exists(SemGuard guard, boolean testIsTrue |
pos.hasReadOfVar(v) and
guard = boundFlowCond(v, e, delta, upper, testIsTrue) and
semGuardDirectlyControlsSsaRead(guard, pos, testIsTrue) and
reason = TSemCondReason(guard)
)
}
/** Holds if `v != e + delta` at `pos` and `v` is of integral type. */
private predicate unequalFlowStepIntegralSsa(
SemSsaVariable v, SemSsaReadPosition pos, SemExpr e, int delta, SemReason reason
) {
getTrackedTypeForSsaVariable(v) instanceof SemIntegerType and
exists(SemGuard guard, boolean testIsTrue |
pos.hasReadOfVar(v) and
guard = semEqFlowCond(v, e, delta, false, testIsTrue) and
semGuardDirectlyControlsSsaRead(guard, pos, testIsTrue) and
reason = TSemCondReason(guard)
)
}
/**
* An expression that does conversion, boxing, or unboxing
*/
private class ConvertOrBoxExpr extends SemUnaryExpr {
ConvertOrBoxExpr() {
this instanceof SemConvertExpr
or
this instanceof SemBoxExpr
or
this instanceof SemUnboxExpr
}
}
/**
* A cast that can be ignored for the purpose of range analysis.
*/
private class SafeCastExpr extends ConvertOrBoxExpr {
SafeCastExpr() { conversionCannotOverflow(getTrackedType(getOperand()), getTrackedType(this)) }
}
/**
* Holds if `typ` is a small integral type with the given lower and upper bounds.
*/
private predicate typeBound(SemIntegerType typ, int lowerbound, int upperbound) {
exists(int bitSize | bitSize = typ.getByteSize() * 8 |
bitSize < 32 and
(
if typ.isSigned()
then (
upperbound = 1.bitShiftLeft(bitSize - 1) - 1 and
lowerbound = -upperbound - 1
) else (
lowerbound = 0 and
upperbound = 1.bitShiftLeft(bitSize) - 1
)
)
)
}
/**
* A cast to a small integral type that may overflow or underflow.
*/
private class NarrowingCastExpr extends ConvertOrBoxExpr {
NarrowingCastExpr() {
not this instanceof SafeCastExpr and
typeBound(getTrackedType(this), _, _)
}
/** Gets the lower bound of the resulting type. */
int getLowerBound() { typeBound(getTrackedType(this), result, _) }
/** Gets the upper bound of the resulting type. */
int getUpperBound() { typeBound(getTrackedType(this), _, result) }
}
/** Holds if `e >= 1` as determined by sign analysis. */
private predicate strictlyPositiveIntegralExpr(SemExpr e) {
semStrictlyPositive(e) and getTrackedType(e) instanceof SemIntegerType
}
/** Holds if `e <= -1` as determined by sign analysis. */
private predicate strictlyNegativeIntegralExpr(SemExpr e) {
semStrictlyNegative(e) and getTrackedType(e) instanceof SemIntegerType
}
/**
* Holds if `e1 + delta` is a valid bound for `e2`.
* - `upper = true` : `e2 <= e1 + delta`
* - `upper = false` : `e2 >= e1 + delta`
*/
private predicate boundFlowStep(SemExpr e2, SemExpr e1, int delta, boolean upper) {
semValueFlowStep(e2, e1, delta) and
(upper = true or upper = false)
or
e2.(SafeCastExpr).getOperand() = e1 and
delta = 0 and
(upper = true or upper = false)
or
exists(SemExpr x | e2.(SemAddExpr).hasOperands(e1, x) |
// `x instanceof ConstantIntegerExpr` is covered by valueFlowStep
not x instanceof SemConstantIntegerExpr and
not e1 instanceof SemConstantIntegerExpr and
if strictlyPositiveIntegralExpr(x)
then upper = false and delta = 1
else
if semPositive(x)
then upper = false and delta = 0
else
if strictlyNegativeIntegralExpr(x)
then upper = true and delta = -1
else
if semNegative(x)
then upper = true and delta = 0
else none()
)
or
exists(SemExpr x, SemSubExpr sub |
e2 = sub and
sub.getLeftOperand() = e1 and
sub.getRightOperand() = x
|
// `x instanceof ConstantIntegerExpr` is covered by valueFlowStep
not x instanceof SemConstantIntegerExpr and
if strictlyPositiveIntegralExpr(x)
then upper = true and delta = -1
else
if semPositive(x)
then upper = true and delta = 0
else
if strictlyNegativeIntegralExpr(x)
then upper = false and delta = 1
else
if semNegative(x)
then upper = false and delta = 0
else none()
)
or
e2.(SemRemExpr).getRightOperand() = e1 and
semPositive(e1) and
delta = -1 and
upper = true
or
e2.(SemRemExpr).getLeftOperand() = e1 and semPositive(e1) and delta = 0 and upper = true
or
e2.(SemBitAndExpr).getAnOperand() = e1 and
semPositive(e1) and
delta = 0 and
upper = true
or
e2.(SemBitOrExpr).getAnOperand() = e1 and
semPositive(e2) and
delta = 0 and
upper = false
or
Specific::hasBound(e2, e1, delta, upper)
}
/** Holds if `e2 = e1 * factor` and `factor > 0`. */
private predicate boundFlowStepMul(SemExpr e2, SemExpr e1, int factor) {
exists(SemConstantIntegerExpr c, int k | k = c.getIntValue() and k > 0 |
e2.(SemMulExpr).hasOperands(e1, c) and factor = k
or
exists(SemShiftLeftExpr e |
e = e2 and e.getLeftOperand() = e1 and e.getRightOperand() = c and factor = 2.pow(k)
)
)
}
/**
* Holds if `e2 = e1 / factor` and `factor > 0`.
*
* This conflates division, right shift, and unsigned right shift and is
* therefore only valid for non-negative numbers.
*/
private predicate boundFlowStepDiv(SemExpr e2, SemExpr e1, int factor) {
exists(SemConstantIntegerExpr c, int k | k = c.getIntValue() and k > 0 |
exists(SemDivExpr e |
e = e2 and e.getLeftOperand() = e1 and e.getRightOperand() = c and factor = k
)
or
exists(SemShiftRightExpr e |
e = e2 and e.getLeftOperand() = e1 and e.getRightOperand() = c and factor = 2.pow(k)
)
or
exists(SemShiftRightUnsignedExpr e |
e = e2 and e.getLeftOperand() = e1 and e.getRightOperand() = c and factor = 2.pow(k)
)
)
}
/**
* Holds if `b + delta` is a valid bound for `v` at `pos`.
* - `upper = true` : `v <= b + delta`
* - `upper = false` : `v >= b + delta`
*/
private predicate boundedSsa(
SemSsaVariable v, SemSsaReadPosition pos, SemBound b, int delta, boolean upper,
boolean fromBackEdge, int origdelta, SemReason reason
) {
exists(SemExpr mid, int d1, int d2, SemReason r1, SemReason r2 |
boundFlowStepSsa(v, pos, mid, d1, upper, r1) and
bounded(mid, b, d2, upper, fromBackEdge, origdelta, r2) and
// upper = true: v <= mid + d1 <= b + d1 + d2 = b + delta
// upper = false: v >= mid + d1 >= b + d1 + d2 = b + delta
delta = d1 + d2 and
(if r1 instanceof SemNoReason then reason = r2 else reason = r1)
)
or
exists(int d, SemReason r1, SemReason r2 |
boundedSsa(v, pos, b, d, upper, fromBackEdge, origdelta, r2) or
boundedPhi(v, b, d, upper, fromBackEdge, origdelta, r2)
|
unequalIntegralSsa(v, pos, b, d, r1) and
(
upper = true and delta = d - 1
or
upper = false and delta = d + 1
) and
(
reason = r1
or
reason = r2 and not r2 instanceof SemNoReason
)
)
}
/**
* Holds if `v != b + delta` at `pos` and `v` is of integral type.
*/
private predicate unequalIntegralSsa(
SemSsaVariable v, SemSsaReadPosition pos, SemBound b, int delta, SemReason reason
) {
exists(SemExpr e, int d1, int d2 |
unequalFlowStepIntegralSsa(v, pos, e, d1, reason) and
bounded(e, b, d2, true, _, _, _) and
bounded(e, b, d2, false, _, _, _) and
delta = d2 + d1
)
}
/** Weakens a delta to lie in the range `[-1..1]`. */
bindingset[delta, upper]
private int weakenDelta(boolean upper, int delta) {
delta in [-1 .. 1] and result = delta
or
upper = true and result = -1 and delta < -1
or
upper = false and result = 1 and delta > 1
}
/**
* Holds if `b + delta` is a valid bound for `inp` when used as an input to
* `phi` along `edge`.
* - `upper = true` : `inp <= b + delta`
* - `upper = false` : `inp >= b + delta`
*/
private predicate boundedPhiInp(
SemSsaPhiNode phi, SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge, SemBound b, int delta,
boolean upper, boolean fromBackEdge, int origdelta, SemReason reason
) {
edge.phiInput(phi, inp) and
exists(int d, boolean fromBackEdge0 |
boundedSsa(inp, edge, b, d, upper, fromBackEdge0, origdelta, reason)
or
boundedPhi(inp, b, d, upper, fromBackEdge0, origdelta, reason)
or
b.(SemSsaBound).getAVariable() = inp and
d = 0 and
(upper = true or upper = false) and
fromBackEdge0 = false and
origdelta = 0 and
reason = TSemNoReason()
|
if semBackEdge(phi, inp, edge)
then
fromBackEdge = true and
(
fromBackEdge0 = true and delta = weakenDelta(upper, d - origdelta) + origdelta
or
fromBackEdge0 = false and delta = d
)
else (
delta = d and fromBackEdge = fromBackEdge0
)
)
}
/**
* Holds if `b + delta` is a valid bound for `inp` when used as an input to
* `phi` along `edge`.
* - `upper = true` : `inp <= b + delta`
* - `upper = false` : `inp >= b + delta`
*
* Equivalent to `boundedPhiInp(phi, inp, edge, b, delta, upper, _, _, _)`.
*/
pragma[noinline]
private predicate boundedPhiInp1(
SemSsaPhiNode phi, SemBound b, boolean upper, SemSsaVariable inp,
SemSsaReadPositionPhiInputEdge edge, int delta
) {
boundedPhiInp(phi, inp, edge, b, delta, upper, _, _, _)
}
/**
* Holds if `phi` is a valid bound for `inp` when used as an input to `phi`
* along `edge`.
* - `upper = true` : `inp <= phi`
* - `upper = false` : `inp >= phi`
*/
private predicate selfBoundedPhiInp(
SemSsaPhiNode phi, SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge, boolean upper
) {
exists(int d, SemSsaBound phibound |
phibound.getAVariable() = phi and
boundedPhiInp(phi, inp, edge, phibound, d, upper, _, _, _) and
(
upper = true and d <= 0
or
upper = false and d >= 0
)
)
}
/**
* Holds if `b + delta` is a valid bound for some input, `inp`, to `phi`, and
* thus a candidate bound for `phi`.
* - `upper = true` : `inp <= b + delta`
* - `upper = false` : `inp >= b + delta`
*/
pragma[noinline]
private predicate boundedPhiCand(
SemSsaPhiNode phi, boolean upper, SemBound b, int delta, boolean fromBackEdge, int origdelta,
SemReason reason
) {
exists(SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge |
boundedPhiInp(phi, inp, edge, b, delta, upper, fromBackEdge, origdelta, reason)
)
}
/**
* Holds if the candidate bound `b + delta` for `phi` is valid for the phi input
* `inp` along `edge`.
*/
private predicate boundedPhiCandValidForEdge(
SemSsaPhiNode phi, SemBound b, int delta, boolean upper, boolean fromBackEdge, int origdelta,
SemReason reason, SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge
) {
boundedPhiCand(phi, upper, b, delta, fromBackEdge, origdelta, reason) and
(
exists(int d | boundedPhiInp1(phi, b, upper, inp, edge, d) | upper = true and d <= delta)
or
exists(int d | boundedPhiInp1(phi, b, upper, inp, edge, d) | upper = false and d >= delta)
or
selfBoundedPhiInp(phi, inp, edge, upper)
)
}
/**
* Holds if `b + delta` is a valid bound for `phi`.
* - `upper = true` : `phi <= b + delta`
* - `upper = false` : `phi >= b + delta`
*/
private predicate boundedPhi(
SemSsaPhiNode phi, SemBound b, int delta, boolean upper, boolean fromBackEdge, int origdelta,
SemReason reason
) {
forex(SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge | edge.phiInput(phi, inp) |
boundedPhiCandValidForEdge(phi, b, delta, upper, fromBackEdge, origdelta, reason, inp, edge)
)
}
/**
* Holds if `e` has an upper (for `upper = true`) or lower
* (for `upper = false`) bound of `b`.
*/
private predicate baseBound(SemExpr e, int b, boolean upper) {
Specific::hasConstantBound(e, b, upper)
or
upper = false and
b = 0 and
semPositive(e.(SemBitAndExpr).getAnOperand()) and
// REVIEW: We let the language opt out here to preserve original results.
not Specific::ignoreZeroLowerBound(e)
}
/**
* Holds if the value being cast has an upper (for `upper = true`) or lower
* (for `upper = false`) bound within the bounds of the resulting type.
* For `upper = true` this means that the cast will not overflow and for
* `upper = false` this means that the cast will not underflow.
*/
private predicate safeNarrowingCast(NarrowingCastExpr cast, boolean upper) {
exists(int bound | bounded(cast.getOperand(), any(SemZeroBound zb), bound, upper, _, _, _) |
upper = true and bound <= cast.getUpperBound()
or
upper = false and bound >= cast.getLowerBound()
)
}
pragma[noinline]
private predicate boundedCastExpr(
NarrowingCastExpr cast, SemBound b, int delta, boolean upper, boolean fromBackEdge, int origdelta,
SemReason reason
) {
bounded(cast.getOperand(), b, delta, upper, fromBackEdge, origdelta, reason)
}
/**
* Holds if `b + delta` is a valid bound for `e`.
* - `upper = true` : `e <= b + delta`
* - `upper = false` : `e >= b + delta`
*/
private predicate bounded(
SemExpr e, SemBound b, int delta, boolean upper, boolean fromBackEdge, int origdelta,
SemReason reason
) {
not Specific::ignoreExprBound(e) and
(
e = b.getExpr(delta) and
(upper = true or upper = false) and
fromBackEdge = false and
origdelta = delta and
reason = TSemNoReason()
or
baseBound(e, delta, upper) and
b instanceof SemZeroBound and
fromBackEdge = false and
origdelta = delta and
reason = TSemNoReason()
or
exists(SemSsaVariable v, SemSsaReadPositionBlock bb |
boundedSsa(v, bb, b, delta, upper, fromBackEdge, origdelta, reason) and
e = v.getAUse() and
bb.getBlock() = e.getBasicBlock()
)
or
exists(SemExpr mid, int d1, int d2 |
boundFlowStep(e, mid, d1, upper) and
// Constants have easy, base-case bounds, so let's not infer any recursive bounds.
not e instanceof SemConstantIntegerExpr and
bounded(mid, b, d2, upper, fromBackEdge, origdelta, reason) and
// upper = true: e <= mid + d1 <= b + d1 + d2 = b + delta
// upper = false: e >= mid + d1 >= b + d1 + d2 = b + delta
delta = d1 + d2
)
or
exists(SemSsaPhiNode phi |
boundedPhi(phi, b, delta, upper, fromBackEdge, origdelta, reason) and
e = phi.getAUse()
)
or
exists(SemExpr mid, int factor, int d |
boundFlowStepMul(e, mid, factor) and
not e instanceof SemConstantIntegerExpr and
bounded(mid, b, d, upper, fromBackEdge, origdelta, reason) and
b instanceof SemZeroBound and
delta = d * factor
)
or
exists(SemExpr mid, int factor, int d |
boundFlowStepDiv(e, mid, factor) and
not e instanceof SemConstantIntegerExpr and
bounded(mid, b, d, upper, fromBackEdge, origdelta, reason) and
b instanceof SemZeroBound and
d >= 0 and
delta = d / factor
)
or
exists(NarrowingCastExpr cast |
cast = e and
safeNarrowingCast(cast, upper.booleanNot()) and
boundedCastExpr(cast, b, delta, upper, fromBackEdge, origdelta, reason)
)
or
exists(
SemConditionalExpr cond, int d1, int d2, boolean fbe1, boolean fbe2, int od1, int od2,
SemReason r1, SemReason r2
|
cond = e and
boundedConditionalExpr(cond, b, upper, true, d1, fbe1, od1, r1) and
boundedConditionalExpr(cond, b, upper, false, d2, fbe2, od2, r2) and
(
delta = d1 and fromBackEdge = fbe1 and origdelta = od1 and reason = r1
or
delta = d2 and fromBackEdge = fbe2 and origdelta = od2 and reason = r2
)
|
upper = true and delta = d1.maximum(d2)
or
upper = false and delta = d1.minimum(d2)
)
)
}
private predicate boundedConditionalExpr(
SemConditionalExpr cond, SemBound b, boolean upper, boolean branch, int delta,
boolean fromBackEdge, int origdelta, SemReason reason
) {
bounded(cond.getBranchExpr(branch), b, delta, upper, fromBackEdge, origdelta, reason)
}

View File

@@ -0,0 +1,88 @@
/**
* C++-specific implementation of range analysis.
*/
private import experimental.semmle.code.cpp.semantic.Semantic
/**
* Holds if the specified expression should be excluded from the result of `ssaRead()`.
*
* This predicate is to keep the results identical to the original Java implementation. It should be
* removed once we have the new implementation matching the old results exactly.
*/
predicate ignoreSsaReadCopy(SemExpr e) { none() }
/**
* Ignore the bound on this expression.
*
* This predicate is to keep the results identical to the original Java implementation. It should be
* removed once we have the new implementation matching the old results exactly.
*/
predicate ignoreExprBound(SemExpr e) { none() }
/**
* Ignore any inferred zero lower bound on this expression.
*
* This predicate is to keep the results identical to the original Java implementation. It should be
* removed once we have the new implementation matching the old results exactly.
*/
predicate ignoreZeroLowerBound(SemExpr e) { none() }
/**
* Holds if the specified expression should be excluded from the result of `ssaRead()`.
*
* This predicate is to keep the results identical to the original Java implementation. It should be
* removed once we have the new implementation matching the old results exactly.
*/
predicate ignoreSsaReadArithmeticExpr(SemExpr e) { none() }
/**
* Holds if the specified variable should be excluded from the result of `ssaRead()`.
*
* This predicate is to keep the results identical to the original Java implementation. It should be
* removed once we have the new implementation matching the old results exactly.
*/
predicate ignoreSsaReadAssignment(SemSsaVariable v) { none() }
/**
* Adds additional results to `ssaRead()` that are specific to Java.
*
* This predicate handles propagation of offsets for post-increment and post-decrement expressions
* in exactly the same way as the old Java implementation. Once the new implementation matches the
* old one, we should remove this predicate and propagate deltas for all similar patterns, whether
* or not they come from a post-increment/decrement expression.
*/
SemExpr specificSsaRead(SemSsaVariable v, int delta) { none() }
/**
* Holds if `e >= bound` (if `upper = false`) or `e <= bound` (if `upper = true`).
*/
predicate hasConstantBound(SemExpr e, int bound, boolean upper) { none() }
/**
* Holds if `e >= bound + delta` (if `upper = false`) or `e <= bound + delta` (if `upper = true`).
*/
predicate hasBound(SemExpr e, SemExpr bound, int delta, boolean upper) { none() }
/**
* Holds if the value of `dest` is known to be `src + delta`.
*/
predicate additionalValueFlowStep(SemExpr dest, SemExpr src, int delta) { none() }
/**
* Gets the type that range analysis should use to track the result of the specified expression,
* if a type other than the original type of the expression is to be used.
*
* This predicate is commonly used in languages that support immutable "boxed" types that are
* actually references but whose values can be tracked as the type contained in the box.
*/
SemType getAlternateType(SemExpr e) { none() }
/**
* Gets the type that range analysis should use to track the result of the specified source
* variable, if a type other than the original type of the expression is to be used.
*
* This predicate is commonly used in languages that support immutable "boxed" types that are
* actually references but whose values can be tracked as the type contained in the box.
*/
SemType getAlternateTypeForSsaVariable(SemSsaVariable var) { none() }

View File

@@ -0,0 +1,135 @@
/**
* Provides utility predicates for range analysis.
*/
private import experimental.semmle.code.cpp.semantic.Semantic
private import RangeAnalysisSpecific as Specific
private import ConstantAnalysis
/**
* Gets an expression that equals `v - d`.
*/
SemExpr semSsaRead(SemSsaVariable v, int delta) {
// There are various language-specific extension points that can be removed once we no longer
// expect to match the original Java implementation's results exactly.
result = v.getAUse() and delta = 0
or
exists(int d1, SemConstantIntegerExpr c |
result.(SemAddExpr).hasOperands(semSsaRead(v, d1), c) and
delta = d1 - c.getIntValue() and
not Specific::ignoreSsaReadArithmeticExpr(result)
)
or
exists(SemSubExpr sub, int d1, SemConstantIntegerExpr c |
result = sub and
sub.getLeftOperand() = semSsaRead(v, d1) and
sub.getRightOperand() = c and
delta = d1 + c.getIntValue() and
not Specific::ignoreSsaReadArithmeticExpr(result)
)
or
result = v.(SemSsaExplicitUpdate).getSourceExpr() and
delta = 0 and
not Specific::ignoreSsaReadAssignment(v)
or
result = Specific::specificSsaRead(v, delta)
or
result.(SemCopyValueExpr).getOperand() = semSsaRead(v, delta) and
not Specific::ignoreSsaReadCopy(result)
or
result.(SemStoreExpr).getOperand() = semSsaRead(v, delta)
}
/**
* Gets a condition that tests whether `v` equals `e + delta`.
*
* If the condition evaluates to `testIsTrue`:
* - `isEq = true` : `v == e + delta`
* - `isEq = false` : `v != e + delta`
*/
SemGuard semEqFlowCond(SemSsaVariable v, SemExpr e, int delta, boolean isEq, boolean testIsTrue) {
exists(boolean eqpolarity |
result.isEquality(semSsaRead(v, delta), e, eqpolarity) and
(testIsTrue = true or testIsTrue = false) and
eqpolarity.booleanXor(testIsTrue).booleanNot() = isEq
)
or
exists(boolean testIsTrue0 |
semImplies_v2(result, testIsTrue, semEqFlowCond(v, e, delta, isEq, testIsTrue0), testIsTrue0)
)
}
/**
* Holds if `v` is an `SsaExplicitUpdate` that equals `e + delta`.
*/
predicate semSsaUpdateStep(SemSsaExplicitUpdate v, SemExpr e, int delta) {
exists(SemExpr defExpr | defExpr = v.getSourceExpr() |
defExpr.(SemCopyValueExpr).getOperand() = e and delta = 0
or
defExpr.(SemStoreExpr).getOperand() = e and delta = 0
or
defExpr.(SemAddOneExpr).getOperand() = e and delta = 1
or
defExpr.(SemSubOneExpr).getOperand() = e and delta = -1
or
e = defExpr and
not (
defExpr instanceof SemCopyValueExpr or
defExpr instanceof SemStoreExpr or
defExpr instanceof SemAddOneExpr or
defExpr instanceof SemSubOneExpr
) and
delta = 0
)
}
/**
* Holds if `e1 + delta` equals `e2`.
*/
predicate semValueFlowStep(SemExpr e2, SemExpr e1, int delta) {
e2.(SemCopyValueExpr).getOperand() = e1 and delta = 0
or
e2.(SemStoreExpr).getOperand() = e1 and delta = 0
or
e2.(SemAddOneExpr).getOperand() = e1 and delta = 1
or
e2.(SemSubOneExpr).getOperand() = e1 and delta = -1
or
Specific::additionalValueFlowStep(e2, e1, delta)
or
exists(SemExpr x | e2.(SemAddExpr).hasOperands(e1, x) |
x.(SemConstantIntegerExpr).getIntValue() = delta
)
or
exists(SemExpr x, SemSubExpr sub |
e2 = sub and
sub.getLeftOperand() = e1 and
sub.getRightOperand() = x
|
x.(SemConstantIntegerExpr).getIntValue() = -delta
)
}
/**
* Gets the type used to track the specified expression's range information.
*
* Usually, this just `e.getSemType()`, but the language can override this to track immutable boxed
* primitive types as the underlying primitive type.
*/
SemType getTrackedType(SemExpr e) {
result = Specific::getAlternateType(e)
or
not exists(Specific::getAlternateType(e)) and result = e.getSemType()
}
/**
* Gets the type used to track the specified source variable's range information.
*
* Usually, this just `e.getType()`, but the language can override this to track immutable boxed
* primitive types as the underlying primitive type.
*/
SemType getTrackedTypeForSsaVariable(SemSsaVariable var) {
result = Specific::getAlternateTypeForSsaVariable(var)
or
not exists(Specific::getAlternateTypeForSsaVariable(var)) and result = var.getType()
}

View File

@@ -0,0 +1,267 @@
private import experimental.semmle.code.cpp.semantic.Semantic
newtype TSign =
TNeg() or
TZero() or
TPos()
/** Class representing expression signs (+, -, 0). */
class Sign extends TSign {
/** Gets the string representation of this sign. */
string toString() {
result = "-" and this = TNeg()
or
result = "0" and this = TZero()
or
result = "+" and this = TPos()
}
/** Gets a possible sign after incrementing an expression that has this sign. */
Sign inc() {
this = TNeg() and result = TNeg()
or
this = TNeg() and result = TZero()
or
this = TZero() and result = TPos()
or
this = TPos() and result = TPos()
}
/** Gets a possible sign after decrementing an expression that has this sign. */
Sign dec() { result.inc() = this }
/** Gets a possible sign after negating an expression that has this sign. */
Sign neg() {
this = TNeg() and result = TPos()
or
this = TZero() and result = TZero()
or
this = TPos() and result = TNeg()
}
/**
* Gets a possible sign after bitwise complementing an expression that has this
* sign.
*/
Sign bitnot() {
this = TNeg() and result = TPos()
or
this = TNeg() and result = TZero()
or
this = TZero() and result = TNeg()
or
this = TPos() and result = TNeg()
}
/**
* Gets a possible sign after adding an expression with sign `s` to an expression
* that has this sign.
*/
Sign add(Sign s) {
this = TZero() and result = s
or
s = TZero() and result = this
or
this = s and this = result
or
this = TPos() and s = TNeg()
or
this = TNeg() and s = TPos()
}
/**
* Gets a possible sign after subtracting an expression with sign `s` from an expression
* that has this sign.
*/
Sign sub(Sign s) { result = add(s.neg()) }
/**
* Gets a possible sign after multiplying an expression with sign `s` to an expression
* that has this sign.
*/
Sign mul(Sign s) {
result = TZero() and this = TZero()
or
result = TZero() and s = TZero()
or
result = TNeg() and this = TPos() and s = TNeg()
or
result = TNeg() and this = TNeg() and s = TPos()
or
result = TPos() and this = TPos() and s = TPos()
or
result = TPos() and this = TNeg() and s = TNeg()
}
/**
* Gets a possible sign after integer dividing an expression that has this sign
* by an expression with sign `s`.
*/
Sign div(Sign s) {
result = TZero() and s = TNeg() // ex: 3 / -5 = 0
or
result = TZero() and s = TPos() // ex: 3 / 5 = 0
or
result = TNeg() and this = TPos() and s = TNeg()
or
result = TNeg() and this = TNeg() and s = TPos()
or
result = TPos() and this = TPos() and s = TPos()
or
result = TPos() and this = TNeg() and s = TNeg()
}
/**
* Gets a possible sign after modulo dividing an expression that has this sign
* by an expression with sign `s`.
*/
Sign rem(Sign s) {
result = TZero() and s = TNeg()
or
result = TZero() and s = TPos()
or
result = this and s = TNeg()
or
result = this and s = TPos()
}
/**
* Gets a possible sign after bitwise `and` of an expression that has this sign
* and an expression with sign `s`.
*/
Sign bitand(Sign s) {
result = TZero() and this = TZero()
or
result = TZero() and s = TZero()
or
result = TZero() and this = TPos()
or
result = TZero() and s = TPos()
or
result = TNeg() and this = TNeg() and s = TNeg()
or
result = TPos() and this = TNeg() and s = TPos()
or
result = TPos() and this = TPos() and s = TNeg()
or
result = TPos() and this = TPos() and s = TPos()
}
/**
* Gets a possible sign after bitwise `or` of an expression that has this sign
* and an expression with sign `s`.
*/
Sign bitor(Sign s) {
result = TZero() and this = TZero() and s = TZero()
or
result = TNeg() and this = TNeg()
or
result = TNeg() and s = TNeg()
or
result = TPos() and this = TPos() and s = TZero()
or
result = TPos() and this = TZero() and s = TPos()
or
result = TPos() and this = TPos() and s = TPos()
}
/**
* Gets a possible sign after bitwise `xor` of an expression that has this sign
* and an expression with sign `s`.
*/
Sign bitxor(Sign s) {
result = TZero() and this = s
or
result = this and s = TZero()
or
result = s and this = TZero()
or
result = TPos() and this = TPos() and s = TPos()
or
result = TNeg() and this = TNeg() and s = TPos()
or
result = TNeg() and this = TPos() and s = TNeg()
or
result = TPos() and this = TNeg() and s = TNeg()
}
/**
* Gets a possible sign after left shift of an expression that has this sign
* by an expression with sign `s`.
*/
Sign lshift(Sign s) {
result = TZero() and this = TZero()
or
result = this and s = TZero()
or
this != TZero() and s != TZero()
}
/**
* Gets a possible sign after right shift of an expression that has this sign
* by an expression with sign `s`.
*/
Sign rshift(Sign s) {
result = TZero() and this = TZero()
or
result = this and s = TZero()
or
result = TNeg() and this = TNeg()
or
result != TNeg() and this = TPos() and s != TZero()
}
/**
* Gets a possible sign after unsigned right shift of an expression that has
* this sign by an expression with sign `s`.
*/
Sign urshift(Sign s) {
result = TZero() and this = TZero()
or
result = this and s = TZero()
or
result != TZero() and this = TNeg() and s != TZero()
or
result != TNeg() and this = TPos() and s != TZero()
}
/** Perform `op` on this sign. */
Sign applyUnaryOp(Opcode op) {
op instanceof Opcode::CopyValue and result = this
or
op instanceof Opcode::Store and result = this
or
op instanceof Opcode::AddOne and result = inc()
or
op instanceof Opcode::SubOne and result = dec()
or
op instanceof Opcode::Negate and result = neg()
or
op instanceof Opcode::BitComplement and result = bitnot()
}
/** Perform `op` on this sign and sign `s`. */
Sign applyBinaryOp(Sign s, Opcode op) {
op instanceof Opcode::Add and result = add(s)
or
op instanceof Opcode::Sub and result = sub(s)
or
op instanceof Opcode::Mul and result = mul(s)
or
op instanceof Opcode::Div and result = div(s)
or
op instanceof Opcode::Rem and result = rem(s)
or
op instanceof Opcode::BitAnd and result = bitand(s)
or
op instanceof Opcode::BitOr and result = bitor(s)
or
op instanceof Opcode::BitXor and result = bitxor(s)
or
op instanceof Opcode::ShiftLeft and result = lshift(s)
or
op instanceof Opcode::ShiftRight and result = rshift(s)
or
op instanceof Opcode::ShiftRightUnsigned and result = urshift(s)
}
}

View File

@@ -0,0 +1,493 @@
/**
* Provides sign analysis to determine whether expression are always positive
* or negative.
*
* The analysis is implemented as an abstract interpretation over the
* three-valued domain `{negative, zero, positive}`.
*/
private import SignAnalysisSpecific as Specific
private import experimental.semmle.code.cpp.semantic.Semantic
private import ConstantAnalysis
private import RangeUtils
private import Sign
/**
* An SSA definition for which the analysis can compute the sign.
*
* The actual computation of the sign is done in an override of the `getSign()` predicate. The
* charpred of any subclass must _not_ invoke `getSign()`, directly or indirectly. This ensures
* that the charpred does not introduce negative recursion. The `getSign()` predicate may be
* recursive.
*/
abstract private class SignDef instanceof SemSsaVariable {
final string toString() { result = super.toString() }
/** Gets the possible signs of this SSA definition. */
abstract Sign getSign();
}
/** An SSA definition whose sign is computed based on standard flow. */
abstract private class FlowSignDef extends SignDef {
abstract override Sign getSign();
}
/** An SSA definition whose sign is determined by the sign of that definitions source expression. */
private class ExplicitSignDef extends FlowSignDef {
SemSsaExplicitUpdate update;
ExplicitSignDef() { update = this }
final override Sign getSign() { result = semExprSign(update.getSourceExpr()) }
}
/** An SSA Phi definition, whose sign is the union of the signs of its inputs. */
private class PhiSignDef extends FlowSignDef {
SemSsaPhiNode phi;
PhiSignDef() { phi = this }
final override Sign getSign() {
exists(SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge |
edge.phiInput(phi, inp) and
result = semSsaSign(inp, edge)
)
}
}
/** An SSA definition whose sign is computed by a language-specific implementation. */
abstract class CustomSignDef extends SignDef {
abstract override Sign getSign();
}
/**
* An expression for which the analysis can compute the sign.
*
* The actual computation of the sign is done in an override of the `getSign()` predicate. The
* charpred of any subclass must _not_ invoke `getSign()`, directly or indirectly. This ensures
* that the charpred does not introduce negative recursion. The `getSign()` predicate may be
* recursive.
*
* Concrete implementations extend one of the following subclasses:
* - `ConstantSignExpr`, for expressions with a compile-time constant value.
* - `FlowSignExpr`, for expressions whose sign can be computed from the signs of their operands.
* - `CustomsignExpr`, for expressions shose sign can be computed by a language-specific
* implementation.
*
* If the same expression matches more than one of the above subclasses, the sign is computed as
* follows:
* - The sign of a `ConstantSignExpr` is computed solely from `ConstantSignExpr.getSign()`,
* regardless of any other subclasses.
* - If a non-`ConstantSignExpr` expression matches exactly one of `FlowSignExpr` or
* `CustomSignExpr`, the sign is computed by that class' `getSign()` predicate.
* - If a non-`ConstantSignExpr` expression matches both `FlowSignExpr` and `CustomSignExpr`, the
* sign is the _intersection_ of the signs of those two classes' `getSign()` predicates. Thus,
* both classes have the opportunity to _restrict_ the set of possible signs, not to generate new
* possible signs.
* - If an expression does not match any of the three subclasses, then it can have any sign.
*
* Note that the `getSign()` predicate is introduced only in subclasses of `SignExpr`.
*/
abstract class SignExpr instanceof SemExpr {
SignExpr() { not Specific::ignoreExprSign(this) }
final string toString() { result = super.toString() }
abstract Sign getSign();
}
/** An expression whose sign is determined by its constant numeric value. */
private class ConstantSignExpr extends SignExpr {
ConstantSignExpr() {
this instanceof SemConstantIntegerExpr or
exists(this.(SemNumericLiteralExpr).getApproximateFloatValue())
}
final override Sign getSign() {
exists(int i | this.(SemConstantIntegerExpr).getIntValue() = i |
i < 0 and result = TNeg()
or
i = 0 and result = TZero()
or
i > 0 and result = TPos()
)
or
not exists(this.(SemConstantIntegerExpr).getIntValue()) and
exists(float f | f = this.(SemNumericLiteralExpr).getApproximateFloatValue() |
f < 0 and result = TNeg()
or
f = 0 and result = TZero()
or
f > 0 and result = TPos()
)
}
}
abstract private class NonConstantSignExpr extends SignExpr {
NonConstantSignExpr() { not this instanceof ConstantSignExpr }
final override Sign getSign() {
// The result is the _intersection_ of the signs computed from flow and by the language.
(result = this.(FlowSignExpr).getSignRestriction() or not this instanceof FlowSignExpr) and
(result = this.(CustomSignExpr).getSignRestriction() or not this instanceof CustomSignExpr)
}
}
/** An expression whose sign is computed from the signs of its operands. */
abstract private class FlowSignExpr extends NonConstantSignExpr {
abstract Sign getSignRestriction();
}
/** An expression whose sign is computed by a language-specific implementation. */
abstract class CustomSignExpr extends NonConstantSignExpr {
abstract Sign getSignRestriction();
}
/** An expression whose sign is unknown. */
private class UnknownSignExpr extends SignExpr {
UnknownSignExpr() {
not this instanceof FlowSignExpr and
not this instanceof CustomSignExpr and
not this instanceof ConstantSignExpr and
(
// Only track numeric types.
getTrackedType(this) instanceof SemNumericType
or
// Unless the language says to track this expression anyway.
Specific::trackUnknownNonNumericExpr(this)
)
}
final override Sign getSign() { semAnySign(result) }
}
/**
* A `Load` expression whose sign is computed from the sign of its SSA definition, restricted by
* inference from any intervening guards.
*/
class UseSignExpr extends FlowSignExpr {
SemSsaVariable v;
UseSignExpr() { v.getAUse() = this }
override Sign getSignRestriction() {
// Propagate via SSA
// Propagate the sign from the def of `v`, incorporating any inference from guards.
result = semSsaSign(v, any(SemSsaReadPositionBlock bb | bb.getAnExpr() = this))
or
// No block for this read. Just use the sign of the def.
// REVIEW: How can this happen?
not exists(SemSsaReadPositionBlock bb | bb.getAnExpr() = this) and
result = semSsaDefSign(v)
}
}
/** A binary expression whose sign is computed from the signs of its operands. */
private class BinarySignExpr extends FlowSignExpr {
SemBinaryExpr binary;
BinarySignExpr() { binary = this }
override Sign getSignRestriction() {
result =
semExprSign(binary.getLeftOperand())
.applyBinaryOp(semExprSign(binary.getRightOperand()), binary.getOpcode())
or
exists(SemDivExpr div | div = binary |
result = semExprSign(div.getLeftOperand()) and
result != TZero() and
div.getRightOperand().(SemFloatingPointLiteralExpr).getFloatValue() = 0
)
}
}
/**
* A `Convert`, `Box`, or `Unbox` expression.
*/
private class SemCastExpr extends SemUnaryExpr {
SemCastExpr() {
this instanceof SemConvertExpr
or
this instanceof SemBoxExpr
or
this instanceof SemUnboxExpr
}
}
/** A unary expression whose sign is computed from the sign of its operand. */
private class UnarySignExpr extends FlowSignExpr {
SemUnaryExpr unary;
UnarySignExpr() { unary = this and not this instanceof SemCastExpr }
override Sign getSignRestriction() {
result = semExprSign(unary.getOperand()).applyUnaryOp(unary.getOpcode())
}
}
/**
* A `Convert`, `Box`, or `Unbox` expression, whose sign is computed based on
* the sign of its operand and the source and destination types.
*/
abstract private class CastSignExpr extends FlowSignExpr {
SemUnaryExpr cast;
CastSignExpr() { cast = this and cast instanceof SemCastExpr }
override Sign getSignRestriction() { result = semExprSign(cast.getOperand()) }
}
/**
* A `Convert` expression.
*/
private class ConvertSignExpr extends CastSignExpr {
override SemConvertExpr cast;
}
/**
* A `Box` expression.
*/
private class BoxSignExpr extends CastSignExpr {
override SemBoxExpr cast;
}
/**
* An `Unbox` expression.
*/
private class UnboxSignExpr extends CastSignExpr {
override SemUnboxExpr cast;
UnboxSignExpr() {
exists(SemType fromType | fromType = getTrackedType(cast.getOperand()) |
// Only numeric source types are handled here.
fromType instanceof SemNumericType
)
}
}
private predicate unknownSign(SemExpr e) { e instanceof UnknownSignExpr }
/**
* Holds if `lowerbound` is a lower bound for `v` at `pos`. This is restricted
* to only include bounds for which we might determine a sign.
*/
private predicate lowerBound(
SemExpr lowerbound, SemSsaVariable v, SemSsaReadPosition pos, boolean isStrict
) {
exists(boolean testIsTrue, SemRelationalExpr comp |
pos.hasReadOfVar(v) and
semGuardControlsSsaRead(semGetComparisonGuard(comp), pos, testIsTrue) and
not unknownSign(lowerbound)
|
testIsTrue = true and
comp.getLesserOperand() = lowerbound and
comp.getGreaterOperand() = semSsaRead(v, 0) and
(if comp.isStrict() then isStrict = true else isStrict = false)
or
testIsTrue = false and
comp.getGreaterOperand() = lowerbound and
comp.getLesserOperand() = semSsaRead(v, 0) and
(if comp.isStrict() then isStrict = false else isStrict = true)
)
}
/**
* Holds if `upperbound` is an upper bound for `v` at `pos`. This is restricted
* to only include bounds for which we might determine a sign.
*/
private predicate upperBound(
SemExpr upperbound, SemSsaVariable v, SemSsaReadPosition pos, boolean isStrict
) {
exists(boolean testIsTrue, SemRelationalExpr comp |
pos.hasReadOfVar(v) and
semGuardControlsSsaRead(semGetComparisonGuard(comp), pos, testIsTrue) and
not unknownSign(upperbound)
|
testIsTrue = true and
comp.getGreaterOperand() = upperbound and
comp.getLesserOperand() = semSsaRead(v, 0) and
(if comp.isStrict() then isStrict = true else isStrict = false)
or
testIsTrue = false and
comp.getLesserOperand() = upperbound and
comp.getGreaterOperand() = semSsaRead(v, 0) and
(if comp.isStrict() then isStrict = false else isStrict = true)
)
}
/**
* Holds if `eqbound` is an equality/inequality for `v` at `pos`. This is
* restricted to only include bounds for which we might determine a sign. The
* boolean `isEq` gives the polarity:
* - `isEq = true` : `v = eqbound`
* - `isEq = false` : `v != eqbound`
*/
private predicate eqBound(SemExpr eqbound, SemSsaVariable v, SemSsaReadPosition pos, boolean isEq) {
exists(SemGuard guard, boolean testIsTrue, boolean polarity |
pos.hasReadOfVar(v) and
semGuardControlsSsaRead(guard, pos, testIsTrue) and
guard.isEquality(eqbound, semSsaRead(v, 0), polarity) and
isEq = polarity.booleanXor(testIsTrue).booleanNot() and
not unknownSign(eqbound)
)
}
/**
* Holds if `bound` is a bound for `v` at `pos` that needs to be positive in
* order for `v` to be positive.
*/
private predicate posBound(SemExpr bound, SemSsaVariable v, SemSsaReadPosition pos) {
upperBound(bound, v, pos, _) or
eqBound(bound, v, pos, true)
}
/**
* Holds if `bound` is a bound for `v` at `pos` that needs to be negative in
* order for `v` to be negative.
*/
private predicate negBound(SemExpr bound, SemSsaVariable v, SemSsaReadPosition pos) {
lowerBound(bound, v, pos, _) or
eqBound(bound, v, pos, true)
}
/**
* Holds if `bound` is a bound for `v` at `pos` that can restrict whether `v`
* can be zero.
*/
private predicate zeroBound(SemExpr bound, SemSsaVariable v, SemSsaReadPosition pos) {
lowerBound(bound, v, pos, _) or
upperBound(bound, v, pos, _) or
eqBound(bound, v, pos, _)
}
/** Holds if `bound` allows `v` to be positive at `pos`. */
private predicate posBoundOk(SemExpr bound, SemSsaVariable v, SemSsaReadPosition pos) {
posBound(bound, v, pos) and TPos() = semExprSign(bound)
}
/** Holds if `bound` allows `v` to be negative at `pos`. */
private predicate negBoundOk(SemExpr bound, SemSsaVariable v, SemSsaReadPosition pos) {
negBound(bound, v, pos) and TNeg() = semExprSign(bound)
}
/** Holds if `bound` allows `v` to be zero at `pos`. */
private predicate zeroBoundOk(SemExpr bound, SemSsaVariable v, SemSsaReadPosition pos) {
lowerBound(bound, v, pos, _) and TNeg() = semExprSign(bound)
or
lowerBound(bound, v, pos, false) and TZero() = semExprSign(bound)
or
upperBound(bound, v, pos, _) and TPos() = semExprSign(bound)
or
upperBound(bound, v, pos, false) and TZero() = semExprSign(bound)
or
eqBound(bound, v, pos, true) and TZero() = semExprSign(bound)
or
eqBound(bound, v, pos, false) and TZero() != semExprSign(bound)
}
/**
* Holds if there is a bound that might restrict whether `v` has the sign `s`
* at `pos`.
*/
private predicate hasGuard(SemSsaVariable v, SemSsaReadPosition pos, Sign s) {
s = TPos() and posBound(_, v, pos)
or
s = TNeg() and negBound(_, v, pos)
or
s = TZero() and zeroBound(_, v, pos)
}
/**
* Gets a possible sign of `v` at `pos` based on its definition, where the sign
* might be ruled out by a guard.
*/
pragma[noinline]
private Sign guardedSsaSign(SemSsaVariable v, SemSsaReadPosition pos) {
result = semSsaDefSign(v) and
pos.hasReadOfVar(v) and
hasGuard(v, pos, result)
}
/**
* Gets a possible sign of `v` at `pos` based on its definition, where no guard
* can rule it out.
*/
pragma[noinline]
private Sign unguardedSsaSign(SemSsaVariable v, SemSsaReadPosition pos) {
result = semSsaDefSign(v) and
pos.hasReadOfVar(v) and
not hasGuard(v, pos, result)
}
/**
* Gets a possible sign of `v` at read position `pos`, where a guard could have
* ruled out the sign but does not.
* This does not check that the definition of `v` also allows the sign.
*/
private Sign guardedSsaSignOk(SemSsaVariable v, SemSsaReadPosition pos) {
result = TPos() and
forex(SemExpr bound | posBound(bound, v, pos) | posBoundOk(bound, v, pos))
or
result = TNeg() and
forex(SemExpr bound | negBound(bound, v, pos) | negBoundOk(bound, v, pos))
or
result = TZero() and
forex(SemExpr bound | zeroBound(bound, v, pos) | zeroBoundOk(bound, v, pos))
}
/** Gets a possible sign for `v` at `pos`. */
private Sign semSsaSign(SemSsaVariable v, SemSsaReadPosition pos) {
result = unguardedSsaSign(v, pos)
or
result = guardedSsaSign(v, pos) and
result = guardedSsaSignOk(v, pos)
}
/** Gets a possible sign for `v`. */
pragma[nomagic]
Sign semSsaDefSign(SemSsaVariable v) { result = v.(SignDef).getSign() }
/** Gets a possible sign for `e`. */
cached
Sign semExprSign(SemExpr e) {
exists(Sign s | s = e.(SignExpr).getSign() |
if
getTrackedType(e) instanceof SemUnsignedIntegerType and
s = TNeg() and
not Specific::ignoreTypeRestrictions(e)
then result = TPos()
else result = s
)
}
/**
* Dummy predicate that holds for any sign. This is added to improve readability
* of cases where the sign is unrestricted.
*/
predicate semAnySign(Sign s) { any() }
/** Holds if `e` can be positive and cannot be negative. */
predicate semPositive(SemExpr e) {
semExprSign(e) = TPos() and
not semExprSign(e) = TNeg()
}
/** Holds if `e` can be negative and cannot be positive. */
predicate semNegative(SemExpr e) {
semExprSign(e) = TNeg() and
not semExprSign(e) = TPos()
}
/** Holds if `e` is strictly positive. */
predicate semStrictlyPositive(SemExpr e) {
semExprSign(e) = TPos() and
not semExprSign(e) = TNeg() and
not semExprSign(e) = TZero()
}
/** Holds if `e` is strictly negative. */
predicate semStrictlyNegative(SemExpr e) {
semExprSign(e) = TNeg() and
not semExprSign(e) = TPos() and
not semExprSign(e) = TZero()
}

View File

@@ -0,0 +1,23 @@
/**
* Provides C++-specific definitions for use in sign analysis.
*/
private import experimental.semmle.code.cpp.semantic.Semantic
/**
* Workaround to allow certain expressions to have a negative sign, even if the type of the
* expression is unsigned.
*/
predicate ignoreTypeRestrictions(SemExpr e) { none() }
/**
* Workaround to track the sign of cetain expressions even if the type of the expression is not
* numeric.
*/
predicate trackUnknownNonNumericExpr(SemExpr e) { none() }
/**
* Workaround to ignore tracking of certain expressions even if the type of the expression is
* numeric.
*/
predicate ignoreExprSign(SemExpr e) { none() }

View File

@@ -0,0 +1,30 @@
import cpp
import experimental.semmle.code.cpp.semantic.analysis.ModulusAnalysis
import experimental.semmle.code.cpp.semantic.Semantic
import semmle.code.cpp.ir.IR as IR
import TestUtilities.InlineExpectationsTest
class ModulusAnalysisTest extends InlineExpectationsTest {
ModulusAnalysisTest() { this = "ModulusAnalysisTest" }
override string getARelevantTag() { result = "mod" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(SemExpr e, IR::CallInstruction call |
call.getArgument(0) = e and
call.getStaticCallTarget().hasName("mod") and
tag = "mod" and
element = e.toString() and
location = e.getLocation() and
value = getAModString(e)
)
}
}
private string getAModString(SemExpr e) {
exists(SemBound b, int delta, int mod |
semExprModulus(e, b, delta, mod) and
result = b.toString() + "," + delta.toString() + "," + mod.toString() and
not (delta = 0 and mod = 0)
)
}

View File

@@ -0,0 +1,60 @@
template<typename T> void mod(T value);
const int c1 = 42;
const int c2 = 43;
void m(int i, bool cond, int x, int y) {
int eq = i + 3;
int mul = eq * c1 + 3; // congruent 3 mod 42
int seven = 7;
if (mul % c2 == seven) {
mod(mul); // congruent 3 mod 42, 7 mod 43
}
int j = cond
? i * 4 + 3
: i * 8 + 7;
mod(j); // $ mod=0,3,4
if (x % c1 == 3 && y % c1 == 7) {
// Need implies_v2
mod(x + y); // $ MISSING: 0,10,42
}
if (x % c1 == 3 && y % c1 == 7) {
// Need implies_v2
mod(x - y); // $ MISSING: mod=0,38,42
}
if (cond) {
j = i * 4 + 3;
}
else {
j = i * 8 + 7;
}
mod(j); // $ mod=0,3,4
if (cond) {
mod(j); // $ mod=0,3,4
} else {
mod(j); // $ mod=0,3,4
}
if ((x & 15) == 3) {
mod(x); // $ mod=0,3,16
}
}
void loops(int cap)
{
for (int i = 0; i < cap; i++)
mod(i);
for (int j = 0; j < cap; j += 1)
mod(j);
for (int k = 0; k < cap; k += 3)
mod(k); // $ mod=0,0,3
}

View File

@@ -0,0 +1,51 @@
import cpp
import experimental.semmle.code.cpp.semantic.analysis.RangeAnalysis
import experimental.semmle.code.cpp.semantic.Semantic
import semmle.code.cpp.ir.IR as IR
import TestUtilities.InlineExpectationsTest
class RangeAnalysisTest extends InlineExpectationsTest {
RangeAnalysisTest() { this = "RangeAnalysisTest" }
override string getARelevantTag() { result = "range" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(SemExpr e, IR::CallInstruction call |
call.getArgument(0) = e and
call.getStaticCallTarget().hasName("range") and
tag = "range" and
element = e.toString() and
location = e.getLocation() and
value = getARangeString(e)
)
}
}
private string getDirectionString(boolean d) {
result = "<=" and d = true
or
result = ">=" and d = false
}
bindingset[value]
private string getOffsetString(int value) {
if value >= 0 then result = "+" + value.toString() else result = value.toString()
}
bindingset[delta]
private string getBoundString(SemBound b, int delta) {
b instanceof SemZeroBound and result = delta.toString()
or
result =
strictconcat(b.(SemSsaBound).getAVariable().(IR::Instruction).getAst().toString(), ":") +
getOffsetString(delta)
}
private string getARangeString(SemExpr e) {
exists(SemBound b, int delta, boolean upper |
semBounded(e, b, delta, upper, _) and
if semBounded(e, b, delta, upper.booleanNot(), _)
then delta != 0 and result = "==" + getBoundString(b, delta)
else result = getDirectionString(upper) + getBoundString(b, delta)
)
}

View File

@@ -0,0 +1,51 @@
template<typename T> void range(T value);
int f1(int x, int y) {
if (x < 500) {
if (x > 400) {
range(x); // $ range=>=401 range=<=499
return x;
}
if (y - 2 == x && y > 300) {
range(x + y); // $ range=>=300 range=>=x+1 range=>=y-1
return x + y;
}
if (x != y + 1) {
range(x); // $ range=<=400
int sum = x + y;
} else {
if (y > 300) {
range(x); // $ range=>=302 range=<=400 range===y+1
range(y); // $ range=>=301 range=<=399 range===x-1
int sum = x + y;
}
}
if (x > 500) {
range(x); // $ range=<=400 range=>=501
return x;
}
}
return 0;
}
int f2(int x, int y, int z) {
if (x < 500) {
if (x > 400) {
range(x); // $ range=>=401 range=<=499
return x;
}
if (y == x - 1 && y > 300 && y + 2 == z && z == 350) {
range(x); // $ range===349 range===y+1 range===z-1
range(y); // $ range===348 range===x-1 range===z-2
range(z); // $ range===350 range===x+1 range===y+2
return x + y + z;
}
}
return 0;
}

View File

@@ -0,0 +1,24 @@
import cpp
import experimental.semmle.code.cpp.semantic.analysis.SignAnalysisCommon
import experimental.semmle.code.cpp.semantic.Semantic
import semmle.code.cpp.ir.IR as IR
import TestUtilities.InlineExpectationsTest
class SignAnalysisTest extends InlineExpectationsTest {
SignAnalysisTest() { this = "SignAnalysisTest" }
override string getARelevantTag() { result = "sign" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(SemExpr e, IR::CallInstruction call |
call.getArgument(0) = e and
call.getStaticCallTarget().hasName("sign") and
tag = "sign" and
element = e.toString() and
location = e.getLocation() and
value = getASignString(e)
)
}
}
private string getASignString(SemExpr e) { result = strictconcat(semExprSign(e).toString(), "") }

View File

@@ -0,0 +1,57 @@
template<typename T> T sign(T value);
int f1(int x, int y) {
if (x < 0) {
return sign(x); // $ sign=-
}
if (x < y) {
return sign(y); // $ sign=+
}
return 0;
}
void u(int x) {
unsigned c = (unsigned)x;
sign(c); // $ sign=+0
}
void constants() {
int i_pos = 1234;
sign(i_pos); // $ sign=+
int i_neg = -1234;
sign(i_neg); // $ sign=-
int i_zero = 0;
sign(i_zero); // $ sign=0
long l_pos = 1234;
sign(l_pos); // $ sign=+
long l_neg = -1234;
sign(l_neg); // $ sign=-
long l_zero = 0;
sign(l_zero); // $ sign=0
long l_pos_big = 0x00000001baadf00d;
sign(l_pos_big); // $ sign=+
float f_pos = 1.234f;
sign(f_pos); // $ sign=+
float f_neg = -1.234f;
sign(f_neg); // $ sign=-
float f_zero = 0.0f;
sign(f_zero); // $ sign=0
}
void arithmetic(int y) {
int x = 0;
sign(x + 1); // $ sign=+
x = -1;
sign(x); // $ sign=-
sign(x + 1); // $ sign=+-0 // Ideally 0 because it's constant
if (y < 0) {
sign(y); // $ sign=-
sign(y + 1); // $ sign=+-0 // Ideally -0 because it's only adding one.
int z = y;
sign(++z); // $ sign=+-0 // Ideally -0 because it's only adding one.
z = y;
sign(z++); // $ sign=-
sign(z); // $ sign=+-0 // Ideally -0 because it's only adding one.
}
}