From e8800a6dbaaf5cb6fb973d1d143f13028237e69d Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 3 Apr 2023 13:52:55 +0100 Subject: [PATCH] C++: Move the new range-analysis library out of experimental and into an 'internal' directory. --- .../rangeanalysis/new/SimpleRangeAnalysis.qll | 132 ++ .../new/internal/semantic/Semantic.qll | 7 + .../new/internal/semantic/SemanticBound.qll | 45 + .../new/internal/semantic/SemanticCFG.qll | 22 + .../new/internal/semantic/SemanticExpr.qll | 309 +++++ .../semantic/SemanticExprSpecific.qll | 450 ++++++ .../new/internal/semantic/SemanticGuard.qll | 65 + .../internal/semantic/SemanticLocation.qll | 23 + .../new/internal/semantic/SemanticOpcode.qll | 187 +++ .../new/internal/semantic/SemanticSSA.qll | 75 + .../new/internal/semantic/SemanticType.qll | 311 +++++ .../semantic/SemanticTypeSpecific.qll | 43 + .../new/internal/semantic/analysis/Bound.qll | 86 ++ .../semantic/analysis/ConstantAnalysis.qll | 31 + .../analysis/ConstantAnalysisSpecific.qll | 10 + .../internal/semantic/analysis/FloatDelta.qll | 20 + .../internal/semantic/analysis/IntDelta.qll | 29 + .../semantic/analysis/ModulusAnalysis.qll | 341 +++++ .../analysis/ModulusAnalysisSpecific.qll | 8 + .../semantic/analysis/RangeAnalysis.qll | 2 + .../semantic/analysis/RangeAnalysisImpl.qll | 107 ++ .../analysis/RangeAnalysisSpecific.qll | 92 ++ .../semantic/analysis/RangeAnalysisStage.qll | 1217 +++++++++++++++++ .../internal/semantic/analysis/RangeUtils.qll | 140 ++ .../new/internal/semantic/analysis/Sign.qll | 267 ++++ .../semantic/analysis/SignAnalysisCommon.qll | 510 +++++++ .../analysis/SignAnalysisSpecific.qll | 23 + 27 files changed, 4552 insertions(+) create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/SimpleRangeAnalysis.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/Semantic.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticBound.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticCFG.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticExpr.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticExprSpecific.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticGuard.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticLocation.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticOpcode.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticSSA.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticType.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticTypeSpecific.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/Bound.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ConstantAnalysis.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ConstantAnalysisSpecific.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/FloatDelta.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/IntDelta.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ModulusAnalysis.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ModulusAnalysisSpecific.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysis.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysisImpl.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysisSpecific.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysisStage.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeUtils.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/Sign.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/SignAnalysisCommon.qll create mode 100644 cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/SignAnalysisSpecific.qll diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/SimpleRangeAnalysis.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/SimpleRangeAnalysis.qll new file mode 100644 index 00000000000..04cb8a5a3fa --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/SimpleRangeAnalysis.qll @@ -0,0 +1,132 @@ +/** + * Wrapper for the semantic range analysis library that mimics the + * interface of the simple range analysis library. + */ + +private import cpp +private import semmle.code.cpp.ir.IR +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticBound +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticExprSpecific +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.analysis.RangeAnalysis + +/** + * Gets the lower bound of the expression. + * + * Note: expressions in C/C++ are often implicitly or explicitly cast to a + * different result type. Such casts can cause the value of the expression + * to overflow or to be truncated. This predicate computes the lower bound + * of the expression without including the effect of the casts. To compute + * the lower bound of the expression after all the casts have been applied, + * call `lowerBound` like this: + * + * `lowerBound(expr.getFullyConverted())` + */ +float lowerBound(Expr expr) { + exists(Instruction i, SemBound b | i.getAst() = expr and b instanceof SemZeroBound | + semBounded(getSemanticExpr(i), b, result, false, _) + ) +} + +/** + * Gets the upper bound of the expression. + * + * Note: expressions in C/C++ are often implicitly or explicitly cast to a + * different result type. Such casts can cause the value of the expression + * to overflow or to be truncated. This predicate computes the upper bound + * of the expression without including the effect of the casts. To compute + * the upper bound of the expression after all the casts have been applied, + * call `upperBound` like this: + * + * `upperBound(expr.getFullyConverted())` + */ +float upperBound(Expr expr) { + exists(Instruction i, SemBound b | i.getAst() = expr and b instanceof SemZeroBound | + semBounded(getSemanticExpr(i), b, result, true, _) + ) +} + +/** + * Holds if the upper bound of `expr` may have been widened. This means the + * upper bound is in practice likely to be overly wide. + */ +predicate upperBoundMayBeWidened(Expr e) { none() } + +/** + * Holds if `expr` has a provably empty range. For example: + * + * 10 < expr and expr < 5 + * + * The range of an expression can only be empty if it can never be + * executed. For example: + * + * ```cpp + * if (10 < x) { + * if (x < 5) { + * // Unreachable code + * return x; // x has an empty range: 10 < x && x < 5 + * } + * } + * ``` + */ +predicate exprWithEmptyRange(Expr expr) { lowerBound(expr) > upperBound(expr) } + +/** Holds if the definition might overflow negatively. */ +predicate defMightOverflowNegatively(RangeSsaDefinition def, StackVariable v) { none() } + +/** Holds if the definition might overflow positively. */ +predicate defMightOverflowPositively(RangeSsaDefinition def, StackVariable v) { none() } + +/** + * Holds if the definition might overflow (either positively or + * negatively). + */ +predicate defMightOverflow(RangeSsaDefinition def, StackVariable v) { + defMightOverflowNegatively(def, v) or + defMightOverflowPositively(def, v) +} + +/** + * Holds if the expression might overflow negatively. This predicate + * does not consider the possibility that the expression might overflow + * due to a conversion. + */ +predicate exprMightOverflowNegatively(Expr expr) { none() } + +/** + * Holds if the expression might overflow negatively. Conversions + * are also taken into account. For example the expression + * `(int16)(x+y)` might overflow due to the `(int16)` cast, rather than + * due to the addition. + */ +predicate convertedExprMightOverflowNegatively(Expr expr) { + exprMightOverflowNegatively(expr) or + convertedExprMightOverflowNegatively(expr.getConversion()) +} + +/** + * Holds if the expression might overflow positively. This predicate + * does not consider the possibility that the expression might overflow + * due to a conversion. + */ +predicate exprMightOverflowPositively(Expr expr) { none() } + +/** + * Holds if the expression might overflow positively. Conversions + * are also taken into account. For example the expression + * `(int16)(x+y)` might overflow due to the `(int16)` cast, rather than + * due to the addition. + */ +predicate convertedExprMightOverflowPositively(Expr expr) { + exprMightOverflowPositively(expr) or + convertedExprMightOverflowPositively(expr.getConversion()) +} + +/** + * Holds if the expression might overflow (either positively or + * negatively). The possibility that the expression might overflow + * due to an implicit or explicit cast is also considered. + */ +predicate convertedExprMightOverflow(Expr expr) { + convertedExprMightOverflowNegatively(expr) or + convertedExprMightOverflowPositively(expr) +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/Semantic.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/Semantic.qll new file mode 100644 index 00000000000..2238a8b6c94 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/Semantic.qll @@ -0,0 +1,7 @@ +import SemanticExpr +import SemanticBound +import SemanticSSA +import SemanticGuard +import SemanticCFG +import SemanticType +import SemanticOpcode diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticBound.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticBound.qll new file mode 100644 index 00000000000..a1814f659fc --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticBound.qll @@ -0,0 +1,45 @@ +/** + * Semantic wrapper around the language-specific bounds library. + */ + +private import SemanticExpr +private import SemanticExprSpecific::SemanticExprConfig as Specific +private import SemanticSSA +private import SemanticLocation + +/** + * A valid base for an expression bound. + * + * Can be either a variable (`SemSsaBound`) or zero (`SemZeroBound`). + */ +class SemBound instanceof Specific::Bound { + final string toString() { result = super.toString() } + + final SemLocation getLocation() { result = super.getLocation() } + + final SemExpr getExpr(int delta) { result = Specific::getBoundExpr(this, delta) } +} + +/** + * A bound that is a constant zero. + */ +class SemZeroBound extends SemBound { + SemZeroBound() { Specific::zeroBound(this) } +} + +/** + * A bound that is an SSA definition. + */ +class SemSsaBound extends SemBound { + /** + * The variables whose value is used as the bound. + * + * Can be multi-valued in some implementations. If so, all variables will be equivalent. + */ + SemSsaVariable var; + + SemSsaBound() { Specific::ssaBound(this, var) } + + /** Gets a variable whose value is used as the bound. */ + final SemSsaVariable getAVariable() { result = var } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticCFG.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticCFG.qll new file mode 100644 index 00000000000..333edc46b6e --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticCFG.qll @@ -0,0 +1,22 @@ +/** + * Semantic interface to the control flow graph. + */ + +private import Semantic +private import SemanticExprSpecific::SemanticExprConfig as Specific + +/** + * A basic block in the control-flow graph. + */ +class SemBasicBlock extends Specific::BasicBlock { + /** Holds if this block (transitively) dominates `otherblock`. */ + final predicate bbDominates(SemBasicBlock otherBlock) { Specific::bbDominates(this, otherBlock) } + + /** Holds if this block has dominance information. */ + final predicate hasDominanceInformation() { Specific::hasDominanceInformation(this) } + + /** Gets an expression that is evaluated in this basic block. */ + final SemExpr getAnExpr() { result.getBasicBlock() = this } + + final int getUniqueId() { result = Specific::getBasicBlockUniqueId(this) } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticExpr.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticExpr.qll new file mode 100644 index 00000000000..2ea958931da --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticExpr.qll @@ -0,0 +1,309 @@ +/** + * Semantic interface for expressions. + */ + +private import Semantic +private import SemanticExprSpecific::SemanticExprConfig as Specific + +/** + * An language-neutral expression. + * + * The expression computes a value of type `getSemType()`. The actual computation is determined by + * the expression's opcode (`getOpcode()`). + */ +class SemExpr instanceof Specific::Expr { + final string toString() { result = super.toString() } + + final Specific::Location getLocation() { result = super.getLocation() } + + Opcode getOpcode() { result instanceof Opcode::Unknown } + + SemType getSemType() { result = Specific::getUnknownExprType(this) } + + final SemBasicBlock getBasicBlock() { result = Specific::getExprBasicBlock(this) } +} + +/** An expression with an opcode other than `Unknown`. */ +abstract private class SemKnownExpr extends SemExpr { + Opcode opcode; + SemType type; + + final override Opcode getOpcode() { result = opcode } + + final override SemType getSemType() { result = type } +} + +/** An expression that returns a literal value. */ +class SemLiteralExpr extends SemKnownExpr { + SemLiteralExpr() { + Specific::integerLiteral(this, type, _) and opcode instanceof Opcode::Constant + or + Specific::largeIntegerLiteral(this, type, _) and opcode instanceof Opcode::Constant + or + Specific::booleanLiteral(this, type, _) and opcode instanceof Opcode::Constant + or + Specific::floatingPointLiteral(this, type, _) and opcode instanceof Opcode::Constant + or + Specific::nullLiteral(this, type) and opcode instanceof Opcode::Constant + or + Specific::stringLiteral(this, type, _) and opcode instanceof Opcode::StringConstant + } +} + +/** An expression that returns a numeric literal value. */ +class SemNumericLiteralExpr extends SemLiteralExpr { + SemNumericLiteralExpr() { + Specific::integerLiteral(this, _, _) + or + Specific::largeIntegerLiteral(this, _, _) + or + Specific::floatingPointLiteral(this, _, _) + } + + /** + * Gets an approximation of the value of the literal, as a `float`. + * + * If the value can be precisely represented as a `float`, the result will be exact. If the actual + * value cannot be precisely represented (for example, it is an integer with more than 53 + * significant bits), then the result is an approximation. + */ + float getApproximateFloatValue() { none() } +} + +/** An expression that returns an integer literal value. */ +class SemIntegerLiteralExpr extends SemNumericLiteralExpr { + SemIntegerLiteralExpr() { + Specific::integerLiteral(this, _, _) + or + Specific::largeIntegerLiteral(this, _, _) + } + + /** + * Gets the value of the literal, if it can be represented as an `int`. + * + * If the value is outside the range of an `int`, use `getApproximateFloatValue()` to get a value + * that is equal to the actual integer value, within rounding error. + */ + final int getIntValue() { Specific::integerLiteral(this, _, result) } + + final override float getApproximateFloatValue() { + result = getIntValue() + or + Specific::largeIntegerLiteral(this, _, result) + } +} + +/** + * An expression that returns a floating-point literal value. + */ +class SemFloatingPointLiteralExpr extends SemNumericLiteralExpr { + float value; + + SemFloatingPointLiteralExpr() { Specific::floatingPointLiteral(this, _, value) } + + final override float getApproximateFloatValue() { result = value } + + /** Gets the value of the literal. */ + final float getFloatValue() { result = value } +} + +/** + * An expression that consumes two operands. + */ +class SemBinaryExpr extends SemKnownExpr { + SemExpr leftOperand; + SemExpr rightOperand; + + SemBinaryExpr() { Specific::binaryExpr(this, opcode, type, leftOperand, rightOperand) } + + /** Gets the left operand. */ + final SemExpr getLeftOperand() { result = leftOperand } + + /** Gets the right operand. */ + final SemExpr getRightOperand() { result = rightOperand } + + /** Holds if `a` and `b` are the two operands, in either order. */ + final predicate hasOperands(SemExpr a, SemExpr b) { + a = getLeftOperand() and b = getRightOperand() + or + a = getRightOperand() and b = getLeftOperand() + } + + /** Gets the two operands. */ + final SemExpr getAnOperand() { result = getLeftOperand() or result = getRightOperand() } +} + +/** An expression that performs and ordered comparison of two operands. */ +class SemRelationalExpr extends SemBinaryExpr { + SemRelationalExpr() { + opcode instanceof Opcode::CompareLT + or + opcode instanceof Opcode::CompareLE + or + opcode instanceof Opcode::CompareGT + or + opcode instanceof Opcode::CompareGE + } + + /** + * Get the operand that will be less than the other operand if the result of the comparison is + * `true`. + * + * For `x < y` or `x <= y`, this will return `x`. + * For `x > y` or `x >= y`, this will return `y`.` + */ + final SemExpr getLesserOperand() { + if opcode instanceof Opcode::CompareLT or opcode instanceof Opcode::CompareLE + then result = getLeftOperand() + else result = getRightOperand() + } + + /** + * Get the operand that will be greater than the other operand if the result of the comparison is + * `true`. + * + * For `x < y` or `x <= y`, this will return `y`. + * For `x > y` or `x >= y`, this will return `x`.` + */ + final SemExpr getGreaterOperand() { + if opcode instanceof Opcode::CompareGT or opcode instanceof Opcode::CompareGE + then result = getLeftOperand() + else result = getRightOperand() + } + + /** Holds if this comparison returns `false` if the two operands are equal. */ + final predicate isStrict() { + opcode instanceof Opcode::CompareLT or opcode instanceof Opcode::CompareGT + } +} + +class SemAddExpr extends SemBinaryExpr { + SemAddExpr() { opcode instanceof Opcode::Add or opcode instanceof Opcode::PointerAdd } +} + +class SemSubExpr extends SemBinaryExpr { + SemSubExpr() { opcode instanceof Opcode::Sub or opcode instanceof Opcode::PointerSub } +} + +class SemMulExpr extends SemBinaryExpr { + SemMulExpr() { opcode instanceof Opcode::Mul } +} + +class SemDivExpr extends SemBinaryExpr { + SemDivExpr() { opcode instanceof Opcode::Div } +} + +class SemRemExpr extends SemBinaryExpr { + SemRemExpr() { opcode instanceof Opcode::Rem } +} + +class SemShiftLeftExpr extends SemBinaryExpr { + SemShiftLeftExpr() { opcode instanceof Opcode::ShiftLeft } +} + +class SemShiftRightExpr extends SemBinaryExpr { + SemShiftRightExpr() { opcode instanceof Opcode::ShiftRight } +} + +class SemShiftRightUnsignedExpr extends SemBinaryExpr { + SemShiftRightUnsignedExpr() { opcode instanceof Opcode::ShiftRightUnsigned } +} + +class SemBitAndExpr extends SemBinaryExpr { + SemBitAndExpr() { opcode instanceof Opcode::BitAnd } +} + +class SemBitOrExpr extends SemBinaryExpr { + SemBitOrExpr() { opcode instanceof Opcode::BitOr } +} + +class SemBitXorExpr extends SemBinaryExpr { + SemBitXorExpr() { opcode instanceof Opcode::BitXor } +} + +class SemUnaryExpr extends SemKnownExpr { + SemExpr operand; + + SemUnaryExpr() { Specific::unaryExpr(this, opcode, type, operand) } + + final SemExpr getOperand() { result = operand } +} + +class SemBoxExpr extends SemUnaryExpr { + SemBoxExpr() { opcode instanceof Opcode::Box } +} + +class SemUnboxExpr extends SemUnaryExpr { + SemUnboxExpr() { opcode instanceof Opcode::Unbox } +} + +class SemConvertExpr extends SemUnaryExpr { + SemConvertExpr() { opcode instanceof Opcode::Convert } +} + +class SemCopyValueExpr extends SemUnaryExpr { + SemCopyValueExpr() { opcode instanceof Opcode::CopyValue } +} + +class SemNegateExpr extends SemUnaryExpr { + SemNegateExpr() { opcode instanceof Opcode::Negate } +} + +class SemBitComplementExpr extends SemUnaryExpr { + SemBitComplementExpr() { opcode instanceof Opcode::BitComplement } +} + +class SemLogicalNotExpr extends SemUnaryExpr { + SemLogicalNotExpr() { opcode instanceof Opcode::LogicalNot } +} + +class SemAddOneExpr extends SemUnaryExpr { + SemAddOneExpr() { opcode instanceof Opcode::AddOne } +} + +class SemSubOneExpr extends SemUnaryExpr { + SemSubOneExpr() { opcode instanceof Opcode::SubOne } +} + +private class SemNullaryExpr extends SemKnownExpr { + SemNullaryExpr() { Specific::nullaryExpr(this, opcode, type) } +} + +class SemInitializeParameterExpr extends SemNullaryExpr { + SemInitializeParameterExpr() { opcode instanceof Opcode::InitializeParameter } +} + +class SemLoadExpr extends SemNullaryExpr { + SemLoadExpr() { opcode instanceof Opcode::Load } + + final SemSsaVariable getDef() { result.getAUse() = this } +} + +class SemSsaLoadExpr extends SemLoadExpr { + SemSsaLoadExpr() { exists(getDef()) } +} + +class SemNonSsaLoadExpr extends SemLoadExpr { + SemNonSsaLoadExpr() { not exists(getDef()) } +} + +class SemStoreExpr extends SemUnaryExpr { + SemStoreExpr() { opcode instanceof Opcode::Store } +} + +class SemConditionalExpr extends SemKnownExpr { + SemExpr condition; + SemExpr trueResult; + SemExpr falseResult; + + SemConditionalExpr() { + opcode instanceof Opcode::Conditional and + Specific::conditionalExpr(this, type, condition, trueResult, falseResult) + } + + final SemExpr getBranchExpr(boolean branch) { + branch = true and result = trueResult + or + branch = false and result = falseResult + } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticExprSpecific.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticExprSpecific.qll new file mode 100644 index 00000000000..6ac6081c558 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticExprSpecific.qll @@ -0,0 +1,450 @@ +/** + * C++-specific implementation of the semantic interface. + */ + +private import cpp as Cpp +private import semmle.code.cpp.ir.IR as IR +private import Semantic +private import analysis.Bound as IRBound +private import semmle.code.cpp.controlflow.IRGuards as IRGuards +private import semmle.code.cpp.ir.ValueNumbering + +module SemanticExprConfig { + class Location = Cpp::Location; + + /** A `ConvertInstruction` or a `CopyValueInstruction`. */ + private class Conversion extends IR::UnaryInstruction { + Conversion() { + this instanceof IR::CopyValueInstruction + or + this instanceof IR::ConvertInstruction + } + + /** Holds if this instruction converts a value of type `tFrom` to a value of type `tTo`. */ + predicate converts(SemType tFrom, SemType tTo) { + tFrom = getSemanticType(this.getUnary().getResultIRType()) and + tTo = getSemanticType(this.getResultIRType()) + } + } + + /** + * Gets a conversion-like instruction that consumes `op`, and + * which is guaranteed to not overflow. + */ + private IR::Instruction safeConversion(IR::Operand op) { + exists(Conversion conv, SemType tFrom, SemType tTo | + conv.converts(tFrom, tTo) and + conversionCannotOverflow(tFrom, tTo) and + conv.getUnaryOperand() = op and + result = conv + ) + } + + /** Holds if `i1 = i2` or if `i2` is a safe conversion that consumes `i1`. */ + private predicate idOrSafeConversion(IR::Instruction i1, IR::Instruction i2) { + not i1.getResultIRType() instanceof IR::IRVoidType and + ( + i1 = i2 + or + i2 = safeConversion(i1.getAUse()) and + i1.getBlock() = i2.getBlock() + ) + } + + module Equiv = QlBuiltins::EquivalenceRelation; + + /** + * The expressions on which we perform range analysis. + */ + class Expr extends Equiv::EquivalenceClass { + /** Gets the n'th instruction in this equivalence class. */ + private IR::Instruction getInstruction(int n) { + result = + rank[n + 1](IR::Instruction instr, int i, IR::IRBlock block | + this = Equiv::getEquivalenceClass(instr) and block.getInstruction(i) = instr + | + instr order by i + ) + } + + /** Gets a textual representation of this element. */ + string toString() { result = this.getUnconverted().toString() } + + /** Gets the basic block of this expression. */ + IR::IRBlock getBlock() { result = this.getUnconverted().getBlock() } + + /** Gets the unconverted instruction associated with this expression. */ + IR::Instruction getUnconverted() { result = this.getInstruction(0) } + + /** + * Gets the final instruction associated with this expression. This + * represents the result after applying all the safe conversions. + */ + IR::Instruction getConverted() { + exists(int n | + result = this.getInstruction(n) and + not exists(this.getInstruction(n + 1)) + ) + } + + /** Gets the type of the result produced by this instruction. */ + IR::IRType getResultIRType() { result = this.getConverted().getResultIRType() } + + /** Gets the location of the source code for this expression. */ + Location getLocation() { result = this.getUnconverted().getLocation() } + } + + SemBasicBlock getExprBasicBlock(Expr e) { result = getSemanticBasicBlock(e.getBlock()) } + + private predicate anyConstantExpr(Expr expr, SemType type, string value) { + exists(IR::ConstantInstruction instr | getSemanticExpr(instr) = expr | + type = getSemanticType(instr.getResultIRType()) and + value = instr.getValue() + ) + } + + predicate integerLiteral(Expr expr, SemIntegerType type, int value) { + exists(string valueString | + anyConstantExpr(expr, type, valueString) and + value = valueString.toInt() + ) + } + + predicate largeIntegerLiteral(Expr expr, SemIntegerType type, float approximateFloatValue) { + exists(string valueString | + anyConstantExpr(expr, type, valueString) and + not exists(valueString.toInt()) and + approximateFloatValue = valueString.toFloat() + ) + } + + predicate floatingPointLiteral(Expr expr, SemFloatingPointType type, float value) { + exists(string valueString | + anyConstantExpr(expr, type, valueString) and value = valueString.toFloat() + ) + } + + predicate booleanLiteral(Expr expr, SemBooleanType type, boolean value) { + exists(string valueString | + anyConstantExpr(expr, type, valueString) and + ( + valueString = "true" and value = true + or + valueString = "false" and value = false + ) + ) + } + + predicate nullLiteral(Expr expr, SemAddressType type) { anyConstantExpr(expr, type, _) } + + predicate stringLiteral(Expr expr, SemType type, string value) { + anyConstantExpr(expr, type, value) and + expr.getUnconverted() instanceof IR::StringConstantInstruction + } + + predicate binaryExpr(Expr expr, Opcode opcode, SemType type, Expr leftOperand, Expr rightOperand) { + exists(IR::BinaryInstruction instr | + instr = expr.getUnconverted() and + type = getSemanticType(instr.getResultIRType()) and + leftOperand = getSemanticExpr(instr.getLeft()) and + rightOperand = getSemanticExpr(instr.getRight()) and + // REVIEW: Merge the two `Opcode` types. + opcode.toString() = instr.getOpcode().toString() + ) + } + + predicate unaryExpr(Expr expr, Opcode opcode, SemType type, Expr operand) { + exists(IR::UnaryInstruction instr | instr = expr.getUnconverted() | + type = getSemanticType(instr.getResultIRType()) and + operand = getSemanticExpr(instr.getUnary()) and + // REVIEW: Merge the two operand types. + opcode.toString() = instr.getOpcode().toString() + ) + or + exists(IR::StoreInstruction instr | instr = expr.getUnconverted() | + type = getSemanticType(instr.getResultIRType()) and + operand = getSemanticExpr(instr.getSourceValue()) and + opcode instanceof Opcode::Store + ) + } + + predicate nullaryExpr(Expr expr, Opcode opcode, SemType type) { + exists(IR::LoadInstruction load | + load = expr.getUnconverted() and + type = getSemanticType(load.getResultIRType()) and + opcode instanceof Opcode::Load + ) + or + exists(IR::InitializeParameterInstruction init | + init = expr.getUnconverted() and + type = getSemanticType(init.getResultIRType()) and + opcode instanceof Opcode::InitializeParameter + ) + } + + predicate conditionalExpr( + Expr expr, SemType type, Expr condition, Expr trueResult, Expr falseResult + ) { + none() + } + + SemType getUnknownExprType(Expr expr) { result = getSemanticType(expr.getResultIRType()) } + + class BasicBlock = IR::IRBlock; + + predicate bbDominates(BasicBlock dominator, BasicBlock dominated) { + dominator.dominates(dominated) + } + + predicate hasDominanceInformation(BasicBlock block) { any() } + + private predicate id(Cpp::Locatable x, Cpp::Locatable y) { x = y } + + private predicate idOf(Cpp::Locatable x, int y) = equivalenceRelation(id/2)(x, y) + + int getBasicBlockUniqueId(BasicBlock block) { idOf(block.getFirstInstruction().getAst(), result) } + + newtype TSsaVariable = + TSsaInstruction(IR::Instruction instr) { instr.hasMemoryResult() } or + TSsaOperand(IR::Operand op) { op.isDefinitionInexact() } or + TSsaPointerArithmeticGuard(ValueNumber instr) { + exists(Guard g, IR::Operand use | + use = instr.getAUse() and use.getIRType() instanceof IR::IRAddressType + | + g.comparesLt(use, _, _, _, _) or + g.comparesLt(_, use, _, _, _) or + g.comparesEq(use, _, _, _, _) or + g.comparesEq(_, use, _, _, _) + ) + } + + class SsaVariable extends TSsaVariable { + string toString() { none() } + + Location getLocation() { none() } + + IR::Instruction asInstruction() { none() } + + ValueNumber asPointerArithGuard() { none() } + + IR::Operand asOperand() { none() } + } + + class SsaInstructionVariable extends SsaVariable, TSsaInstruction { + IR::Instruction instr; + + SsaInstructionVariable() { this = TSsaInstruction(instr) } + + final override string toString() { result = instr.toString() } + + final override Location getLocation() { result = instr.getLocation() } + + final override IR::Instruction asInstruction() { result = instr } + } + + class SsaPointerArithmeticGuard extends SsaVariable, TSsaPointerArithmeticGuard { + ValueNumber vn; + + SsaPointerArithmeticGuard() { this = TSsaPointerArithmeticGuard(vn) } + + final override string toString() { result = vn.toString() } + + final override Location getLocation() { result = vn.getLocation() } + + final override ValueNumber asPointerArithGuard() { result = vn } + } + + class SsaOperand extends SsaVariable, TSsaOperand { + IR::Operand op; + + SsaOperand() { this = TSsaOperand(op) } + + final override string toString() { result = op.toString() } + + final override Location getLocation() { result = op.getLocation() } + + final override IR::Operand asOperand() { result = op } + } + + predicate explicitUpdate(SsaVariable v, Expr sourceExpr) { + getSemanticExpr(v.asInstruction()) = sourceExpr + } + + predicate phi(SsaVariable v) { v.asInstruction() instanceof IR::PhiInstruction } + + SsaVariable getAPhiInput(SsaVariable v) { + exists(IR::PhiInstruction instr | v.asInstruction() = instr | + result.asInstruction() = instr.getAnInput() + or + result.asOperand() = instr.getAnInputOperand() + ) + } + + Expr getAUse(SsaVariable v) { + result.getUnconverted().(IR::LoadInstruction).getSourceValue() = v.asInstruction() + or + result.getUnconverted() = v.asPointerArithGuard().getAnInstruction() + } + + SemType getSsaVariableType(SsaVariable v) { + result = getSemanticType(v.asInstruction().getResultIRType()) + } + + BasicBlock getSsaVariableBasicBlock(SsaVariable v) { + result = v.asInstruction().getBlock() + or + result = v.asOperand().getUse().getBlock() + } + + private newtype TReadPosition = + TReadPositionBlock(IR::IRBlock block) or + TReadPositionPhiInputEdge(IR::IRBlock pred, IR::IRBlock succ) { + exists(IR::PhiInputOperand input | + pred = input.getPredecessorBlock() and + succ = input.getUse().getBlock() + ) + } + + class SsaReadPosition extends TReadPosition { + string toString() { none() } + + Location getLocation() { none() } + + predicate hasRead(SsaVariable v) { none() } + } + + private class SsaReadPositionBlock extends SsaReadPosition, TReadPositionBlock { + IR::IRBlock block; + + SsaReadPositionBlock() { this = TReadPositionBlock(block) } + + final override string toString() { result = block.toString() } + + final override Location getLocation() { result = block.getLocation() } + + final override predicate hasRead(SsaVariable v) { + exists(IR::Operand operand | + operand.getDef() = v.asInstruction() or + operand.getDef() = v.asPointerArithGuard().getAnInstruction() + | + not operand instanceof IR::PhiInputOperand and + operand.getUse().getBlock() = block + ) + } + } + + private class SsaReadPositionPhiInputEdge extends SsaReadPosition, TReadPositionPhiInputEdge { + IR::IRBlock pred; + IR::IRBlock succ; + + SsaReadPositionPhiInputEdge() { this = TReadPositionPhiInputEdge(pred, succ) } + + final override string toString() { result = pred.toString() + "->" + succ.toString() } + + final override Location getLocation() { result = succ.getLocation() } + + final override predicate hasRead(SsaVariable v) { + exists(IR::PhiInputOperand operand | + operand.getDef() = v.asInstruction() or + operand.getDef() = v.asPointerArithGuard().getAnInstruction() + | + operand.getPredecessorBlock() = pred and + operand.getUse().getBlock() = succ + ) + } + } + + predicate hasReadOfSsaVariable(SsaReadPosition pos, SsaVariable v) { pos.hasRead(v) } + + predicate readBlock(SsaReadPosition pos, BasicBlock block) { pos = TReadPositionBlock(block) } + + predicate phiInputEdge(SsaReadPosition pos, BasicBlock origBlock, BasicBlock phiBlock) { + pos = TReadPositionPhiInputEdge(origBlock, phiBlock) + } + + predicate phiInput(SsaReadPosition pos, SsaVariable phi, SsaVariable input) { + exists(IR::PhiInputOperand operand | + pos = TReadPositionPhiInputEdge(operand.getPredecessorBlock(), operand.getUse().getBlock()) + | + phi.asInstruction() = operand.getUse() and + ( + input.asInstruction() = operand.getDef() + or + input.asOperand() = operand + ) + ) + } + + class Bound instanceof IRBound::Bound { + string toString() { result = super.toString() } + + final Location getLocation() { result = super.getLocation() } + } + + private class ValueNumberBound extends Bound instanceof IRBound::ValueNumberBound { + override string toString() { result = IRBound::ValueNumberBound.super.toString() } + } + + predicate zeroBound(Bound bound) { bound instanceof IRBound::ZeroBound } + + predicate ssaBound(Bound bound, SsaVariable v) { + v.asInstruction() = bound.(IRBound::ValueNumberBound).getValueNumber().getAnInstruction() + } + + Expr getBoundExpr(Bound bound, int delta) { + result = getSemanticExpr(bound.(IRBound::Bound).getInstruction(delta)) + } + + class Guard = IRGuards::IRGuardCondition; + + predicate guard(Guard guard, BasicBlock block) { block = guard.getBlock() } + + Expr getGuardAsExpr(Guard guard) { result = getSemanticExpr(guard) } + + predicate equalityGuard(Guard guard, Expr e1, Expr e2, boolean polarity) { + exists(IR::Instruction left, IR::Instruction right | + getSemanticExpr(left) = e1 and + getSemanticExpr(right) = e2 and + guard.comparesEq(left.getAUse(), right.getAUse(), 0, true, polarity) + ) + } + + predicate guardDirectlyControlsBlock(Guard guard, BasicBlock controlled, boolean branch) { + guard.controls(controlled, branch) + } + + predicate guardHasBranchEdge(Guard guard, BasicBlock bb1, BasicBlock bb2, boolean branch) { + guard.controlsEdge(bb1, bb2, branch) + } + + Guard comparisonGuard(Expr e) { getSemanticExpr(result) = e } + + predicate implies_v2(Guard g1, boolean b1, Guard g2, boolean b2) { + none() // TODO + } + + /** Gets the expression associated with `instr`. */ + SemExpr getSemanticExpr(IR::Instruction instr) { result = Equiv::getEquivalenceClass(instr) } +} + +predicate getSemanticExpr = SemanticExprConfig::getSemanticExpr/1; + +SemBasicBlock getSemanticBasicBlock(IR::IRBlock block) { result = block } + +IR::IRBlock getCppBasicBlock(SemBasicBlock block) { block = result } + +SemSsaVariable getSemanticSsaVariable(IR::Instruction instr) { + result.(SemanticExprConfig::SsaVariable).asInstruction() = instr +} + +IR::Instruction getCppSsaVariableInstruction(SemSsaVariable var) { + var.(SemanticExprConfig::SsaVariable).asInstruction() = result +} + +SemBound getSemanticBound(IRBound::Bound bound) { result = bound } + +IRBound::Bound getCppBound(SemBound bound) { bound = result } + +SemGuard getSemanticGuard(IRGuards::IRGuardCondition guard) { result = guard } + +IRGuards::IRGuardCondition getCppGuard(SemGuard guard) { guard = result } diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticGuard.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticGuard.qll new file mode 100644 index 00000000000..8faf6a3a1de --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticGuard.qll @@ -0,0 +1,65 @@ +/** + * Semantic interface to the guards library. + */ + +private import Semantic +private import SemanticExprSpecific::SemanticExprConfig as Specific + +class SemGuard instanceof Specific::Guard { + SemBasicBlock block; + + SemGuard() { Specific::guard(this, block) } + + final string toString() { result = super.toString() } + + final Specific::Location getLocation() { result = super.getLocation() } + + final predicate isEquality(SemExpr e1, SemExpr e2, boolean polarity) { + Specific::equalityGuard(this, e1, e2, polarity) + } + + final predicate directlyControls(SemBasicBlock controlled, boolean branch) { + Specific::guardDirectlyControlsBlock(this, controlled, branch) + } + + final predicate hasBranchEdge(SemBasicBlock bb1, SemBasicBlock bb2, boolean branch) { + Specific::guardHasBranchEdge(this, bb1, bb2, branch) + } + + final SemBasicBlock getBasicBlock() { result = block } + + final SemExpr asExpr() { result = Specific::getGuardAsExpr(this) } +} + +predicate semImplies_v2(SemGuard g1, boolean b1, SemGuard g2, boolean b2) { + Specific::implies_v2(g1, b1, g2, b2) +} + +/** + * Holds if `guard` directly controls the position `controlled` with the + * value `testIsTrue`. + */ +predicate semGuardDirectlyControlsSsaRead( + SemGuard guard, SemSsaReadPosition controlled, boolean testIsTrue +) { + guard.directlyControls(controlled.(SemSsaReadPositionBlock).getBlock(), testIsTrue) + or + exists(SemSsaReadPositionPhiInputEdge controlledEdge | controlledEdge = controlled | + guard.directlyControls(controlledEdge.getOrigBlock(), testIsTrue) or + guard.hasBranchEdge(controlledEdge.getOrigBlock(), controlledEdge.getPhiBlock(), testIsTrue) + ) +} + +/** + * Holds if `guard` controls the position `controlled` with the value `testIsTrue`. + */ +predicate semGuardControlsSsaRead(SemGuard guard, SemSsaReadPosition controlled, boolean testIsTrue) { + semGuardDirectlyControlsSsaRead(guard, controlled, testIsTrue) + or + exists(SemGuard guard0, boolean testIsTrue0 | + semImplies_v2(guard0, testIsTrue0, guard, testIsTrue) and + semGuardControlsSsaRead(guard0, controlled, testIsTrue0) + ) +} + +SemGuard semGetComparisonGuard(SemRelationalExpr e) { result = Specific::comparisonGuard(e) } diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticLocation.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticLocation.qll new file mode 100644 index 00000000000..328f85151b5 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticLocation.qll @@ -0,0 +1,23 @@ +private import semmle.code.cpp.Location + +class SemLocation instanceof Location { + /** + * Gets a textual representation of this element. + * + * The format is "file://filePath:startLine:startColumn:endLine:endColumn". + */ + string toString() { result = super.toString() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticOpcode.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticOpcode.qll new file mode 100644 index 00000000000..bb56da71f73 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticOpcode.qll @@ -0,0 +1,187 @@ +/** + * Definitions of all possible opcodes for `SemExpr`. + */ +private newtype TOpcode = + TInitializeParameter() or + TCopyValue() or + TLoad() or + TStore() or + TAdd() or + TSub() or + TMul() or + TDiv() or + TRem() or + TNegate() or + TShiftLeft() or + TShiftRight() or + TShiftRightUnsigned() or // TODO: Based on type + TBitAnd() or + TBitOr() or + TBitXor() or + TBitComplement() or + TLogicalNot() or + TCompareEQ() or + TCompareNE() or + TCompareLT() or + TCompareGT() or + TCompareLE() or + TCompareGE() or + TPointerAdd() or + TPointerSub() or + TPointerDiff() or + TConvert() or + TConstant() or + TStringConstant() or + TAddOne() or // TODO: Combine with `TAdd` + TSubOne() or // TODO: Combine with `TSub` + TConditional() or // TODO: Represent as flow + TCall() or + TBox() or + TUnbox() or + TUnknown() + +class Opcode extends TOpcode { + string toString() { result = "???" } +} + +module Opcode { + class InitializeParameter extends Opcode, TInitializeParameter { + override string toString() { result = "InitializeParameter" } + } + + class CopyValue extends Opcode, TCopyValue { + override string toString() { result = "CopyValue" } + } + + class Load extends Opcode, TLoad { + override string toString() { result = "Load" } + } + + class Store extends Opcode, TStore { + override string toString() { result = "Store" } + } + + class Add extends Opcode, TAdd { + override string toString() { result = "Add" } + } + + class PointerAdd extends Opcode, TPointerAdd { + override string toString() { result = "PointerAdd" } + } + + class Sub extends Opcode, TSub { + override string toString() { result = "Sub" } + } + + class PointerSub extends Opcode, TPointerSub { + override string toString() { result = "PointerSub" } + } + + class Mul extends Opcode, TMul { + override string toString() { result = "Mul" } + } + + class Div extends Opcode, TDiv { + override string toString() { result = "Div" } + } + + class Rem extends Opcode, TRem { + override string toString() { result = "Rem" } + } + + class Negate extends Opcode, TNegate { + override string toString() { result = "Negate" } + } + + class ShiftLeft extends Opcode, TShiftLeft { + override string toString() { result = "ShiftLeft" } + } + + class ShiftRight extends Opcode, TShiftRight { + override string toString() { result = "ShiftRight" } + } + + class ShiftRightUnsigned extends Opcode, TShiftRightUnsigned { + override string toString() { result = "ShiftRightUnsigned" } + } + + class BitAnd extends Opcode, TBitAnd { + override string toString() { result = "BitAnd" } + } + + class BitOr extends Opcode, TBitOr { + override string toString() { result = "BitOr" } + } + + class BitXor extends Opcode, TBitXor { + override string toString() { result = "BitXor" } + } + + class BitComplement extends Opcode, TBitComplement { + override string toString() { result = "BitComplement" } + } + + class LogicalNot extends Opcode, TLogicalNot { + override string toString() { result = "LogicalNot" } + } + + class CompareEQ extends Opcode, TCompareEQ { + override string toString() { result = "CompareEQ" } + } + + class CompareNE extends Opcode, TCompareNE { + override string toString() { result = "CompareNE" } + } + + class CompareLT extends Opcode, TCompareLT { + override string toString() { result = "CompareLT" } + } + + class CompareLE extends Opcode, TCompareLE { + override string toString() { result = "CompareLE" } + } + + class CompareGT extends Opcode, TCompareGT { + override string toString() { result = "CompareGT" } + } + + class CompareGE extends Opcode, TCompareGE { + override string toString() { result = "CompareGE" } + } + + class Convert extends Opcode, TConvert { + override string toString() { result = "Convert" } + } + + class AddOne extends Opcode, TAddOne { + override string toString() { result = "AddOne" } + } + + class SubOne extends Opcode, TSubOne { + override string toString() { result = "SubOne" } + } + + class Conditional extends Opcode, TConditional { + override string toString() { result = "Conditional" } + } + + class Constant extends Opcode, TConstant { + override string toString() { result = "Constant" } + } + + class StringConstant extends Opcode, TStringConstant { + override string toString() { result = "StringConstant" } + } + + class Box extends Opcode, TBox { + override string toString() { result = "Box" } + } + + class Unbox extends Opcode, TUnbox { + override string toString() { result = "Unbox" } + } + + class Unknown extends Opcode, TUnknown { + override string toString() { result = "Unknown" } + } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticSSA.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticSSA.qll new file mode 100644 index 00000000000..307f6e386b5 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticSSA.qll @@ -0,0 +1,75 @@ +/** + * Semantic interface to the SSA library. + */ + +private import Semantic +private import SemanticExprSpecific::SemanticExprConfig as Specific + +class SemSsaVariable instanceof Specific::SsaVariable { + final string toString() { result = super.toString() } + + final Specific::Location getLocation() { result = super.getLocation() } + + final SemExpr getAUse() { result = Specific::getAUse(this) } + + final SemType getType() { result = Specific::getSsaVariableType(this) } + + final SemBasicBlock getBasicBlock() { result = Specific::getSsaVariableBasicBlock(this) } +} + +class SemSsaExplicitUpdate extends SemSsaVariable { + SemExpr sourceExpr; + + SemSsaExplicitUpdate() { Specific::explicitUpdate(this, sourceExpr) } + + final SemExpr getSourceExpr() { result = sourceExpr } +} + +class SemSsaPhiNode extends SemSsaVariable { + SemSsaPhiNode() { Specific::phi(this) } + + final SemSsaVariable getAPhiInput() { result = Specific::getAPhiInput(this) } +} + +class SemSsaReadPosition instanceof Specific::SsaReadPosition { + final string toString() { result = super.toString() } + + final Specific::Location getLocation() { result = super.getLocation() } + + final predicate hasReadOfVar(SemSsaVariable var) { Specific::hasReadOfSsaVariable(this, var) } +} + +class SemSsaReadPositionPhiInputEdge extends SemSsaReadPosition { + SemBasicBlock origBlock; + SemBasicBlock phiBlock; + + SemSsaReadPositionPhiInputEdge() { Specific::phiInputEdge(this, origBlock, phiBlock) } + + predicate phiInput(SemSsaPhiNode phi, SemSsaVariable inp) { Specific::phiInput(this, phi, inp) } + + SemBasicBlock getOrigBlock() { result = origBlock } + + SemBasicBlock getPhiBlock() { result = phiBlock } +} + +class SemSsaReadPositionBlock extends SemSsaReadPosition { + SemBasicBlock block; + + SemSsaReadPositionBlock() { Specific::readBlock(this, block) } + + SemBasicBlock getBlock() { result = block } + + SemExpr getAnExpr() { result = getBlock().getAnExpr() } +} + +/** + * Holds if `inp` is an input to `phi` along a back edge. + */ +predicate semBackEdge(SemSsaPhiNode phi, SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge) { + edge.phiInput(phi, inp) and + // Conservatively assume that every edge is a back edge if we don't have dominance information. + ( + phi.getBasicBlock().bbDominates(edge.getOrigBlock()) or + not edge.getOrigBlock().hasDominanceInformation() + ) +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticType.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticType.qll new file mode 100644 index 00000000000..b86db02702c --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticType.qll @@ -0,0 +1,311 @@ +/** + * Minimal, language-neutral type system for semantic analysis. + */ + +private import SemanticTypeSpecific as Specific + +class LanguageType = Specific::Type; + +cached +private newtype TSemType = + TSemVoidType() { Specific::voidType(_) } or + TSemUnknownType() { Specific::unknownType(_) } or + TSemErrorType() { Specific::errorType(_) } or + TSemBooleanType(int byteSize) { Specific::booleanType(_, byteSize) } or + TSemIntegerType(int byteSize, boolean signed) { Specific::integerType(_, byteSize, signed) } or + TSemFloatingPointType(int byteSize) { Specific::floatingPointType(_, byteSize) } or + TSemAddressType(int byteSize) { Specific::addressType(_, byteSize) } or + TSemFunctionAddressType(int byteSize) { Specific::functionAddressType(_, byteSize) } or + TSemOpaqueType(int byteSize, Specific::OpaqueTypeTag tag) { + Specific::opaqueType(_, byteSize, tag) + } + +/** + * The language-neutral type of a semantic expression, + * The interface to `SemType` and its subclasses is the same across all languages for which the IR + * is supported, so analyses that expect to be used for multiple languages should generally use + * `SemType` rather than a language-specific type. + * + * Many types from the language-specific type system will map to a single canonical `SemType`. Two + * types that map to the same `SemType` are considered equivalent by semantic analysis. As an + * example, in C++, all pointer types map to the same instance of `SemAddressType`. + */ +class SemType extends TSemType { + /** Gets a textual representation of this type. */ + string toString() { none() } + + /** + * Gets a string that uniquely identifies this `SemType`. This string is often the same as the + * result of `SemType.toString()`, but for some types it may be more verbose to ensure uniqueness. + */ + string getIdentityString() { result = toString() } + + /** + * Gets the size of the type, in bytes, if known. + * + * This will hold for all `SemType` objects except `SemUnknownType` and `SemErrorType`. + */ + // This predicate is overridden with `pragma[noinline]` in every leaf subclass. + // This allows callers to ask for things like _the_ floating-point type of + // size 4 without getting a join that first finds all types of size 4 and + // _then_ restricts them to floating-point types. + int getByteSize() { none() } +} + +/** + * An unknown type. Generally used to represent results and operands that access an unknown set of + * memory locations, such as the side effects of a function call. + */ +class SemUnknownType extends SemType, TSemUnknownType { + final override string toString() { result = "unknown" } + + final override int getByteSize() { none() } +} + +/** + * A void type, which has no values. Used to represent the result type of an expression that does + * not produce a result. + */ +class SemVoidType extends SemType, TSemVoidType { + final override string toString() { result = "void" } + + final override int getByteSize() { result = 0 } +} + +/** + * An error type. Used when an error in the source code prevents the extractor from determining the + * proper type. + */ +class SemErrorType extends SemType, TSemErrorType { + final override string toString() { result = "error" } + + final override int getByteSize() { result = 0 } +} + +private class SemSizedType extends SemType { + int byteSize; + + SemSizedType() { + this = TSemBooleanType(byteSize) or + this = TSemIntegerType(byteSize, _) or + this = TSemFloatingPointType(byteSize) or + this = TSemAddressType(byteSize) or + this = TSemFunctionAddressType(byteSize) or + this = TSemOpaqueType(byteSize, _) + } + // Don't override `getByteSize()` here. The optimizer seems to generate better code when this is + // overridden only in the leaf classes. +} + +/** + * A Boolean type, which can hold the values `true` (non-zero) or `false` (zero). + */ +class SemBooleanType extends SemSizedType, TSemBooleanType { + final override string toString() { result = "bool" + byteSize.toString() } + + pragma[noinline] + final override int getByteSize() { result = byteSize } +} + +/** + * A numeric type. This includes `SemSignedIntegerType`, `SemUnsignedIntegerType`, and + * `SemFloatingPointType`. + */ +class SemNumericType extends SemSizedType { + SemNumericType() { + this = TSemIntegerType(byteSize, _) or + this = TSemFloatingPointType(byteSize) + } + // Don't override `getByteSize()` here. The optimizer seems to generate better code when this is + // overridden only in the leaf classes. +} + +/** + * An integer type. This includes `SemSignedIntegerType` and `SemUnsignedIntegerType`. + */ +class SemIntegerType extends SemNumericType { + boolean signed; + + SemIntegerType() { this = TSemIntegerType(byteSize, signed) } + + /** Holds if this integer type is signed. */ + final predicate isSigned() { signed = true } + + /** Holds if this integer type is unsigned. */ + final predicate isUnsigned() { not isSigned() } + // Don't override `getByteSize()` here. The optimizer seems to generate better code when this is + // overridden only in the leaf classes. +} + +/** + * A signed two's-complement integer. Also used to represent enums whose underlying type is a signed + * integer, as well as character types whose representation is signed. + */ +class SemSignedIntegerType extends SemIntegerType { + SemSignedIntegerType() { signed = true } + + final override string toString() { result = "int" + byteSize.toString() } + + pragma[noinline] + final override int getByteSize() { result = byteSize } +} + +/** + * An unsigned two's-complement integer. Also used to represent enums whose underlying type is an + * unsigned integer, as well as character types whose representation is unsigned. + */ +class SemUnsignedIntegerType extends SemIntegerType { + SemUnsignedIntegerType() { signed = false } + + final override string toString() { result = "uint" + byteSize.toString() } + + pragma[noinline] + final override int getByteSize() { result = byteSize } +} + +/** + * A floating-point type. + */ +class SemFloatingPointType extends SemNumericType, TSemFloatingPointType { + final override string toString() { result = "float" + byteSize.toString() } + + pragma[noinline] + final override int getByteSize() { result = byteSize } +} + +/** + * An address type, representing the memory address of data. Used to represent pointers, references, + * and lvalues, include those that are garbage collected. + * + * The address of a function is represented by the separate `SemFunctionAddressType`. + */ +class SemAddressType extends SemSizedType, TSemAddressType { + final override string toString() { result = "addr" + byteSize.toString() } + + pragma[noinline] + final override int getByteSize() { result = byteSize } +} + +/** + * An address type, representing the memory address of code. Used to represent function pointers, + * function references, and the target of a direct function call. + */ +class SemFunctionAddressType extends SemSizedType, TSemFunctionAddressType { + final override string toString() { result = "func" + byteSize.toString() } + + pragma[noinline] + final override int getByteSize() { result = byteSize } +} + +/** + * A type with known size that does not fit any of the other kinds of type. Used to represent + * classes, structs, unions, fixed-size arrays, pointers-to-member, and more. + */ +class SemOpaqueType extends SemSizedType, TSemOpaqueType { + Specific::OpaqueTypeTag tag; + + SemOpaqueType() { this = TSemOpaqueType(byteSize, tag) } + + final override string toString() { + result = "opaque" + byteSize.toString() + "{" + tag.toString() + "}" + } + + final override string getIdentityString() { + result = "opaque" + byteSize.toString() + "{" + Specific::getOpaqueTagIdentityString(tag) + "}" + } + + /** + * Gets the "tag" that differentiates this type from other incompatible opaque types that have the + * same size. + */ + final Specific::OpaqueTypeTag getTag() { result = tag } + + pragma[noinline] + final override int getByteSize() { result = byteSize } +} + +cached +SemType getSemanticType(Specific::Type type) { + exists(int byteSize | + Specific::booleanType(type, byteSize) and result = TSemBooleanType(byteSize) + or + exists(boolean signed | + Specific::integerType(type, byteSize, signed) and + result = TSemIntegerType(byteSize, signed) + ) + or + Specific::floatingPointType(type, byteSize) and result = TSemFloatingPointType(byteSize) + or + Specific::addressType(type, byteSize) and result = TSemAddressType(byteSize) + or + Specific::functionAddressType(type, byteSize) and result = TSemFunctionAddressType(byteSize) + or + exists(Specific::OpaqueTypeTag tag | + Specific::opaqueType(type, byteSize, tag) and result = TSemOpaqueType(byteSize, tag) + ) + ) + or + Specific::errorType(type) and result = TSemErrorType() + or + Specific::unknownType(type) and result = TSemUnknownType() +} + +private class SemNumericOrBooleanType extends SemSizedType { + SemNumericOrBooleanType() { + this instanceof SemNumericType + or + this instanceof SemBooleanType + } +} + +/** + * Holds if the conversion from `fromType` to `toType` can never overflow or underflow. + */ +predicate conversionCannotOverflow(SemNumericOrBooleanType fromType, SemNumericOrBooleanType toType) { + // Identity cast + fromType = toType + or + // Treat any cast to an FP type as safe. It can lose precision, but not overflow. + toType instanceof SemFloatingPointType and fromType = any(SemNumericType n) + or + fromType instanceof SemBooleanType and toType instanceof SemIntegerType + or + exists(SemIntegerType fromInteger, SemIntegerType toInteger, int fromSize, int toSize | + fromInteger = fromType and + toInteger = toType and + fromSize = fromInteger.getByteSize() and + toSize = toInteger.getByteSize() + | + // Conversion to a larger type. Safe unless converting signed -> unsigned. + fromSize < toSize and + ( + toInteger.isSigned() + or + not fromInteger.isSigned() + ) + ) +} + +/** + * INTERNAL: Do not use. + * Query predicates used to check invariants that should hold for all `SemType` objects. + */ +module SemTypeConsistency { + /** + * Holds if the type has no result for `getSemanticType()`. + */ + query predicate missingSemType(Specific::Type type, string message) { + not exists(getSemanticType(type)) and + message = "`Type` does not have a corresponding `SemType`." + } + + /** + * Holds if the type has more than one result for `getSemanticType()`. + */ + query predicate multipleSemTypes(Specific::Type type, string message) { + strictcount(getSemanticType(type)) > 1 and + message = + "`Type` " + type + " has multiple `SemType`s: " + + concat(getSemanticType(type).toString(), ", ") + } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticTypeSpecific.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticTypeSpecific.qll new file mode 100644 index 00000000000..e301587de2f --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/SemanticTypeSpecific.qll @@ -0,0 +1,43 @@ +/** + * C++-specific implementation of the semantic type system. + */ + +private import semmle.code.cpp.ir.IR as IR +private import cpp as Cpp +private import semmle.code.cpp.ir.internal.IRCppLanguage as Language + +class Type = IR::IRType; + +class OpaqueTypeTag = Language::OpaqueTypeTag; + +predicate voidType(Type type) { type instanceof IR::IRVoidType } + +predicate errorType(Type type) { type instanceof IR::IRErrorType } + +predicate unknownType(Type type) { type instanceof IR::IRUnknownType } + +predicate booleanType(Type type, int byteSize) { byteSize = type.(IR::IRBooleanType).getByteSize() } + +predicate integerType(Type type, int byteSize, boolean signed) { + byteSize = type.(IR::IRSignedIntegerType).getByteSize() and signed = true + or + byteSize = type.(IR::IRUnsignedIntegerType).getByteSize() and signed = false +} + +predicate floatingPointType(Type type, int byteSize) { + byteSize = type.(IR::IRFloatingPointType).getByteSize() +} + +predicate addressType(Type type, int byteSize) { byteSize = type.(IR::IRAddressType).getByteSize() } + +predicate functionAddressType(Type type, int byteSize) { + byteSize = type.(IR::IRFunctionAddressType).getByteSize() +} + +predicate opaqueType(Type type, int byteSize, OpaqueTypeTag tag) { + exists(IR::IROpaqueType opaque | opaque = type | + byteSize = opaque.getByteSize() and tag = opaque.getTag() + ) +} + +predicate getOpaqueTagIdentityString = Language::getOpaqueTagIdentityString/1; diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/Bound.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/Bound.qll new file mode 100644 index 00000000000..abff447ca87 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/Bound.qll @@ -0,0 +1,86 @@ +import cpp +private import semmle.code.cpp.ir.IR +private import semmle.code.cpp.ir.ValueNumbering + +private newtype TBound = + TBoundZero() or + TBoundValueNumber(ValueNumber vn) { + exists(Instruction i | + vn.getAnInstruction() = i and + ( + i.getResultIRType() instanceof IRIntegerType or + i.getResultIRType() instanceof IRAddressType + ) and + not vn.getAnInstruction() instanceof ConstantInstruction + | + i instanceof PhiInstruction + or + i instanceof InitializeParameterInstruction + or + i instanceof CallInstruction + or + i instanceof VariableAddressInstruction + or + i instanceof FieldAddressInstruction + or + i.(LoadInstruction).getSourceAddress() instanceof VariableAddressInstruction + or + i.(LoadInstruction).getSourceAddress() instanceof FieldAddressInstruction + or + i.getAUse() instanceof ArgumentOperand + or + i instanceof PointerArithmeticInstruction + or + i.getAUse() instanceof AddressOperand + ) + } + +/** + * A bound that may be inferred for an expression plus/minus an integer delta. + */ +abstract class Bound extends TBound { + abstract string toString(); + + /** Gets an expression that equals this bound plus `delta`. */ + abstract Instruction getInstruction(int delta); + + /** Gets an expression that equals this bound. */ + Instruction getInstruction() { result = getInstruction(0) } + + abstract Location getLocation(); +} + +/** + * The bound that corresponds to the integer 0. This is used to represent all + * integer bounds as bounds are always accompanied by an added integer delta. + */ +class ZeroBound extends Bound, TBoundZero { + override string toString() { result = "0" } + + override Instruction getInstruction(int delta) { + result.(ConstantValueInstruction).getValue().toInt() = delta + } + + override Location getLocation() { result instanceof UnknownDefaultLocation } +} + +/** + * A bound corresponding to the value of an `Instruction`. + */ +class ValueNumberBound extends Bound, TBoundValueNumber { + ValueNumber vn; + + ValueNumberBound() { this = TBoundValueNumber(vn) } + + /** Gets an `Instruction` that equals this bound. */ + override Instruction getInstruction(int delta) { + this = TBoundValueNumber(valueNumber(result)) and delta = 0 + } + + override string toString() { result = "ValueNumberBound" } + + override Location getLocation() { result = vn.getLocation() } + + /** Gets the value number that equals this bound. */ + ValueNumber getValueNumber() { result = vn } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ConstantAnalysis.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ConstantAnalysis.qll new file mode 100644 index 00000000000..c01b64e73d6 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ConstantAnalysis.qll @@ -0,0 +1,31 @@ +/** + * Simple constant analysis using the Semantic interface. + */ + +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.Semantic +private import ConstantAnalysisSpecific as Specific + +/** An expression that always has the same integer value. */ +pragma[nomagic] +private predicate constantIntegerExpr(SemExpr e, int val) { + // An integer literal + e.(SemIntegerLiteralExpr).getIntValue() = val + or + // Copy of another constant + exists(SemSsaExplicitUpdate v, SemExpr src | + e = v.getAUse() and + src = v.getSourceExpr() and + constantIntegerExpr(src, val) + ) + or + // Language-specific enhancements + val = Specific::getIntConstantValue(e) +} + +/** An expression that always has the same integer value. */ +class SemConstantIntegerExpr extends SemExpr { + SemConstantIntegerExpr() { constantIntegerExpr(this, _) } + + /** Gets the integer value of this expression. */ + int getIntValue() { constantIntegerExpr(this, result) } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ConstantAnalysisSpecific.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ConstantAnalysisSpecific.qll new file mode 100644 index 00000000000..4713a10ebfc --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ConstantAnalysisSpecific.qll @@ -0,0 +1,10 @@ +/** + * C++-specific implementation of constant analysis. + */ + +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.Semantic + +/** + * Gets the constant integer value of the specified expression, if any. + */ +int getIntConstantValue(SemExpr expr) { none() } diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/FloatDelta.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/FloatDelta.qll new file mode 100644 index 00000000000..1dfcc445208 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/FloatDelta.qll @@ -0,0 +1,20 @@ +private import RangeAnalysisStage + +module FloatDelta implements DeltaSig { + class Delta = float; + + bindingset[d] + bindingset[result] + float toFloat(Delta d) { result = d } + + bindingset[d] + bindingset[result] + int toInt(Delta d) { result = d } + + bindingset[n] + bindingset[result] + Delta fromInt(int n) { result = n } + + bindingset[f] + Delta fromFloat(float f) { result = f } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/IntDelta.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/IntDelta.qll new file mode 100644 index 00000000000..83698b56229 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/IntDelta.qll @@ -0,0 +1,29 @@ +private import RangeAnalysisStage + +module IntDelta implements DeltaSig { + class Delta = int; + + bindingset[d] + bindingset[result] + float toFloat(Delta d) { result = d } + + bindingset[d] + bindingset[result] + int toInt(Delta d) { result = d } + + bindingset[n] + bindingset[result] + Delta fromInt(int n) { result = n } + + bindingset[f] + Delta fromFloat(float f) { + result = + min(float diff, float res | + diff = (res - f) and res = f.ceil() + or + diff = (f - res) and res = f.floor() + | + res order by diff + ) + } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ModulusAnalysis.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ModulusAnalysis.qll new file mode 100644 index 00000000000..a05e948a2b0 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ModulusAnalysis.qll @@ -0,0 +1,341 @@ +/** + * Provides inferences of the form: `e` equals `b + v` modulo `m` where `e` is + * an expression, `b` is a `Bound` (typically zero or the value of an SSA + * variable), and `v` is an integer in the range `[0 .. m-1]`. + */ + +/* + * The main recursion has base cases in both `ssaModulus` (for guarded reads) and `semExprModulus` + * (for constant values). The most interesting recursive case is `phiModulusRankStep`, which + * handles phi inputs. + */ + +private import ModulusAnalysisSpecific::Private +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.Semantic +private import ConstantAnalysis +private import RangeUtils +private import RangeAnalysisStage + +module ModulusAnalysis Bounds, UtilSig U> { + /** + * Holds if `e + delta` equals `v` at `pos`. + */ + private predicate valueFlowStepSsa(SemSsaVariable v, SemSsaReadPosition pos, SemExpr e, int delta) { + U::semSsaUpdateStep(v, e, D::fromInt(delta)) and pos.hasReadOfVar(v) + or + exists(SemGuard guard, boolean testIsTrue | + pos.hasReadOfVar(v) and + guard = U::semEqFlowCond(v, e, D::fromInt(delta), true, testIsTrue) and + semGuardDirectlyControlsSsaRead(guard, pos, testIsTrue) + ) + } + + /** + * Holds if `add` is the addition of `larg` and `rarg`, neither of which are + * `ConstantIntegerExpr`s. + */ + private predicate nonConstAddition(SemExpr add, SemExpr larg, SemExpr rarg) { + exists(SemAddExpr a | a = add | + larg = a.getLeftOperand() and + rarg = a.getRightOperand() + ) and + not larg instanceof SemConstantIntegerExpr and + not rarg instanceof SemConstantIntegerExpr + } + + /** + * Holds if `sub` is the subtraction of `larg` and `rarg`, where `rarg` is not + * a `ConstantIntegerExpr`. + */ + private predicate nonConstSubtraction(SemExpr sub, SemExpr larg, SemExpr rarg) { + exists(SemSubExpr s | s = sub | + larg = s.getLeftOperand() and + rarg = s.getRightOperand() + ) and + not rarg instanceof SemConstantIntegerExpr + } + + /** Gets an expression that is the remainder modulo `mod` of `arg`. */ + private SemExpr modExpr(SemExpr arg, int mod) { + exists(SemRemExpr rem | + result = rem and + arg = rem.getLeftOperand() and + rem.getRightOperand().(SemConstantIntegerExpr).getIntValue() = mod and + mod >= 2 + ) + or + exists(SemConstantIntegerExpr c | + mod = 2.pow([1 .. 30]) and + c.getIntValue() = mod - 1 and + result.(SemBitAndExpr).hasOperands(arg, c) + ) + } + + /** + * Gets a guard that tests whether `v` is congruent with `val` modulo `mod` on + * its `testIsTrue` branch. + */ + private SemGuard moduloCheck(SemSsaVariable v, int val, int mod, boolean testIsTrue) { + exists(SemExpr rem, SemConstantIntegerExpr c, int r, boolean polarity | + result.isEquality(rem, c, polarity) and + c.getIntValue() = r and + rem = modExpr(v.getAUse(), mod) and + ( + testIsTrue = polarity and val = r + or + testIsTrue = polarity.booleanNot() and + mod = 2 and + val = 1 - r and + (r = 0 or r = 1) + ) + ) + } + + /** + * Holds if a guard ensures that `v` at `pos` is congruent with `val` modulo `mod`. + */ + private predicate moduloGuardedRead(SemSsaVariable v, SemSsaReadPosition pos, int val, int mod) { + exists(SemGuard guard, boolean testIsTrue | + pos.hasReadOfVar(v) and + guard = moduloCheck(v, val, mod, testIsTrue) and + semGuardControlsSsaRead(guard, pos, testIsTrue) + ) + } + + /** Holds if `factor` is a power of 2 that divides `mask`. */ + bindingset[mask] + private predicate andmaskFactor(int mask, int factor) { + mask % factor = 0 and + factor = 2.pow([1 .. 30]) + } + + /** Holds if `e` is evenly divisible by `factor`. */ + private predicate evenlyDivisibleExpr(SemExpr e, int factor) { + exists(SemConstantIntegerExpr c, int k | k = c.getIntValue() | + e.(SemMulExpr).getAnOperand() = c and factor = k.abs() and factor >= 2 + or + e.(SemShiftLeftExpr).getRightOperand() = c and factor = 2.pow(k) and k > 0 + or + e.(SemBitAndExpr).getAnOperand() = c and factor = max(int f | andmaskFactor(k, f)) + ) + } + + /** + * Holds if `rix` is the number of input edges to `phi`. + */ + private predicate maxPhiInputRank(SemSsaPhiNode phi, int rix) { + rix = max(int r | rankedPhiInput(phi, _, _, r)) + } + + /** + * Gets the remainder of `val` modulo `mod`. + * + * For `mod = 0` the result equals `val` and for `mod > 1` the result is within + * the range `[0 .. mod-1]`. + */ + bindingset[val, mod] + private int remainder(int val, int mod) { + mod = 0 and result = val + or + mod > 1 and result = ((val % mod) + mod) % mod + } + + /** + * Holds if `inp` is an input to `phi` and equals `phi` modulo `mod` along `edge`. + */ + private predicate phiSelfModulus( + SemSsaPhiNode phi, SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge, int mod + ) { + exists(Bounds::SemSsaBound phibound, int v, int m | + edge.phiInput(phi, inp) and + phibound.getAVariable() = phi and + ssaModulus(inp, edge, phibound, v, m) and + mod = m.gcd(v) and + mod != 1 + ) + } + + /** + * Holds if `b + val` modulo `mod` is a candidate congruence class for `phi`. + */ + private predicate phiModulusInit(SemSsaPhiNode phi, Bounds::SemBound b, int val, int mod) { + exists(SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge | + edge.phiInput(phi, inp) and + ssaModulus(inp, edge, b, val, mod) + ) + } + + /** + * Holds if all inputs to `phi` numbered `1` to `rix` are equal to `b + val` modulo `mod`. + */ + pragma[nomagic] + private predicate phiModulusRankStep( + SemSsaPhiNode phi, Bounds::SemBound b, int val, int mod, int rix + ) { + /* + * base case. If any phi input is equal to `b + val` modulo `mod`, that's a potential congruence + * class for the phi node. + */ + + rix = 0 and + phiModulusInit(phi, b, val, mod) + or + exists(SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge, int v1, int m1 | + mod != 1 and + val = remainder(v1, mod) + | + /* + * Recursive case. If `inp` = `b + v2` mod `m2`, we combine that with the preceding potential + * congruence class `b + v1` mod `m1`. The result will be the congruence class of `v1` modulo + * the greatest common denominator of `m1`, `m2`, and `v1 - v2`. + */ + + exists(int v2, int m2 | + rankedPhiInput(pragma[only_bind_out](phi), inp, edge, rix) and + phiModulusRankStep(phi, b, v1, m1, rix - 1) and + ssaModulus(inp, edge, b, v2, m2) and + mod = m1.gcd(m2).gcd(v1 - v2) + ) + or + /* + * Recursive case. If `inp` = `phi` mod `m2`, we combine that with the preceding potential + * congruence class `b + v1` mod `m1`. The result will be a congruence class modulo the greatest + * common denominator of `m1` and `m2`. + */ + + exists(int m2 | + rankedPhiInput(phi, inp, edge, rix) and + phiModulusRankStep(phi, b, v1, m1, rix - 1) and + phiSelfModulus(phi, inp, edge, m2) and + mod = m1.gcd(m2) + ) + ) + } + + /** + * Holds if `phi` is equal to `b + val` modulo `mod`. + */ + private predicate phiModulus(SemSsaPhiNode phi, Bounds::SemBound b, int val, int mod) { + exists(int r | + maxPhiInputRank(phi, r) and + phiModulusRankStep(phi, b, val, mod, r) + ) + } + + /** + * Holds if `v` at `pos` is equal to `b + val` modulo `mod`. + */ + private predicate ssaModulus( + SemSsaVariable v, SemSsaReadPosition pos, Bounds::SemBound b, int val, int mod + ) { + phiModulus(v, b, val, mod) and pos.hasReadOfVar(v) + or + b.(Bounds::SemSsaBound).getAVariable() = v and pos.hasReadOfVar(v) and val = 0 and mod = 0 + or + exists(SemExpr e, int val0, int delta | + semExprModulus(e, b, val0, mod) and + valueFlowStepSsa(v, pos, e, delta) and + val = remainder(val0 + delta, mod) + ) + or + moduloGuardedRead(v, pos, val, mod) and b instanceof Bounds::SemZeroBound + } + + /** + * Holds if `e` is equal to `b + val` modulo `mod`. + * + * There are two cases for the modulus: + * - `mod = 0`: The equality `e = b + val` is an ordinary equality. + * - `mod > 1`: `val` lies within the range `[0 .. mod-1]`. + */ + cached + predicate semExprModulus(SemExpr e, Bounds::SemBound b, int val, int mod) { + not ignoreExprModulus(e) and + ( + e = b.getExpr(D::fromInt(val)) and mod = 0 + or + evenlyDivisibleExpr(e, mod) and + val = 0 and + b instanceof Bounds::SemZeroBound + or + exists(SemSsaVariable v, SemSsaReadPositionBlock bb | + ssaModulus(v, bb, b, val, mod) and + e = v.getAUse() and + bb.getAnExpr() = e + ) + or + exists(SemExpr mid, int val0, int delta | + semExprModulus(mid, b, val0, mod) and + U::semValueFlowStep(e, mid, D::fromInt(delta)) and + val = remainder(val0 + delta, mod) + ) + or + exists(SemConditionalExpr cond, int v1, int v2, int m1, int m2 | + cond = e and + condExprBranchModulus(cond, true, b, v1, m1) and + condExprBranchModulus(cond, false, b, v2, m2) and + mod = m1.gcd(m2).gcd(v1 - v2) and + mod != 1 and + val = remainder(v1, mod) + ) + or + exists(Bounds::SemBound b1, Bounds::SemBound b2, int v1, int v2, int m1, int m2 | + addModulus(e, true, b1, v1, m1) and + addModulus(e, false, b2, v2, m2) and + mod = m1.gcd(m2) and + mod != 1 and + val = remainder(v1 + v2, mod) + | + b = b1 and b2 instanceof Bounds::SemZeroBound + or + b = b2 and b1 instanceof Bounds::SemZeroBound + ) + or + exists(int v1, int v2, int m1, int m2 | + subModulus(e, true, b, v1, m1) and + subModulus(e, false, any(Bounds::SemZeroBound zb), v2, m2) and + mod = m1.gcd(m2) and + mod != 1 and + val = remainder(v1 - v2, mod) + ) + ) + } + + private predicate condExprBranchModulus( + SemConditionalExpr cond, boolean branch, Bounds::SemBound b, int val, int mod + ) { + semExprModulus(cond.getBranchExpr(branch), b, val, mod) + } + + private predicate addModulus(SemExpr add, boolean isLeft, Bounds::SemBound b, int val, int mod) { + exists(SemExpr larg, SemExpr rarg | nonConstAddition(add, larg, rarg) | + semExprModulus(larg, b, val, mod) and isLeft = true + or + semExprModulus(rarg, b, val, mod) and isLeft = false + ) + } + + private predicate subModulus(SemExpr sub, boolean isLeft, Bounds::SemBound b, int val, int mod) { + exists(SemExpr larg, SemExpr rarg | nonConstSubtraction(sub, larg, rarg) | + semExprModulus(larg, b, val, mod) and isLeft = true + or + semExprModulus(rarg, b, val, mod) and isLeft = false + ) + } + + /** + * Holds if `inp` is an input to `phi` along `edge` and this input has index `r` + * in an arbitrary 1-based numbering of the input edges to `phi`. + */ + private predicate rankedPhiInput( + SemSsaPhiNode phi, SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge, int r + ) { + edge.phiInput(phi, inp) and + edge = + rank[r](SemSsaReadPositionPhiInputEdge e | + e.phiInput(phi, _) + | + e order by e.getOrigBlock().getUniqueId() + ) + } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ModulusAnalysisSpecific.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ModulusAnalysisSpecific.qll new file mode 100644 index 00000000000..c0e7287e23b --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/ModulusAnalysisSpecific.qll @@ -0,0 +1,8 @@ +/** + * C++-specific implementation of modulus analysis. + */ +module Private { + private import semmle.code.cpp.rangeanalysis.new.internal.semantic.Semantic + + predicate ignoreExprModulus(SemExpr e) { none() } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysis.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysis.qll new file mode 100644 index 00000000000..5d15aa30fb4 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysis.qll @@ -0,0 +1,2 @@ +import RangeAnalysisImpl +import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticBound diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysisImpl.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysisImpl.qll new file mode 100644 index 00000000000..dd3a0f2e878 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysisImpl.qll @@ -0,0 +1,107 @@ +private import RangeAnalysisStage +private import RangeAnalysisSpecific +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.analysis.FloatDelta +private import RangeUtils +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticBound as SemanticBound +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticLocation +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticSSA + +module ConstantBounds implements BoundSig { + class SemBound instanceof SemanticBound::SemBound { + SemBound() { + this instanceof SemanticBound::SemZeroBound + or + this.(SemanticBound::SemSsaBound).getAVariable() instanceof SemSsaPhiNode + } + + string toString() { result = super.toString() } + + SemLocation getLocation() { result = super.getLocation() } + + SemExpr getExpr(float delta) { result = super.getExpr(delta) } + } + + class SemZeroBound extends SemBound instanceof SemanticBound::SemZeroBound { } + + class SemSsaBound extends SemBound instanceof SemanticBound::SemSsaBound { + SemSsaVariable getAVariable() { result = this.(SemanticBound::SemSsaBound).getAVariable() } + } +} + +private module RelativeBounds implements BoundSig { + class SemBound instanceof SemanticBound::SemBound { + SemBound() { not this instanceof SemanticBound::SemZeroBound } + + string toString() { result = super.toString() } + + SemLocation getLocation() { result = super.getLocation() } + + SemExpr getExpr(float delta) { result = super.getExpr(delta) } + } + + class SemZeroBound extends SemBound instanceof SemanticBound::SemZeroBound { } + + class SemSsaBound extends SemBound instanceof SemanticBound::SemSsaBound { + SemSsaVariable getAVariable() { result = this.(SemanticBound::SemSsaBound).getAVariable() } + } +} + +private module ConstantStage = + RangeStage>; + +private module RelativeStage = + RangeStage>; + +private newtype TSemReason = + TSemNoReason() or + TSemCondReason(SemGuard guard) { + guard = any(ConstantStage::SemCondReason reason).getCond() + or + guard = any(RelativeStage::SemCondReason reason).getCond() + } + +/** + * A reason for an inferred bound. This can either be `CondReason` if the bound + * is due to a specific condition, or `NoReason` if the bound is inferred + * without going through a bounding condition. + */ +abstract class SemReason extends TSemReason { + /** Gets a textual representation of this reason. */ + abstract string toString(); +} + +/** + * A reason for an inferred bound that indicates that the bound is inferred + * without going through a bounding condition. + */ +class SemNoReason extends SemReason, TSemNoReason { + override string toString() { result = "NoReason" } +} + +/** A reason for an inferred bound pointing to a condition. */ +class SemCondReason extends SemReason, TSemCondReason { + /** Gets the condition that is the reason for the bound. */ + SemGuard getCond() { this = TSemCondReason(result) } + + override string toString() { result = getCond().toString() } +} + +private ConstantStage::SemReason constantReason(SemReason reason) { + result instanceof ConstantStage::SemNoReason and reason instanceof SemNoReason + or + result.(ConstantStage::SemCondReason).getCond() = reason.(SemCondReason).getCond() +} + +private RelativeStage::SemReason relativeReason(SemReason reason) { + result instanceof RelativeStage::SemNoReason and reason instanceof SemNoReason + or + result.(RelativeStage::SemCondReason).getCond() = reason.(SemCondReason).getCond() +} + +predicate semBounded( + SemExpr e, SemanticBound::SemBound b, float delta, boolean upper, SemReason reason +) { + ConstantStage::semBounded(e, b, delta, upper, constantReason(reason)) + or + RelativeStage::semBounded(e, b, delta, upper, relativeReason(reason)) +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysisSpecific.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysisSpecific.qll new file mode 100644 index 00000000000..2ddc20c8a33 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysisSpecific.qll @@ -0,0 +1,92 @@ +/** + * C++-specific implementation of range analysis. + */ + +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.Semantic +private import RangeAnalysisStage +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.analysis.FloatDelta + +module CppLangImpl implements LangSig { + /** + * Holds if the specified expression should be excluded from the result of `ssaRead()`. + * + * This predicate is to keep the results identical to the original Java implementation. It should be + * removed once we have the new implementation matching the old results exactly. + */ + predicate ignoreSsaReadCopy(SemExpr e) { none() } + + /** + * Ignore the bound on this expression. + * + * This predicate is to keep the results identical to the original Java implementation. It should be + * removed once we have the new implementation matching the old results exactly. + */ + predicate ignoreExprBound(SemExpr e) { none() } + + /** + * Ignore any inferred zero lower bound on this expression. + * + * This predicate is to keep the results identical to the original Java implementation. It should be + * removed once we have the new implementation matching the old results exactly. + */ + predicate ignoreZeroLowerBound(SemExpr e) { none() } + + /** + * Holds if the specified expression should be excluded from the result of `ssaRead()`. + * + * This predicate is to keep the results identical to the original Java implementation. It should be + * removed once we have the new implementation matching the old results exactly. + */ + predicate ignoreSsaReadArithmeticExpr(SemExpr e) { none() } + + /** + * Holds if the specified variable should be excluded from the result of `ssaRead()`. + * + * This predicate is to keep the results identical to the original Java implementation. It should be + * removed once we have the new implementation matching the old results exactly. + */ + predicate ignoreSsaReadAssignment(SemSsaVariable v) { none() } + + /** + * Adds additional results to `ssaRead()` that are specific to Java. + * + * This predicate handles propagation of offsets for post-increment and post-decrement expressions + * in exactly the same way as the old Java implementation. Once the new implementation matches the + * old one, we should remove this predicate and propagate deltas for all similar patterns, whether + * or not they come from a post-increment/decrement expression. + */ + SemExpr specificSsaRead(SemSsaVariable v, float delta) { none() } + + /** + * Holds if `e >= bound` (if `upper = false`) or `e <= bound` (if `upper = true`). + */ + predicate hasConstantBound(SemExpr e, float bound, boolean upper) { none() } + + /** + * Holds if `e >= bound + delta` (if `upper = false`) or `e <= bound + delta` (if `upper = true`). + */ + predicate hasBound(SemExpr e, SemExpr bound, float delta, boolean upper) { none() } + + /** + * Holds if the value of `dest` is known to be `src + delta`. + */ + predicate additionalValueFlowStep(SemExpr dest, SemExpr src, float delta) { none() } + + /** + * Gets the type that range analysis should use to track the result of the specified expression, + * if a type other than the original type of the expression is to be used. + * + * This predicate is commonly used in languages that support immutable "boxed" types that are + * actually references but whose values can be tracked as the type contained in the box. + */ + SemType getAlternateType(SemExpr e) { none() } + + /** + * Gets the type that range analysis should use to track the result of the specified source + * variable, if a type other than the original type of the expression is to be used. + * + * This predicate is commonly used in languages that support immutable "boxed" types that are + * actually references but whose values can be tracked as the type contained in the box. + */ + SemType getAlternateTypeForSsaVariable(SemSsaVariable var) { none() } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysisStage.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysisStage.qll new file mode 100644 index 00000000000..2cdf7b1e233 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeAnalysisStage.qll @@ -0,0 +1,1217 @@ +/** + * Provides classes and predicates for range analysis. + * + * An inferred bound can either be a specific integer, the abstract value of an + * SSA variable, or the abstract value of an interesting expression. The latter + * category includes array lengths that are not SSA variables. + * + * If an inferred bound relies directly on a condition, then this condition is + * reported as the reason for the bound. + */ + +/* + * This library tackles range analysis as a flow problem. Consider e.g.: + * ``` + * len = arr.length; + * if (x < len) { ... y = x-1; ... y ... } + * ``` + * In this case we would like to infer `y <= arr.length - 2`, and this is + * accomplished by tracking the bound through a sequence of steps: + * ``` + * arr.length --> len = .. --> x < len --> x-1 --> y = .. --> y + * ``` + * + * In its simplest form the step relation `E1 --> E2` relates two expressions + * such that `E1 <= B` implies `E2 <= B` for any `B` (with a second separate + * step relation handling lower bounds). Examples of such steps include + * assignments `E2 = E1` and conditions `x <= E1` where `E2` is a use of `x` + * guarded by the condition. + * + * In order to handle subtractions and additions with constants, and strict + * comparisons, the step relation is augmented with an integer delta. With this + * generalization `E1 --(delta)--> E2` relates two expressions and an integer + * such that `E1 <= B` implies `E2 <= B + delta` for any `B`. This corresponds + * to the predicate `boundFlowStep`. + * + * The complete range analysis is then implemented as the transitive closure of + * the step relation summing the deltas along the way. If `E1` transitively + * steps to `E2`, `delta` is the sum of deltas along the path, and `B` is an + * interesting bound equal to the value of `E1` then `E2 <= B + delta`. This + * corresponds to the predicate `bounded`. + * + * Phi nodes need a little bit of extra handling. Consider `x0 = phi(x1, x2)`. + * There are essentially two cases: + * - If `x1 <= B + d1` and `x2 <= B + d2` then `x0 <= B + max(d1,d2)`. + * - If `x1 <= B + d1` and `x2 <= x0 + d2` with `d2 <= 0` then `x0 <= B + d1`. + * The first case is for whenever a bound can be proven without taking looping + * into account. The second case is relevant when `x2` comes from a back-edge + * where we can prove that the variable has been non-increasing through the + * loop-iteration as this means that any upper bound that holds prior to the + * loop also holds for the variable during the loop. + * This generalizes to a phi node with `n` inputs, so if + * `x0 = phi(x1, ..., xn)` and `xi <= B + delta` for one of the inputs, then we + * also have `x0 <= B + delta` if we can prove either: + * - `xj <= B + d` with `d <= delta` or + * - `xj <= x0 + d` with `d <= 0` + * for each input `xj`. + * + * As all inferred bounds can be related directly to a path in the source code + * the only source of non-termination is if successive redundant (and thereby + * increasingly worse) bounds are calculated along a loop in the source code. + * We prevent this by weakening the bound to a small finite set of bounds when + * a path follows a second back-edge (we postpone weakening till the second + * back-edge as a precise bound might require traversing a loop once). + */ + +private import RangeUtils as Utils +private import SignAnalysisCommon +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.analysis.ModulusAnalysis +import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticExpr +import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticSSA +import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticGuard +import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticCFG +import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticType +import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticOpcode +private import ConstantAnalysis +import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticLocation + +/** + * Holds if `typ` is a small integral type with the given lower and upper bounds. + */ +private predicate typeBound(SemIntegerType typ, float lowerbound, float upperbound) { + exists(int bitSize | bitSize = typ.getByteSize() * 8 | + if typ.isSigned() + then ( + upperbound = 2.pow(bitSize - 1) - 1 and + lowerbound = -upperbound - 1 + ) else ( + lowerbound = 0 and + upperbound = 2.pow(bitSize) - 1 + ) + ) +} + +signature module DeltaSig { + bindingset[this] + class Delta; + + bindingset[d] + bindingset[result] + float toFloat(Delta d); + + bindingset[d] + bindingset[result] + int toInt(Delta d); + + bindingset[n] + bindingset[result] + Delta fromInt(int n); + + bindingset[f] + Delta fromFloat(float f); +} + +signature module LangSig { + /** + * Holds if the specified expression should be excluded from the result of `ssaRead()`. + * + * This predicate is to keep the results identical to the original Java implementation. It should be + * removed once we have the new implementation matching the old results exactly. + */ + predicate ignoreSsaReadCopy(SemExpr e); + + /** + * Holds if `e >= bound` (if `upper = false`) or `e <= bound` (if `upper = true`). + */ + predicate hasConstantBound(SemExpr e, D::Delta bound, boolean upper); + + /** + * Holds if `e >= bound + delta` (if `upper = false`) or `e <= bound + delta` (if `upper = true`). + */ + predicate hasBound(SemExpr e, SemExpr bound, D::Delta delta, boolean upper); + + /** + * Ignore the bound on this expression. + * + * This predicate is to keep the results identical to the original Java implementation. It should be + * removed once we have the new implementation matching the old results exactly. + */ + predicate ignoreExprBound(SemExpr e); + + /** + * Ignore any inferred zero lower bound on this expression. + * + * This predicate is to keep the results identical to the original Java implementation. It should be + * removed once we have the new implementation matching the old results exactly. + */ + predicate ignoreZeroLowerBound(SemExpr e); + + /** + * Holds if the specified expression should be excluded from the result of `ssaRead()`. + * + * This predicate is to keep the results identical to the original Java implementation. It should be + * removed once we have the new implementation matching the old results exactly. + */ + predicate ignoreSsaReadArithmeticExpr(SemExpr e); + + /** + * Holds if the specified variable should be excluded from the result of `ssaRead()`. + * + * This predicate is to keep the results identical to the original Java implementation. It should be + * removed once we have the new implementation matching the old results exactly. + */ + predicate ignoreSsaReadAssignment(SemSsaVariable v); + + /** + * Adds additional results to `ssaRead()` that are specific to Java. + * + * This predicate handles propagation of offsets for post-increment and post-decrement expressions + * in exactly the same way as the old Java implementation. Once the new implementation matches the + * old one, we should remove this predicate and propagate deltas for all similar patterns, whether + * or not they come from a post-increment/decrement expression. + */ + SemExpr specificSsaRead(SemSsaVariable v, D::Delta delta); + + /** + * Holds if the value of `dest` is known to be `src + delta`. + */ + predicate additionalValueFlowStep(SemExpr dest, SemExpr src, D::Delta delta); + + /** + * Gets the type that range analysis should use to track the result of the specified expression, + * if a type other than the original type of the expression is to be used. + * + * This predicate is commonly used in languages that support immutable "boxed" types that are + * actually references but whose values can be tracked as the type contained in the box. + */ + SemType getAlternateType(SemExpr e); + + /** + * Gets the type that range analysis should use to track the result of the specified source + * variable, if a type other than the original type of the expression is to be used. + * + * This predicate is commonly used in languages that support immutable "boxed" types that are + * actually references but whose values can be tracked as the type contained in the box. + */ + SemType getAlternateTypeForSsaVariable(SemSsaVariable var); +} + +signature module UtilSig { + SemExpr semSsaRead(SemSsaVariable v, DeltaParam::Delta delta); + + SemGuard semEqFlowCond( + SemSsaVariable v, SemExpr e, DeltaParam::Delta delta, boolean isEq, boolean testIsTrue + ); + + predicate semSsaUpdateStep(SemSsaExplicitUpdate v, SemExpr e, DeltaParam::Delta delta); + + predicate semValueFlowStep(SemExpr e2, SemExpr e1, DeltaParam::Delta delta); + + /** + * Gets the type used to track the specified source variable's range information. + * + * Usually, this just `e.getType()`, but the language can override this to track immutable boxed + * primitive types as the underlying primitive type. + */ + SemType getTrackedTypeForSsaVariable(SemSsaVariable var); + + /** + * Gets the type used to track the specified expression's range information. + * + * Usually, this just `e.getSemType()`, but the language can override this to track immutable boxed + * primitive types as the underlying primitive type. + */ + SemType getTrackedType(SemExpr e); +} + +signature module BoundSig { + class SemBound { + string toString(); + + SemLocation getLocation(); + + SemExpr getExpr(D::Delta delta); + } + + class SemZeroBound extends SemBound; + + class SemSsaBound extends SemBound { + SemSsaVariable getAVariable(); + } +} + +module RangeStage Bounds, LangSig LangParam, UtilSig UtilParam> { + private import Bounds + private import LangParam + private import UtilParam + private import D + + /** + * An expression that does conversion, boxing, or unboxing + */ + private class ConvertOrBoxExpr instanceof SemUnaryExpr { + ConvertOrBoxExpr() { + this instanceof SemConvertExpr + or + this instanceof SemBoxExpr + or + this instanceof SemUnboxExpr + } + + string toString() { result = super.toString() } + + SemExpr getOperand() { result = super.getOperand() } + } + + /** + * A cast that can be ignored for the purpose of range analysis. + */ + private class SafeCastExpr extends ConvertOrBoxExpr { + SafeCastExpr() { + conversionCannotOverflow(getTrackedType(pragma[only_bind_into](getOperand())), + pragma[only_bind_out](getTrackedType(this))) + } + } + + /** + * A cast to a small integral type that may overflow or underflow. + */ + private class NarrowingCastExpr extends ConvertOrBoxExpr { + NarrowingCastExpr() { + not this instanceof SafeCastExpr and + typeBound(getTrackedType(this), _, _) + } + + /** Gets the lower bound of the resulting type. */ + float getLowerBound() { typeBound(getTrackedType(this), result, _) } + + /** Gets the upper bound of the resulting type. */ + float getUpperBound() { typeBound(getTrackedType(this), _, result) } + } + + private module SignAnalysisInstantiated = SignAnalysis; // TODO: will this cause reevaluation if it's instantiated with the same DeltaSig and UtilParam multiple times? + + private import SignAnalysisInstantiated + + private module ModulusAnalysisInstantiated = ModulusAnalysis; // TODO: will this cause reevaluation if it's instantiated with the same DeltaSig and UtilParam multiple times? + + private import ModulusAnalysisInstantiated + + cached + private module RangeAnalysisCache { + cached + module RangeAnalysisPublic { + /** + * Holds if `b + delta` is a valid bound for `e`. + * - `upper = true` : `e <= b + delta` + * - `upper = false` : `e >= b + delta` + * + * The reason for the bound is given by `reason` and may be either a condition + * or `NoReason` if the bound was proven directly without the use of a bounding + * condition. + */ + cached + predicate semBounded(SemExpr e, SemBound b, D::Delta delta, boolean upper, SemReason reason) { + bounded(e, b, delta, upper, _, _, reason) and + bestBound(e, b, delta, upper) + } + } + + /** + * Holds if `guard = boundFlowCond(_, _, _, _, _) or guard = eqFlowCond(_, _, _, _, _)`. + */ + cached + predicate possibleReason(SemGuard guard) { + guard = boundFlowCond(_, _, _, _, _) or guard = semEqFlowCond(_, _, _, _, _) + } + } + + private import RangeAnalysisCache + import RangeAnalysisPublic + + /** + * Holds if `b + delta` is a valid bound for `e` and this is the best such delta. + * - `upper = true` : `e <= b + delta` + * - `upper = false` : `e >= b + delta` + */ + private predicate bestBound(SemExpr e, SemBound b, D::Delta delta, boolean upper) { + delta = min(D::Delta d | bounded(e, b, d, upper, _, _, _) | d order by D::toFloat(d)) and + upper = true + or + delta = max(D::Delta d | bounded(e, b, d, upper, _, _, _) | d order by D::toFloat(d)) and + upper = false + } + + /** + * Holds if `comp` corresponds to: + * - `upper = true` : `v <= e + delta` or `v < e + delta` + * - `upper = false` : `v >= e + delta` or `v > e + delta` + */ + private predicate boundCondition( + SemRelationalExpr comp, SemSsaVariable v, SemExpr e, D::Delta delta, boolean upper + ) { + comp.getLesserOperand() = semSsaRead(v, delta) and + e = comp.getGreaterOperand() and + upper = true + or + comp.getGreaterOperand() = semSsaRead(v, delta) and + e = comp.getLesserOperand() and + upper = false + or + exists(SemSubExpr sub, SemConstantIntegerExpr c, D::Delta d | + // (v - d) - e < c + comp.getLesserOperand() = sub and + comp.getGreaterOperand() = c and + sub.getLeftOperand() = semSsaRead(v, d) and + sub.getRightOperand() = e and + upper = true and + delta = D::fromFloat(D::toFloat(d) + c.getIntValue()) + or + // (v - d) - e > c + comp.getGreaterOperand() = sub and + comp.getLesserOperand() = c and + sub.getLeftOperand() = semSsaRead(v, d) and + sub.getRightOperand() = e and + upper = false and + delta = D::fromFloat(D::toFloat(d) + c.getIntValue()) + or + // e - (v - d) < c + comp.getLesserOperand() = sub and + comp.getGreaterOperand() = c and + sub.getLeftOperand() = e and + sub.getRightOperand() = semSsaRead(v, d) and + upper = false and + delta = D::fromFloat(D::toFloat(d) - c.getIntValue()) + or + // e - (v - d) > c + comp.getGreaterOperand() = sub and + comp.getLesserOperand() = c and + sub.getLeftOperand() = e and + sub.getRightOperand() = semSsaRead(v, d) and + upper = true and + delta = D::fromFloat(D::toFloat(d) - c.getIntValue()) + ) + } + + /** + * Holds if `comp` is a comparison between `x` and `y` for which `y - x` has a + * fixed value modulo some `mod > 1`, such that the comparison can be + * strengthened by `strengthen` when evaluating to `testIsTrue`. + */ + private predicate modulusComparison(SemRelationalExpr comp, boolean testIsTrue, int strengthen) { + exists( + SemBound b, int v1, int v2, int mod1, int mod2, int mod, boolean resultIsStrict, int d, int k + | + // If `x <= y` and `x =(mod) b + v1` and `y =(mod) b + v2` then + // `0 <= y - x =(mod) v2 - v1`. By choosing `k =(mod) v2 - v1` with + // `0 <= k < mod` we get `k <= y - x`. If the resulting comparison is + // strict then the strengthening amount is instead `k - 1` modulo `mod`: + // `x < y` means `0 <= y - x - 1 =(mod) k - 1` so `k - 1 <= y - x - 1` and + // thus `k - 1 < y - x` with `0 <= k - 1 < mod`. + semExprModulus(comp.getLesserOperand(), b, v1, mod1) and + semExprModulus(comp.getGreaterOperand(), b, v2, mod2) and + mod = mod1.gcd(mod2) and + mod != 1 and + (testIsTrue = true or testIsTrue = false) and + ( + if comp.isStrict() + then resultIsStrict = testIsTrue + else resultIsStrict = testIsTrue.booleanNot() + ) and + ( + resultIsStrict = true and d = 1 + or + resultIsStrict = false and d = 0 + ) and + ( + testIsTrue = true and k = v2 - v1 + or + testIsTrue = false and k = v1 - v2 + ) and + strengthen = (((k - d) % mod) + mod) % mod + ) + } + + /** + * Gets a condition that tests whether `v` is bounded by `e + delta`. + * + * If the condition evaluates to `testIsTrue`: + * - `upper = true` : `v <= e + delta` + * - `upper = false` : `v >= e + delta` + */ + private SemGuard boundFlowCond( + SemSsaVariable v, SemExpr e, D::Delta delta, boolean upper, boolean testIsTrue + ) { + exists( + SemRelationalExpr comp, D::Delta d1, float d2, float d3, int strengthen, boolean compIsUpper, + boolean resultIsStrict + | + comp = result.asExpr() and + boundCondition(comp, v, e, d1, compIsUpper) and + (testIsTrue = true or testIsTrue = false) and + upper = compIsUpper.booleanXor(testIsTrue.booleanNot()) and + ( + if comp.isStrict() + then resultIsStrict = testIsTrue + else resultIsStrict = testIsTrue.booleanNot() + ) and + ( + if + getTrackedTypeForSsaVariable(v) instanceof SemIntegerType or + getTrackedTypeForSsaVariable(v) instanceof SemAddressType + then + upper = true and strengthen = -1 + or + upper = false and strengthen = 1 + else strengthen = 0 + ) and + ( + exists(int k | modulusComparison(comp, testIsTrue, k) and d2 = strengthen * k) + or + not modulusComparison(comp, testIsTrue, _) and d2 = 0 + ) and + // A strict inequality `x < y` can be strengthened to `x <= y - 1`. + ( + resultIsStrict = true and d3 = strengthen + or + resultIsStrict = false and d3 = 0 + ) and + delta = D::fromFloat(D::toFloat(d1) + d2 + d3) + ) + or + exists(boolean testIsTrue0 | + semImplies_v2(result, testIsTrue, boundFlowCond(v, e, delta, upper, testIsTrue0), testIsTrue0) + ) + or + result = semEqFlowCond(v, e, delta, true, testIsTrue) and + (upper = true or upper = false) + or + // guard that tests whether `v2` is bounded by `e + delta + d1 - d2` and + // exists a guard `guardEq` such that `v = v2 - d1 + d2`. + exists( + SemSsaVariable v2, SemGuard guardEq, boolean eqIsTrue, D::Delta d1, D::Delta d2, + D::Delta oldDelta + | + guardEq = semEqFlowCond(v, semSsaRead(pragma[only_bind_into](v2), d1), d2, true, eqIsTrue) and + result = boundFlowCond(v2, e, oldDelta, upper, testIsTrue) and + // guardEq needs to control guard + guardEq.directlyControls(result.getBasicBlock(), eqIsTrue) and + delta = D::fromFloat(D::toFloat(oldDelta) - D::toFloat(d1) + D::toFloat(d2)) + ) + } + + private newtype TSemReason = + TSemNoReason() or + TSemCondReason(SemGuard guard) { possibleReason(guard) } + + /** + * A reason for an inferred bound. This can either be `CondReason` if the bound + * is due to a specific condition, or `NoReason` if the bound is inferred + * without going through a bounding condition. + */ + abstract class SemReason extends TSemReason { + /** Gets a textual representation of this reason. */ + abstract string toString(); + } + + /** + * A reason for an inferred bound that indicates that the bound is inferred + * without going through a bounding condition. + */ + class SemNoReason extends SemReason, TSemNoReason { + override string toString() { result = "NoReason" } + } + + /** A reason for an inferred bound pointing to a condition. */ + class SemCondReason extends SemReason, TSemCondReason { + /** Gets the condition that is the reason for the bound. */ + SemGuard getCond() { this = TSemCondReason(result) } + + override string toString() { result = getCond().toString() } + } + + /** + * Holds if `e + delta` is a valid bound for `v` at `pos`. + * - `upper = true` : `v <= e + delta` + * - `upper = false` : `v >= e + delta` + */ + private predicate boundFlowStepSsa( + SemSsaVariable v, SemSsaReadPosition pos, SemExpr e, D::Delta delta, boolean upper, + SemReason reason + ) { + semSsaUpdateStep(v, e, delta) and + pos.hasReadOfVar(v) and + (upper = true or upper = false) and + reason = TSemNoReason() + or + exists(SemGuard guard, boolean testIsTrue | + pos.hasReadOfVar(v) and + guard = boundFlowCond(v, e, delta, upper, testIsTrue) and + semGuardDirectlyControlsSsaRead(guard, pos, testIsTrue) and + reason = TSemCondReason(guard) + ) + } + + /** Holds if `v != e + delta` at `pos` and `v` is of integral type. */ + private predicate unequalFlowStepIntegralSsa( + SemSsaVariable v, SemSsaReadPosition pos, SemExpr e, D::Delta delta, SemReason reason + ) { + getTrackedTypeForSsaVariable(v) instanceof SemIntegerType and + exists(SemGuard guard, boolean testIsTrue | + pos.hasReadOfVar(v) and + guard = semEqFlowCond(v, e, delta, false, testIsTrue) and + semGuardDirectlyControlsSsaRead(guard, pos, testIsTrue) and + reason = TSemCondReason(guard) + ) + } + + /** Holds if `e >= 1` as determined by sign analysis. */ + private predicate strictlyPositiveIntegralExpr(SemExpr e) { + semStrictlyPositive(e) and getTrackedType(e) instanceof SemIntegerType + } + + /** Holds if `e <= -1` as determined by sign analysis. */ + private predicate strictlyNegativeIntegralExpr(SemExpr e) { + semStrictlyNegative(e) and getTrackedType(e) instanceof SemIntegerType + } + + /** + * Holds if `e1 + delta` is a valid bound for `e2`. + * - `upper = true` : `e2 <= e1 + delta` + * - `upper = false` : `e2 >= e1 + delta` + */ + private predicate boundFlowStep(SemExpr e2, SemExpr e1, D::Delta delta, boolean upper) { + semValueFlowStep(e2, e1, delta) and + (upper = true or upper = false) + or + e2.(SafeCastExpr).getOperand() = e1 and + delta = D::fromInt(0) and + (upper = true or upper = false) + or + exists(SemExpr x, SemSubExpr sub | + e2 = sub and + sub.getLeftOperand() = e1 and + sub.getRightOperand() = x + | + // `x instanceof ConstantIntegerExpr` is covered by valueFlowStep + not x instanceof SemConstantIntegerExpr and + if strictlyPositiveIntegralExpr(x) + then upper = true and delta = D::fromInt(-1) + else + if semPositive(x) + then upper = true and delta = D::fromInt(0) + else + if strictlyNegativeIntegralExpr(x) + then upper = false and delta = D::fromInt(1) + else + if semNegative(x) + then upper = false and delta = D::fromInt(0) + else none() + ) + or + e2.(SemRemExpr).getRightOperand() = e1 and + semPositive(e1) and + delta = D::fromInt(-1) and + upper = true + or + e2.(SemRemExpr).getLeftOperand() = e1 and + semPositive(e1) and + delta = D::fromInt(0) and + upper = true + or + e2.(SemBitAndExpr).getAnOperand() = e1 and + semPositive(e1) and + delta = D::fromInt(0) and + upper = true + or + e2.(SemBitOrExpr).getAnOperand() = e1 and + semPositive(e2) and + delta = D::fromInt(0) and + upper = false + or + hasBound(e2, e1, delta, upper) + } + + /** Holds if `e2 = e1 * factor` and `factor > 0`. */ + private predicate boundFlowStepMul(SemExpr e2, SemExpr e1, D::Delta factor) { + exists(SemConstantIntegerExpr c, int k | k = c.getIntValue() and k > 0 | + e2.(SemMulExpr).hasOperands(e1, c) and factor = D::fromInt(k) + or + exists(SemShiftLeftExpr e | + e = e2 and + e.getLeftOperand() = e1 and + e.getRightOperand() = c and + factor = D::fromInt(2.pow(k)) + ) + ) + } + + /** + * Holds if `e2 = e1 / factor` and `factor > 0`. + * + * This conflates division, right shift, and unsigned right shift and is + * therefore only valid for non-negative numbers. + */ + private predicate boundFlowStepDiv(SemExpr e2, SemExpr e1, D::Delta factor) { + exists(SemConstantIntegerExpr c, D::Delta k | + k = D::fromInt(c.getIntValue()) and D::toFloat(k) > 0 + | + exists(SemDivExpr e | + e = e2 and e.getLeftOperand() = e1 and e.getRightOperand() = c and factor = k + ) + or + exists(SemShiftRightExpr e | + e = e2 and + e.getLeftOperand() = e1 and + e.getRightOperand() = c and + factor = D::fromInt(2.pow(D::toInt(k))) + ) + or + exists(SemShiftRightUnsignedExpr e | + e = e2 and + e.getLeftOperand() = e1 and + e.getRightOperand() = c and + factor = D::fromInt(2.pow(D::toInt(k))) + ) + ) + } + + /** + * Holds if `b + delta` is a valid bound for `v` at `pos`. + * - `upper = true` : `v <= b + delta` + * - `upper = false` : `v >= b + delta` + */ + private predicate boundedSsa( + SemSsaVariable v, SemSsaReadPosition pos, SemBound b, D::Delta delta, boolean upper, + boolean fromBackEdge, D::Delta origdelta, SemReason reason + ) { + exists(SemExpr mid, D::Delta d1, D::Delta d2, SemReason r1, SemReason r2 | + boundFlowStepSsa(v, pos, mid, d1, upper, r1) and + bounded(mid, b, d2, upper, fromBackEdge, origdelta, r2) and + // upper = true: v <= mid + d1 <= b + d1 + d2 = b + delta + // upper = false: v >= mid + d1 >= b + d1 + d2 = b + delta + delta = D::fromFloat(D::toFloat(d1) + D::toFloat(d2)) and + (if r1 instanceof SemNoReason then reason = r2 else reason = r1) + ) + or + exists(D::Delta d, SemReason r1, SemReason r2 | + boundedSsa(v, pos, b, d, upper, fromBackEdge, origdelta, r2) or + boundedPhi(v, b, d, upper, fromBackEdge, origdelta, r2) + | + unequalIntegralSsa(v, pos, b, d, r1) and + ( + upper = true and delta = D::fromFloat(D::toFloat(d) - 1) + or + upper = false and delta = D::fromFloat(D::toFloat(d) + 1) + ) and + ( + reason = r1 + or + reason = r2 and not r2 instanceof SemNoReason + ) + ) + } + + /** + * Holds if `v != b + delta` at `pos` and `v` is of integral type. + */ + private predicate unequalIntegralSsa( + SemSsaVariable v, SemSsaReadPosition pos, SemBound b, D::Delta delta, SemReason reason + ) { + exists(SemExpr e, D::Delta d1, D::Delta d2 | + unequalFlowStepIntegralSsa(v, pos, e, d1, reason) and + boundedUpper(e, b, d1) and + boundedLower(e, b, d2) and + delta = D::fromFloat(D::toFloat(d1) + D::toFloat(d2)) + ) + } + + /** + * Holds if `b + delta` is an upper bound for `e`. + * + * This predicate only exists to prevent a bad standard order in `unequalIntegralSsa`. + */ + pragma[nomagic] + private predicate boundedUpper(SemExpr e, SemBound b, D::Delta delta) { + bounded(e, b, delta, true, _, _, _) + } + + /** + * Holds if `b + delta` is a lower bound for `e`. + * + * This predicate only exists to prevent a bad standard order in `unequalIntegralSsa`. + */ + pragma[nomagic] + private predicate boundedLower(SemExpr e, SemBound b, D::Delta delta) { + bounded(e, b, delta, false, _, _, _) + } + + /** Weakens a delta to lie in the range `[-1..1]`. */ + bindingset[delta, upper] + private D::Delta weakenDelta(boolean upper, D::Delta delta) { + delta = D::fromFloat([-1 .. 1]) and result = delta + or + upper = true and result = D::fromFloat(-1) and D::toFloat(delta) < -1 + or + upper = false and result = D::fromFloat(1) and D::toFloat(delta) > 1 + } + + /** + * Holds if `b + delta` is a valid bound for `inp` when used as an input to + * `phi` along `edge`. + * - `upper = true` : `inp <= b + delta` + * - `upper = false` : `inp >= b + delta` + */ + private predicate boundedPhiInp( + SemSsaPhiNode phi, SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge, SemBound b, + D::Delta delta, boolean upper, boolean fromBackEdge, D::Delta origdelta, SemReason reason + ) { + edge.phiInput(phi, inp) and + exists(D::Delta d, boolean fromBackEdge0 | + boundedSsa(inp, edge, b, d, upper, fromBackEdge0, origdelta, reason) + or + boundedPhi(inp, b, d, upper, fromBackEdge0, origdelta, reason) + or + b.(SemSsaBound).getAVariable() = inp and + d = D::fromFloat(0) and + (upper = true or upper = false) and + fromBackEdge0 = false and + origdelta = D::fromFloat(0) and + reason = TSemNoReason() + | + if semBackEdge(phi, inp, edge) + then + fromBackEdge = true and + ( + fromBackEdge0 = true and + delta = + D::fromFloat(D::toFloat(weakenDelta(upper, + D::fromFloat(D::toFloat(d) - D::toFloat(origdelta)))) + D::toFloat(origdelta)) + or + fromBackEdge0 = false and delta = d + ) + else ( + delta = d and fromBackEdge = fromBackEdge0 + ) + ) + } + + /** + * Holds if `b + delta` is a valid bound for `inp` when used as an input to + * `phi` along `edge`. + * - `upper = true` : `inp <= b + delta` + * - `upper = false` : `inp >= b + delta` + * + * Equivalent to `boundedPhiInp(phi, inp, edge, b, delta, upper, _, _, _)`. + */ + pragma[noinline] + private predicate boundedPhiInp1( + SemSsaPhiNode phi, SemBound b, boolean upper, SemSsaVariable inp, + SemSsaReadPositionPhiInputEdge edge, D::Delta delta + ) { + boundedPhiInp(phi, inp, edge, b, delta, upper, _, _, _) + } + + /** + * Holds if `phi` is a valid bound for `inp` when used as an input to `phi` + * along `edge`. + * - `upper = true` : `inp <= phi` + * - `upper = false` : `inp >= phi` + */ + private predicate selfBoundedPhiInp( + SemSsaPhiNode phi, SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge, boolean upper + ) { + exists(D::Delta d, SemSsaBound phibound | + phibound.getAVariable() = phi and + boundedPhiInp(phi, inp, edge, phibound, d, upper, _, _, _) and + ( + upper = true and D::toFloat(d) <= 0 + or + upper = false and D::toFloat(d) >= 0 + ) + ) + } + + /** + * Holds if `b + delta` is a valid bound for some input, `inp`, to `phi`, and + * thus a candidate bound for `phi`. + * - `upper = true` : `inp <= b + delta` + * - `upper = false` : `inp >= b + delta` + */ + pragma[noinline] + private predicate boundedPhiCand( + SemSsaPhiNode phi, boolean upper, SemBound b, D::Delta delta, boolean fromBackEdge, + D::Delta origdelta, SemReason reason + ) { + boundedPhiInp(phi, _, _, b, delta, upper, fromBackEdge, origdelta, reason) + } + + /** + * Holds if the candidate bound `b + delta` for `phi` is valid for the phi input + * `inp` along `edge`. + */ + private predicate boundedPhiCandValidForEdge( + SemSsaPhiNode phi, SemBound b, D::Delta delta, boolean upper, boolean fromBackEdge, + D::Delta origdelta, SemReason reason, SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge + ) { + boundedPhiCand(phi, upper, b, delta, fromBackEdge, origdelta, reason) and + ( + exists(D::Delta d | boundedPhiInp1(phi, b, upper, inp, edge, d) | + upper = true and D::toFloat(d) <= D::toFloat(delta) + ) + or + exists(D::Delta d | boundedPhiInp1(phi, b, upper, inp, edge, d) | + upper = false and D::toFloat(d) >= D::toFloat(delta) + ) + or + selfBoundedPhiInp(phi, inp, edge, upper) + ) + } + + /** + * Holds if `b + delta` is a valid bound for `phi`. + * - `upper = true` : `phi <= b + delta` + * - `upper = false` : `phi >= b + delta` + */ + private predicate boundedPhi( + SemSsaPhiNode phi, SemBound b, D::Delta delta, boolean upper, boolean fromBackEdge, + D::Delta origdelta, SemReason reason + ) { + forex(SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge | edge.phiInput(phi, inp) | + boundedPhiCandValidForEdge(phi, b, delta, upper, fromBackEdge, origdelta, reason, inp, edge) + ) + } + + /** + * Holds if `e` has an upper (for `upper = true`) or lower + * (for `upper = false`) bound of `b`. + */ + private predicate baseBound(SemExpr e, D::Delta b, boolean upper) { + hasConstantBound(e, b, upper) + or + upper = false and + b = D::fromInt(0) and + semPositive(e.(SemBitAndExpr).getAnOperand()) and + // REVIEW: We let the language opt out here to preserve original results. + not ignoreZeroLowerBound(e) + } + + /** + * Holds if the value being cast has an upper (for `upper = true`) or lower + * (for `upper = false`) bound within the bounds of the resulting type. + * For `upper = true` this means that the cast will not overflow and for + * `upper = false` this means that the cast will not underflow. + */ + private predicate safeNarrowingCast(NarrowingCastExpr cast, boolean upper) { + exists(D::Delta bound | + bounded(cast.getOperand(), any(SemZeroBound zb), bound, upper, _, _, _) + | + upper = true and D::toFloat(bound) <= cast.getUpperBound() + or + upper = false and D::toFloat(bound) >= cast.getLowerBound() + ) + } + + pragma[noinline] + private predicate boundedCastExpr( + NarrowingCastExpr cast, SemBound b, D::Delta delta, boolean upper, boolean fromBackEdge, + D::Delta origdelta, SemReason reason + ) { + bounded(cast.getOperand(), b, delta, upper, fromBackEdge, origdelta, reason) + } + + /** + * Computes a normal form of `x` where -0.0 has changed to +0.0. This can be + * needed on the lesser side of a floating-point comparison or on both sides of + * a floating point equality because QL does not follow IEEE in floating-point + * comparisons but instead defines -0.0 to be less than and distinct from 0.0. + */ + bindingset[x] + private float normalizeFloatUp(float x) { result = x + 0.0 } + + /** + * Holds if `b + delta` is a valid bound for `e`. + * - `upper = true` : `e <= b + delta` + * - `upper = false` : `e >= b + delta` + */ + private predicate bounded( + SemExpr e, SemBound b, D::Delta delta, boolean upper, boolean fromBackEdge, D::Delta origdelta, + SemReason reason + ) { + not ignoreExprBound(e) and + ( + e = b.getExpr(delta) and + (upper = true or upper = false) and + fromBackEdge = false and + origdelta = delta and + reason = TSemNoReason() + or + baseBound(e, delta, upper) and + b instanceof SemZeroBound and + fromBackEdge = false and + origdelta = delta and + reason = TSemNoReason() + or + exists(SemSsaVariable v, SemSsaReadPositionBlock bb | + boundedSsa(v, bb, b, delta, upper, fromBackEdge, origdelta, reason) and + e = v.getAUse() and + bb.getBlock() = e.getBasicBlock() + ) + or + exists(SemExpr mid, D::Delta d1, D::Delta d2 | + boundFlowStep(e, mid, d1, upper) and + // Constants have easy, base-case bounds, so let's not infer any recursive bounds. + not e instanceof SemConstantIntegerExpr and + bounded(mid, b, d2, upper, fromBackEdge, origdelta, reason) and + // upper = true: e <= mid + d1 <= b + d1 + d2 = b + delta + // upper = false: e >= mid + d1 >= b + d1 + d2 = b + delta + delta = D::fromFloat(D::toFloat(d1) + D::toFloat(d2)) + ) + or + exists(SemSsaPhiNode phi | + boundedPhi(phi, b, delta, upper, fromBackEdge, origdelta, reason) and + e = phi.getAUse() + ) + or + exists(SemExpr mid, D::Delta factor, D::Delta d | + boundFlowStepMul(e, mid, factor) and + not e instanceof SemConstantIntegerExpr and + bounded(mid, b, d, upper, fromBackEdge, origdelta, reason) and + b instanceof SemZeroBound and + delta = D::fromFloat(D::toFloat(d) * D::toFloat(factor)) + ) + or + exists(SemExpr mid, D::Delta factor, D::Delta d | + boundFlowStepDiv(e, mid, factor) and + not e instanceof SemConstantIntegerExpr and + bounded(mid, b, d, upper, fromBackEdge, origdelta, reason) and + b instanceof SemZeroBound and + D::toFloat(d) >= 0 and + delta = D::fromFloat(D::toFloat(d) / D::toFloat(factor)) + ) + or + exists(NarrowingCastExpr cast | + cast = e and + safeNarrowingCast(cast, upper.booleanNot()) and + boundedCastExpr(cast, b, delta, upper, fromBackEdge, origdelta, reason) + ) + or + exists( + SemConditionalExpr cond, D::Delta d1, D::Delta d2, boolean fbe1, boolean fbe2, D::Delta od1, + D::Delta od2, SemReason r1, SemReason r2 + | + cond = e and + boundedConditionalExpr(cond, b, upper, true, d1, fbe1, od1, r1) and + boundedConditionalExpr(cond, b, upper, false, d2, fbe2, od2, r2) and + ( + delta = d1 and fromBackEdge = fbe1 and origdelta = od1 and reason = r1 + or + delta = d2 and fromBackEdge = fbe2 and origdelta = od2 and reason = r2 + ) + | + upper = true and delta = D::fromFloat(D::toFloat(d1).maximum(D::toFloat(d2))) + or + upper = false and delta = D::fromFloat(D::toFloat(d1).minimum(D::toFloat(d2))) + ) + or + exists(SemExpr mid, D::Delta d, float f | + e.(SemNegateExpr).getOperand() = mid and + b instanceof SemZeroBound and + bounded(mid, b, d, upper.booleanNot(), fromBackEdge, origdelta, reason) and + f = normalizeFloatUp(-D::toFloat(d)) and + delta = D::fromFloat(f) and + if semPositive(e) then f >= 0 else any() + ) + or + exists( + SemBound bLeft, SemBound bRight, D::Delta dLeft, D::Delta dRight, boolean fbeLeft, + boolean fbeRight, D::Delta odLeft, D::Delta odRight, SemReason rLeft, SemReason rRight + | + boundedAddOperand(e, upper, bLeft, false, dLeft, fbeLeft, odLeft, rLeft) and + boundedAddOperand(e, upper, bRight, true, dRight, fbeRight, odRight, rRight) and + delta = D::fromFloat(D::toFloat(dLeft) + D::toFloat(dRight)) and + fromBackEdge = fbeLeft.booleanOr(fbeRight) + | + b = bLeft and origdelta = odLeft and reason = rLeft and bRight instanceof SemZeroBound + or + b = bRight and origdelta = odRight and reason = rRight and bLeft instanceof SemZeroBound + ) + or + exists( + SemRemExpr rem, D::Delta d_max, D::Delta d1, D::Delta d2, boolean fbe1, boolean fbe2, + D::Delta od1, D::Delta od2, SemReason r1, SemReason r2 + | + rem = e and + b instanceof SemZeroBound and + not (upper = true and semPositive(rem.getRightOperand())) and + not (upper = true and semPositive(rem.getLeftOperand())) and + boundedRemExpr(rem, true, d1, fbe1, od1, r1) and + boundedRemExpr(rem, false, d2, fbe2, od2, r2) and + ( + if D::toFloat(d1).abs() > D::toFloat(d2).abs() + then ( + d_max = d1 and fromBackEdge = fbe1 and origdelta = od1 and reason = r1 + ) else ( + d_max = d2 and fromBackEdge = fbe2 and origdelta = od2 and reason = r2 + ) + ) + | + upper = true and delta = D::fromFloat(D::toFloat(d_max).abs() - 1) + or + upper = false and delta = D::fromFloat(-D::toFloat(d_max).abs() + 1) + ) + or + exists( + D::Delta dLeft, D::Delta dRight, boolean fbeLeft, boolean fbeRight, D::Delta odLeft, + D::Delta odRight, SemReason rLeft, SemReason rRight + | + boundedMulOperand(e, upper, true, dLeft, fbeLeft, odLeft, rLeft) and + boundedMulOperand(e, upper, false, dRight, fbeRight, odRight, rRight) and + delta = D::fromFloat(D::toFloat(dLeft) * D::toFloat(dRight)) and + fromBackEdge = fbeLeft.booleanOr(fbeRight) + | + b instanceof SemZeroBound and origdelta = odLeft and reason = rLeft + or + b instanceof SemZeroBound and origdelta = odRight and reason = rRight + ) + ) + } + + pragma[nomagic] + private predicate boundedConditionalExpr( + SemConditionalExpr cond, SemBound b, boolean upper, boolean branch, D::Delta delta, + boolean fromBackEdge, D::Delta origdelta, SemReason reason + ) { + bounded(cond.getBranchExpr(branch), b, delta, upper, fromBackEdge, origdelta, reason) + } + + pragma[nomagic] + private predicate boundedAddOperand( + SemAddExpr add, boolean upper, SemBound b, boolean isLeft, D::Delta delta, boolean fromBackEdge, + D::Delta origdelta, SemReason reason + ) { + // `semValueFlowStep` already handles the case where one of the operands is a constant. + not semValueFlowStep(add, _, _) and + ( + isLeft = true and + bounded(add.getLeftOperand(), b, delta, upper, fromBackEdge, origdelta, reason) + or + isLeft = false and + bounded(add.getRightOperand(), b, delta, upper, fromBackEdge, origdelta, reason) + ) + } + + pragma[nomagic] + private predicate boundedRemExpr( + SemRemExpr rem, boolean upper, D::Delta delta, boolean fromBackEdge, D::Delta origdelta, + SemReason reason + ) { + bounded(rem.getRightOperand(), any(SemZeroBound zb), delta, upper, fromBackEdge, origdelta, + reason) + } + + /** + * Define `cmp(true) = <=` and `cmp(false) = >=`. + * + * Holds if `mul = left * right`, and in order to know if `mul cmp(upper) 0 + k` (for + * some `k`) we need to know that `left cmp(upperLeft) 0 + k1` and + * `right cmp(upperRight) 0 + k2` (for some `k1` and `k2`). + */ + pragma[nomagic] + private predicate boundedMulOperandCand( + SemMulExpr mul, SemExpr left, SemExpr right, boolean upper, boolean upperLeft, + boolean upperRight + ) { + not boundFlowStepMul(mul, _, _) and + mul.getLeftOperand() = left and + mul.getRightOperand() = right and + ( + semPositive(left) and + ( + // left, right >= 0 + semPositive(right) and + ( + // max(left * right) = max(left) * max(right) + upper = true and + upperLeft = true and + upperRight = true + or + // min(left * right) = min(left) * min(right) + upper = false and + upperLeft = false and + upperRight = false + ) + or + // left >= 0, right <= 0 + semNegative(right) and + ( + // max(left * right) = min(left) * max(right) + upper = true and + upperLeft = false and + upperRight = true + or + // min(left * right) = max(left) * min(right) + upper = false and + upperLeft = true and + upperRight = false + ) + ) + or + semNegative(left) and + ( + // left <= 0, right >= 0 + semPositive(right) and + ( + // max(left * right) = max(left) * min(right) + upper = true and + upperLeft = true and + upperRight = false + or + // min(left * right) = min(left) * max(right) + upper = false and + upperLeft = false and + upperRight = true + ) + or + // left, right <= 0 + semNegative(right) and + ( + // max(left * right) = min(left) * min(right) + upper = true and + upperLeft = false and + upperRight = false + or + // min(left * right) = max(left) * max(right) + upper = false and + upperLeft = true and + upperRight = true + ) + ) + ) + } + + /** + * Holds if `isLeft = true` and `mul`'s left operand is bounded by `delta`, + * or if `isLeft = false` and `mul`'s right operand is bounded by `delta`. + * + * If `upper = true` the computed bound contributes to an upper bound of `mul`, + * and if `upper = false` it contributes to a lower bound. + * The `fromBackEdge`, `origdelta`, `reason` triple are defined by the recursive + * call to `bounded`. + */ + pragma[nomagic] + private predicate boundedMulOperand( + SemMulExpr mul, boolean upper, boolean isLeft, D::Delta delta, boolean fromBackEdge, + D::Delta origdelta, SemReason reason + ) { + exists(boolean upperLeft, boolean upperRight, SemExpr left, SemExpr right | + boundedMulOperandCand(mul, left, right, upper, upperLeft, upperRight) + | + isLeft = true and + bounded(left, any(SemZeroBound zb), delta, upperLeft, fromBackEdge, origdelta, reason) + or + isLeft = false and + bounded(right, any(SemZeroBound zb), delta, upperRight, fromBackEdge, origdelta, reason) + ) + } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeUtils.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeUtils.qll new file mode 100644 index 00000000000..7eeed5d7975 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/RangeUtils.qll @@ -0,0 +1,140 @@ +/** + * Provides utility predicates for range analysis. + */ + +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.Semantic +private import RangeAnalysisSpecific +private import RangeAnalysisStage as Range +private import ConstantAnalysis + +module RangeUtil Lang> implements Range::UtilSig { + /** + * Gets an expression that equals `v - d`. + */ + SemExpr semSsaRead(SemSsaVariable v, D::Delta delta) { + // There are various language-specific extension points that can be removed once we no longer + // expect to match the original Java implementation's results exactly. + result = v.getAUse() and delta = D::fromInt(0) + or + exists(D::Delta d1, SemConstantIntegerExpr c | + result.(SemAddExpr).hasOperands(semSsaRead(v, d1), c) and + delta = D::fromFloat(D::toFloat(d1) - c.getIntValue()) and + not Lang::ignoreSsaReadArithmeticExpr(result) + ) + or + exists(SemSubExpr sub, D::Delta d1, SemConstantIntegerExpr c | + result = sub and + sub.getLeftOperand() = semSsaRead(v, d1) and + sub.getRightOperand() = c and + delta = D::fromFloat(D::toFloat(d1) + c.getIntValue()) and + not Lang::ignoreSsaReadArithmeticExpr(result) + ) + or + result = v.(SemSsaExplicitUpdate).getSourceExpr() and + delta = D::fromFloat(0) and + not Lang::ignoreSsaReadAssignment(v) + or + result = Lang::specificSsaRead(v, delta) + or + result.(SemCopyValueExpr).getOperand() = semSsaRead(v, delta) and + not Lang::ignoreSsaReadCopy(result) + or + result.(SemStoreExpr).getOperand() = semSsaRead(v, delta) + } + + /** + * Gets a condition that tests whether `v` equals `e + delta`. + * + * If the condition evaluates to `testIsTrue`: + * - `isEq = true` : `v == e + delta` + * - `isEq = false` : `v != e + delta` + */ + SemGuard semEqFlowCond( + SemSsaVariable v, SemExpr e, D::Delta delta, boolean isEq, boolean testIsTrue + ) { + exists(boolean eqpolarity | + result.isEquality(semSsaRead(v, delta), e, eqpolarity) and + (testIsTrue = true or testIsTrue = false) and + eqpolarity.booleanXor(testIsTrue).booleanNot() = isEq + ) + or + exists(boolean testIsTrue0 | + semImplies_v2(result, testIsTrue, semEqFlowCond(v, e, delta, isEq, testIsTrue0), testIsTrue0) + ) + } + + /** + * Holds if `v` is an `SsaExplicitUpdate` that equals `e + delta`. + */ + predicate semSsaUpdateStep(SemSsaExplicitUpdate v, SemExpr e, D::Delta delta) { + exists(SemExpr defExpr | defExpr = v.getSourceExpr() | + defExpr.(SemCopyValueExpr).getOperand() = e and delta = D::fromFloat(0) + or + defExpr.(SemStoreExpr).getOperand() = e and delta = D::fromFloat(0) + or + defExpr.(SemAddOneExpr).getOperand() = e and delta = D::fromFloat(1) + or + defExpr.(SemSubOneExpr).getOperand() = e and delta = D::fromFloat(-1) + or + e = defExpr and + not ( + defExpr instanceof SemCopyValueExpr or + defExpr instanceof SemStoreExpr or + defExpr instanceof SemAddOneExpr or + defExpr instanceof SemSubOneExpr + ) and + delta = D::fromFloat(0) + ) + } + + /** + * Holds if `e1 + delta` equals `e2`. + */ + predicate semValueFlowStep(SemExpr e2, SemExpr e1, D::Delta delta) { + e2.(SemCopyValueExpr).getOperand() = e1 and delta = D::fromFloat(0) + or + e2.(SemStoreExpr).getOperand() = e1 and delta = D::fromFloat(0) + or + e2.(SemAddOneExpr).getOperand() = e1 and delta = D::fromFloat(1) + or + e2.(SemSubOneExpr).getOperand() = e1 and delta = D::fromFloat(-1) + or + Lang::additionalValueFlowStep(e2, e1, delta) + or + exists(SemExpr x | e2.(SemAddExpr).hasOperands(e1, x) | + D::fromInt(x.(SemConstantIntegerExpr).getIntValue()) = delta + ) + or + exists(SemExpr x, SemSubExpr sub | + e2 = sub and + sub.getLeftOperand() = e1 and + sub.getRightOperand() = x + | + D::fromInt(-x.(SemConstantIntegerExpr).getIntValue()) = delta + ) + } + + /** + * Gets the type used to track the specified expression's range information. + * + * Usually, this just `e.getSemType()`, but the language can override this to track immutable boxed + * primitive types as the underlying primitive type. + */ + SemType getTrackedType(SemExpr e) { + result = Lang::getAlternateType(e) + or + not exists(Lang::getAlternateType(e)) and result = e.getSemType() + } + + /** + * Gets the type used to track the specified source variable's range information. + * + * Usually, this just `e.getType()`, but the language can override this to track immutable boxed + * primitive types as the underlying primitive type. + */ + SemType getTrackedTypeForSsaVariable(SemSsaVariable var) { + result = Lang::getAlternateTypeForSsaVariable(var) + or + not exists(Lang::getAlternateTypeForSsaVariable(var)) and result = var.getType() + } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/Sign.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/Sign.qll new file mode 100644 index 00000000000..814691d9bcd --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/Sign.qll @@ -0,0 +1,267 @@ +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.Semantic + +newtype TSign = + TNeg() or + TZero() or + TPos() + +/** Class representing expression signs (+, -, 0). */ +class Sign extends TSign { + /** Gets the string representation of this sign. */ + string toString() { + result = "-" and this = TNeg() + or + result = "0" and this = TZero() + or + result = "+" and this = TPos() + } + + /** Gets a possible sign after incrementing an expression that has this sign. */ + Sign inc() { + this = TNeg() and result = TNeg() + or + this = TNeg() and result = TZero() + or + this = TZero() and result = TPos() + or + this = TPos() and result = TPos() + } + + /** Gets a possible sign after decrementing an expression that has this sign. */ + Sign dec() { result.inc() = this } + + /** Gets a possible sign after negating an expression that has this sign. */ + Sign neg() { + this = TNeg() and result = TPos() + or + this = TZero() and result = TZero() + or + this = TPos() and result = TNeg() + } + + /** + * Gets a possible sign after bitwise complementing an expression that has this + * sign. + */ + Sign bitnot() { + this = TNeg() and result = TPos() + or + this = TNeg() and result = TZero() + or + this = TZero() and result = TNeg() + or + this = TPos() and result = TNeg() + } + + /** + * Gets a possible sign after adding an expression with sign `s` to an expression + * that has this sign. + */ + Sign add(Sign s) { + this = TZero() and result = s + or + s = TZero() and result = this + or + this = s and this = result + or + this = TPos() and s = TNeg() + or + this = TNeg() and s = TPos() + } + + /** + * Gets a possible sign after subtracting an expression with sign `s` from an expression + * that has this sign. + */ + Sign sub(Sign s) { result = add(s.neg()) } + + /** + * Gets a possible sign after multiplying an expression with sign `s` to an expression + * that has this sign. + */ + Sign mul(Sign s) { + result = TZero() and this = TZero() + or + result = TZero() and s = TZero() + or + result = TNeg() and this = TPos() and s = TNeg() + or + result = TNeg() and this = TNeg() and s = TPos() + or + result = TPos() and this = TPos() and s = TPos() + or + result = TPos() and this = TNeg() and s = TNeg() + } + + /** + * Gets a possible sign after integer dividing an expression that has this sign + * by an expression with sign `s`. + */ + Sign div(Sign s) { + result = TZero() and s = TNeg() // ex: 3 / -5 = 0 + or + result = TZero() and s = TPos() // ex: 3 / 5 = 0 + or + result = TNeg() and this = TPos() and s = TNeg() + or + result = TNeg() and this = TNeg() and s = TPos() + or + result = TPos() and this = TPos() and s = TPos() + or + result = TPos() and this = TNeg() and s = TNeg() + } + + /** + * Gets a possible sign after modulo dividing an expression that has this sign + * by an expression with sign `s`. + */ + Sign rem(Sign s) { + result = TZero() and s = TNeg() + or + result = TZero() and s = TPos() + or + result = this and s = TNeg() + or + result = this and s = TPos() + } + + /** + * Gets a possible sign after bitwise `and` of an expression that has this sign + * and an expression with sign `s`. + */ + Sign bitand(Sign s) { + result = TZero() and this = TZero() + or + result = TZero() and s = TZero() + or + result = TZero() and this = TPos() + or + result = TZero() and s = TPos() + or + result = TNeg() and this = TNeg() and s = TNeg() + or + result = TPos() and this = TNeg() and s = TPos() + or + result = TPos() and this = TPos() and s = TNeg() + or + result = TPos() and this = TPos() and s = TPos() + } + + /** + * Gets a possible sign after bitwise `or` of an expression that has this sign + * and an expression with sign `s`. + */ + Sign bitor(Sign s) { + result = TZero() and this = TZero() and s = TZero() + or + result = TNeg() and this = TNeg() + or + result = TNeg() and s = TNeg() + or + result = TPos() and this = TPos() and s = TZero() + or + result = TPos() and this = TZero() and s = TPos() + or + result = TPos() and this = TPos() and s = TPos() + } + + /** + * Gets a possible sign after bitwise `xor` of an expression that has this sign + * and an expression with sign `s`. + */ + Sign bitxor(Sign s) { + result = TZero() and this = s + or + result = this and s = TZero() + or + result = s and this = TZero() + or + result = TPos() and this = TPos() and s = TPos() + or + result = TNeg() and this = TNeg() and s = TPos() + or + result = TNeg() and this = TPos() and s = TNeg() + or + result = TPos() and this = TNeg() and s = TNeg() + } + + /** + * Gets a possible sign after left shift of an expression that has this sign + * by an expression with sign `s`. + */ + Sign lshift(Sign s) { + result = TZero() and this = TZero() + or + result = this and s = TZero() + or + this != TZero() and s != TZero() + } + + /** + * Gets a possible sign after right shift of an expression that has this sign + * by an expression with sign `s`. + */ + Sign rshift(Sign s) { + result = TZero() and this = TZero() + or + result = this and s = TZero() + or + result = TNeg() and this = TNeg() + or + result != TNeg() and this = TPos() and s != TZero() + } + + /** + * Gets a possible sign after unsigned right shift of an expression that has + * this sign by an expression with sign `s`. + */ + Sign urshift(Sign s) { + result = TZero() and this = TZero() + or + result = this and s = TZero() + or + result != TZero() and this = TNeg() and s != TZero() + or + result != TNeg() and this = TPos() and s != TZero() + } + + /** Perform `op` on this sign. */ + Sign applyUnaryOp(Opcode op) { + op instanceof Opcode::CopyValue and result = this + or + op instanceof Opcode::Store and result = this + or + op instanceof Opcode::AddOne and result = inc() + or + op instanceof Opcode::SubOne and result = dec() + or + op instanceof Opcode::Negate and result = neg() + or + op instanceof Opcode::BitComplement and result = bitnot() + } + + /** Perform `op` on this sign and sign `s`. */ + Sign applyBinaryOp(Sign s, Opcode op) { + op instanceof Opcode::Add and result = add(s) + or + op instanceof Opcode::Sub and result = sub(s) + or + op instanceof Opcode::Mul and result = mul(s) + or + op instanceof Opcode::Div and result = div(s) + or + op instanceof Opcode::Rem and result = rem(s) + or + op instanceof Opcode::BitAnd and result = bitand(s) + or + op instanceof Opcode::BitOr and result = bitor(s) + or + op instanceof Opcode::BitXor and result = bitxor(s) + or + op instanceof Opcode::ShiftLeft and result = lshift(s) + or + op instanceof Opcode::ShiftRight and result = rshift(s) + or + op instanceof Opcode::ShiftRightUnsigned and result = urshift(s) + } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/SignAnalysisCommon.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/SignAnalysisCommon.qll new file mode 100644 index 00000000000..9abcdc842ab --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/SignAnalysisCommon.qll @@ -0,0 +1,510 @@ +/** + * Provides sign analysis to determine whether expression are always positive + * or negative. + * + * The analysis is implemented as an abstract interpretation over the + * three-valued domain `{negative, zero, positive}`. + */ + +private import RangeAnalysisStage +private import SignAnalysisSpecific as Specific +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.Semantic +private import ConstantAnalysis +private import RangeUtils +private import Sign + +module SignAnalysis Utils> { + /** + * An SSA definition for which the analysis can compute the sign. + * + * The actual computation of the sign is done in an override of the `getSign()` predicate. The + * charpred of any subclass must _not_ invoke `getSign()`, directly or indirectly. This ensures + * that the charpred does not introduce negative recursion. The `getSign()` predicate may be + * recursive. + */ + abstract private class SignDef instanceof SemSsaVariable { + final string toString() { result = super.toString() } + + /** Gets the possible signs of this SSA definition. */ + abstract Sign getSign(); + } + + /** An SSA definition whose sign is computed based on standard flow. */ + abstract private class FlowSignDef extends SignDef { + abstract override Sign getSign(); + } + + /** An SSA definition whose sign is determined by the sign of that definitions source expression. */ + private class ExplicitSignDef extends FlowSignDef instanceof SemSsaExplicitUpdate { + final override Sign getSign() { result = semExprSign(super.getSourceExpr()) } + } + + /** An SSA Phi definition, whose sign is the union of the signs of its inputs. */ + private class PhiSignDef extends FlowSignDef instanceof SemSsaPhiNode { + final override Sign getSign() { + exists(SemSsaVariable inp, SemSsaReadPositionPhiInputEdge edge | + edge.phiInput(this, inp) and + result = semSsaSign(inp, edge) + ) + } + } + + /** An SSA definition whose sign is computed by a language-specific implementation. */ + abstract class CustomSignDef extends SignDef { + abstract override Sign getSign(); + } + + /** + * An expression for which the analysis can compute the sign. + * + * The actual computation of the sign is done in an override of the `getSign()` predicate. The + * charpred of any subclass must _not_ invoke `getSign()`, directly or indirectly. This ensures + * that the charpred does not introduce negative recursion. The `getSign()` predicate may be + * recursive. + * + * Concrete implementations extend one of the following subclasses: + * - `ConstantSignExpr`, for expressions with a compile-time constant value. + * - `FlowSignExpr`, for expressions whose sign can be computed from the signs of their operands. + * - `CustomsignExpr`, for expressions whose sign can be computed by a language-specific + * implementation. + * + * If the same expression matches more than one of the above subclasses, the sign is computed as + * follows: + * - The sign of a `ConstantSignExpr` is computed solely from `ConstantSignExpr.getSign()`, + * regardless of any other subclasses. + * - If a non-`ConstantSignExpr` expression matches exactly one of `FlowSignExpr` or + * `CustomSignExpr`, the sign is computed by that class' `getSign()` predicate. + * - If a non-`ConstantSignExpr` expression matches both `FlowSignExpr` and `CustomSignExpr`, the + * sign is the _intersection_ of the signs of those two classes' `getSign()` predicates. Thus, + * both classes have the opportunity to _restrict_ the set of possible signs, not to generate new + * possible signs. + * - If an expression does not match any of the three subclasses, then it can have any sign. + * + * Note that the `getSign()` predicate is introduced only in subclasses of `SignExpr`. + */ + abstract class SignExpr instanceof SemExpr { + SignExpr() { not Specific::ignoreExprSign(this) } + + final string toString() { result = super.toString() } + + abstract Sign getSign(); + } + + /** An expression whose sign is determined by its constant numeric value. */ + private class ConstantSignExpr extends SignExpr { + ConstantSignExpr() { + this instanceof SemConstantIntegerExpr or + exists(this.(SemNumericLiteralExpr).getApproximateFloatValue()) + } + + final override Sign getSign() { + exists(int i | this.(SemConstantIntegerExpr).getIntValue() = i | + i < 0 and result = TNeg() + or + i = 0 and result = TZero() + or + i > 0 and result = TPos() + ) + or + not exists(this.(SemConstantIntegerExpr).getIntValue()) and + exists(float f | f = this.(SemNumericLiteralExpr).getApproximateFloatValue() | + f < 0 and result = TNeg() + or + f = 0 and result = TZero() + or + f > 0 and result = TPos() + ) + } + } + + abstract private class NonConstantSignExpr extends SignExpr { + NonConstantSignExpr() { not this instanceof ConstantSignExpr } + + final override Sign getSign() { + // The result is the _intersection_ of the signs computed from flow and by the language. + (result = this.(FlowSignExpr).getSignRestriction() or not this instanceof FlowSignExpr) and + (result = this.(CustomSignExpr).getSignRestriction() or not this instanceof CustomSignExpr) + } + } + + /** An expression whose sign is computed from the signs of its operands. */ + abstract private class FlowSignExpr extends NonConstantSignExpr { + abstract Sign getSignRestriction(); + } + + /** An expression whose sign is computed by a language-specific implementation. */ + abstract class CustomSignExpr extends NonConstantSignExpr { + abstract Sign getSignRestriction(); + } + + /** An expression whose sign is unknown. */ + private class UnknownSignExpr extends SignExpr { + UnknownSignExpr() { + not this instanceof FlowSignExpr and + not this instanceof CustomSignExpr and + not this instanceof ConstantSignExpr and + ( + // Only track numeric types. + Utils::getTrackedType(this) instanceof SemNumericType + or + // Unless the language says to track this expression anyway. + Specific::trackUnknownNonNumericExpr(this) + ) + } + + final override Sign getSign() { semAnySign(result) } + } + + /** + * A `Load` expression whose sign is computed from the sign of its SSA definition, restricted by + * inference from any intervening guards. + */ + class UseSignExpr extends FlowSignExpr { + SemSsaVariable v; + + UseSignExpr() { v.getAUse() = this } + + override Sign getSignRestriction() { + // Propagate via SSA + // Propagate the sign from the def of `v`, incorporating any inference from guards. + result = semSsaSign(v, any(SemSsaReadPositionBlock bb | bb.getAnExpr() = this)) + or + // No block for this read. Just use the sign of the def. + // REVIEW: How can this happen? + not exists(SemSsaReadPositionBlock bb | bb.getAnExpr() = this) and + result = semSsaDefSign(v) + } + } + + /** A binary expression whose sign is computed from the signs of its operands. */ + private class BinarySignExpr extends FlowSignExpr { + SemBinaryExpr binary; + + BinarySignExpr() { binary = this } + + override Sign getSignRestriction() { + exists(SemExpr left, SemExpr right | + binaryExprOperands(binary, left, right) and + result = + semExprSign(pragma[only_bind_out](left)) + .applyBinaryOp(semExprSign(pragma[only_bind_out](right)), binary.getOpcode()) + ) + or + exists(SemDivExpr div | div = binary | + result = semExprSign(div.getLeftOperand()) and + result != TZero() and + div.getRightOperand().(SemFloatingPointLiteralExpr).getFloatValue() = 0 + ) + } + } + + /** An expression of an unsigned type. */ + private class UnsignedExpr extends FlowSignExpr { + UnsignedExpr() { Utils::getTrackedType(this) instanceof SemUnsignedIntegerType } + + override Sign getSignRestriction() { + result = TPos() or + result = TZero() + } + } + + pragma[nomagic] + private predicate binaryExprOperands(SemBinaryExpr binary, SemExpr left, SemExpr right) { + binary.getLeftOperand() = left and binary.getRightOperand() = right + } + + /** + * A `Convert`, `Box`, or `Unbox` expression. + */ + private class SemCastExpr instanceof SemUnaryExpr { + string toString() { result = super.toString() } + + SemCastExpr() { + this instanceof SemConvertExpr + or + this instanceof SemBoxExpr + or + this instanceof SemUnboxExpr + } + } + + /** A unary expression whose sign is computed from the sign of its operand. */ + private class UnarySignExpr extends FlowSignExpr { + SemUnaryExpr unary; + + UnarySignExpr() { unary = this and not this instanceof SemCastExpr } + + override Sign getSignRestriction() { + result = + semExprSign(pragma[only_bind_out](unary.getOperand())).applyUnaryOp(unary.getOpcode()) + } + } + + /** + * A `Convert`, `Box`, or `Unbox` expression, whose sign is computed based on + * the sign of its operand and the source and destination types. + */ + abstract private class CastSignExpr extends FlowSignExpr { + SemUnaryExpr cast; + + CastSignExpr() { cast = this and cast instanceof SemCastExpr } + + override Sign getSignRestriction() { result = semExprSign(cast.getOperand()) } + } + + /** + * A `Convert` expression. + */ + private class ConvertSignExpr extends CastSignExpr { + override SemConvertExpr cast; + } + + /** + * A `Box` expression. + */ + private class BoxSignExpr extends CastSignExpr { + override SemBoxExpr cast; + } + + /** + * An `Unbox` expression. + */ + private class UnboxSignExpr extends CastSignExpr { + override SemUnboxExpr cast; + + UnboxSignExpr() { + exists(SemType fromType | fromType = Utils::getTrackedType(cast.getOperand()) | + // Only numeric source types are handled here. + fromType instanceof SemNumericType + ) + } + } + + private predicate unknownSign(SemExpr e) { e instanceof UnknownSignExpr } + + /** + * Holds if `lowerbound` is a lower bound for `v` at `pos`. This is restricted + * to only include bounds for which we might determine a sign. + */ + private predicate lowerBound( + SemExpr lowerbound, SemSsaVariable v, SemSsaReadPosition pos, boolean isStrict + ) { + exists(boolean testIsTrue, SemRelationalExpr comp | + pos.hasReadOfVar(v) and + semGuardControlsSsaRead(semGetComparisonGuard(comp), pos, testIsTrue) and + not unknownSign(lowerbound) + | + testIsTrue = true and + comp.getLesserOperand() = lowerbound and + comp.getGreaterOperand() = Utils::semSsaRead(v, D::fromInt(0)) and + (if comp.isStrict() then isStrict = true else isStrict = false) + or + testIsTrue = false and + comp.getGreaterOperand() = lowerbound and + comp.getLesserOperand() = Utils::semSsaRead(v, D::fromInt(0)) and + (if comp.isStrict() then isStrict = false else isStrict = true) + ) + } + + /** + * Holds if `upperbound` is an upper bound for `v` at `pos`. This is restricted + * to only include bounds for which we might determine a sign. + */ + private predicate upperBound( + SemExpr upperbound, SemSsaVariable v, SemSsaReadPosition pos, boolean isStrict + ) { + exists(boolean testIsTrue, SemRelationalExpr comp | + pos.hasReadOfVar(v) and + semGuardControlsSsaRead(semGetComparisonGuard(comp), pos, testIsTrue) and + not unknownSign(upperbound) + | + testIsTrue = true and + comp.getGreaterOperand() = upperbound and + comp.getLesserOperand() = Utils::semSsaRead(v, D::fromInt(0)) and + (if comp.isStrict() then isStrict = true else isStrict = false) + or + testIsTrue = false and + comp.getLesserOperand() = upperbound and + comp.getGreaterOperand() = Utils::semSsaRead(v, D::fromInt(0)) and + (if comp.isStrict() then isStrict = false else isStrict = true) + ) + } + + /** + * Holds if `eqbound` is an equality/inequality for `v` at `pos`. This is + * restricted to only include bounds for which we might determine a sign. The + * boolean `isEq` gives the polarity: + * - `isEq = true` : `v = eqbound` + * - `isEq = false` : `v != eqbound` + */ + private predicate eqBound(SemExpr eqbound, SemSsaVariable v, SemSsaReadPosition pos, boolean isEq) { + exists(SemGuard guard, boolean testIsTrue, boolean polarity, SemExpr e | + pos.hasReadOfVar(pragma[only_bind_into](v)) and + semGuardControlsSsaRead(guard, pragma[only_bind_into](pos), testIsTrue) and + e = Utils::semSsaRead(pragma[only_bind_into](v), D::fromInt(0)) and + guard.isEquality(eqbound, e, polarity) and + isEq = polarity.booleanXor(testIsTrue).booleanNot() and + not unknownSign(eqbound) + ) + } + + /** + * Holds if `bound` is a bound for `v` at `pos` that needs to be positive in + * order for `v` to be positive. + */ + private predicate posBound(SemExpr bound, SemSsaVariable v, SemSsaReadPosition pos) { + upperBound(bound, v, pos, _) or + eqBound(bound, v, pos, true) + } + + /** + * Holds if `bound` is a bound for `v` at `pos` that needs to be negative in + * order for `v` to be negative. + */ + private predicate negBound(SemExpr bound, SemSsaVariable v, SemSsaReadPosition pos) { + lowerBound(bound, v, pos, _) or + eqBound(bound, v, pos, true) + } + + /** + * Holds if `bound` is a bound for `v` at `pos` that can restrict whether `v` + * can be zero. + */ + private predicate zeroBound(SemExpr bound, SemSsaVariable v, SemSsaReadPosition pos) { + lowerBound(bound, v, pos, _) or + upperBound(bound, v, pos, _) or + eqBound(bound, v, pos, _) + } + + /** Holds if `bound` allows `v` to be positive at `pos`. */ + private predicate posBoundOk(SemExpr bound, SemSsaVariable v, SemSsaReadPosition pos) { + posBound(bound, v, pos) and TPos() = semExprSign(bound) + } + + /** Holds if `bound` allows `v` to be negative at `pos`. */ + private predicate negBoundOk(SemExpr bound, SemSsaVariable v, SemSsaReadPosition pos) { + negBound(bound, v, pos) and TNeg() = semExprSign(bound) + } + + /** Holds if `bound` allows `v` to be zero at `pos`. */ + private predicate zeroBoundOk(SemExpr bound, SemSsaVariable v, SemSsaReadPosition pos) { + lowerBound(bound, v, pos, _) and TNeg() = semExprSign(bound) + or + lowerBound(bound, v, pos, false) and TZero() = semExprSign(bound) + or + upperBound(bound, v, pos, _) and TPos() = semExprSign(bound) + or + upperBound(bound, v, pos, false) and TZero() = semExprSign(bound) + or + eqBound(bound, v, pos, true) and TZero() = semExprSign(bound) + or + eqBound(bound, v, pos, false) and TZero() != semExprSign(bound) + } + + /** + * Holds if there is a bound that might restrict whether `v` has the sign `s` + * at `pos`. + */ + private predicate hasGuard(SemSsaVariable v, SemSsaReadPosition pos, Sign s) { + s = TPos() and posBound(_, v, pos) + or + s = TNeg() and negBound(_, v, pos) + or + s = TZero() and zeroBound(_, v, pos) + } + + /** + * Gets a possible sign of `v` at `pos` based on its definition, where the sign + * might be ruled out by a guard. + */ + pragma[noinline] + private Sign guardedSsaSign(SemSsaVariable v, SemSsaReadPosition pos) { + result = semSsaDefSign(v) and + pos.hasReadOfVar(v) and + hasGuard(v, pos, result) + } + + /** + * Gets a possible sign of `v` at `pos` based on its definition, where no guard + * can rule it out. + */ + pragma[noinline] + private Sign unguardedSsaSign(SemSsaVariable v, SemSsaReadPosition pos) { + result = semSsaDefSign(v) and + pos.hasReadOfVar(v) and + not hasGuard(v, pos, result) + } + + /** + * Gets a possible sign of `v` at read position `pos`, where a guard could have + * ruled out the sign but does not. + * This does not check that the definition of `v` also allows the sign. + */ + private Sign guardedSsaSignOk(SemSsaVariable v, SemSsaReadPosition pos) { + result = TPos() and + forex(SemExpr bound | posBound(bound, v, pos) | posBoundOk(bound, v, pos)) + or + result = TNeg() and + forex(SemExpr bound | negBound(bound, v, pos) | negBoundOk(bound, v, pos)) + or + result = TZero() and + forex(SemExpr bound | zeroBound(bound, v, pos) | zeroBoundOk(bound, v, pos)) + } + + /** Gets a possible sign for `v` at `pos`. */ + private Sign semSsaSign(SemSsaVariable v, SemSsaReadPosition pos) { + result = unguardedSsaSign(v, pos) + or + result = guardedSsaSign(v, pos) and + result = guardedSsaSignOk(v, pos) + } + + /** Gets a possible sign for `v`. */ + pragma[nomagic] + Sign semSsaDefSign(SemSsaVariable v) { result = v.(SignDef).getSign() } + + /** Gets a possible sign for `e`. */ + cached + Sign semExprSign(SemExpr e) { + exists(Sign s | s = e.(SignExpr).getSign() | + if + Utils::getTrackedType(e) instanceof SemUnsignedIntegerType and + s = TNeg() and + not Specific::ignoreTypeRestrictions(e) + then result = TPos() + else result = s + ) + } + + /** + * Dummy predicate that holds for any sign. This is added to improve readability + * of cases where the sign is unrestricted. + */ + predicate semAnySign(Sign s) { any() } + + /** Holds if `e` can be positive and cannot be negative. */ + predicate semPositive(SemExpr e) { + semExprSign(e) = TPos() and + not semExprSign(e) = TNeg() + } + + /** Holds if `e` can be negative and cannot be positive. */ + predicate semNegative(SemExpr e) { + semExprSign(e) = TNeg() and + not semExprSign(e) = TPos() + } + + /** Holds if `e` is strictly positive. */ + predicate semStrictlyPositive(SemExpr e) { + semExprSign(e) = TPos() and + not semExprSign(e) = TNeg() and + not semExprSign(e) = TZero() + } + + /** Holds if `e` is strictly negative. */ + predicate semStrictlyNegative(SemExpr e) { + semExprSign(e) = TNeg() and + not semExprSign(e) = TPos() and + not semExprSign(e) = TZero() + } +} diff --git a/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/SignAnalysisSpecific.qll b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/SignAnalysisSpecific.qll new file mode 100644 index 00000000000..8bdb9dbc6b9 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/rangeanalysis/new/internal/semantic/analysis/SignAnalysisSpecific.qll @@ -0,0 +1,23 @@ +/** + * Provides C++-specific definitions for use in sign analysis. + */ + +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.Semantic + +/** + * Workaround to allow certain expressions to have a negative sign, even if the type of the + * expression is unsigned. + */ +predicate ignoreTypeRestrictions(SemExpr e) { none() } + +/** + * Workaround to track the sign of certain expressions even if the type of the expression is not + * numeric. + */ +predicate trackUnknownNonNumericExpr(SemExpr e) { none() } + +/** + * Workaround to ignore tracking of certain expressions even if the type of the expression is + * numeric. + */ +predicate ignoreExprSign(SemExpr e) { none() }